Table Bloat And Last Time Autovacuum Ran
Note: we have to analyze all tables to see what we are working with.
SELECT tblname, bs*tblpages AS real_size,
(tblpages-est_tblpages)*bs AS extra_size,
CASE WHEN tblpages > 0 AND tblpages - est_tblpages > 0
THEN 100 * (tblpages - est_tblpages)/tblpages::float
ELSE 0
END AS extra_pct, fillfactor,
CASE WHEN tblpages - est_tblpages_ff > 0
THEN (tblpages-est_tblpages_ff)*bs
ELSE 0
END AS bloat_size,
CASE WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0
THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float
ELSE 0
END AS bloat_pct, is_na
FROM (
SELECT ceil(reltuples / ((bs-page_hdr)/tpl_size)) + ceil(toasttuples / 4) AS est_tblpages,
ceil(reltuples / ((bs-page_hdr)*fillfactor/(tpl_size*100))) + ceil(toasttuples / 4) AS est_tblpages_ff,
tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
FROM (
SELECT (4 + tpl_hdr_size + tpl_data_size + (2*ma)
- CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
- CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
FROM (
SELECT tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
coalesce(toast.reltuples, 0) AS toasttuples,
coalesce(substring(array_to_string(tbl.reloptions, ' ') FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor,
current_setting('block_size')::numeric AS bs,
CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
24 AS page_hdr,
23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN (7 + count(s.attname)) / 8 ELSE 0::int END
+ CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size,
sum((1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0)) AS tpl_data_size,
bool_or(att.atttypid = 'pg_catalog.name'::regtype) OR sum(CASE WHEN att.attnum > 0 THEN 1 ELSE 0 END) <> count(s.attname) AS is_na
FROM pg_attribute AS att
JOIN pg_class AS tbl ON att.attrelid = tbl.oid
JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname AND s.tablename=tbl.relname AND s.inherited=false AND s.attname=att.attname
LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
WHERE NOT att.attisdropped
AND tbl.relkind in ('r','m')
GROUP BY 1,2,3,4,5,6,7,8,9,10
ORDER BY 2,3
) AS s
) AS s2
) AS s3
ORDER BY bloat_pct desc;
Index Bloat
SELECT current_database(), nspname AS schemaname, tblname, idxname,
bs*(relpages)::bigint AS real_size,
bs*(relpages-est_pages)::bigint AS extra_size,
100 * (relpages-est_pages)::float / relpages AS extra_pct,
fillfactor,
CASE WHEN relpages > est_pages_ff THEN bs*(relpages-est_pages_ff) ELSE 0 END AS bloat_size,
100 * (relpages-est_pages_ff)::float / relpages AS bloat_pct,
is_na
FROM (
SELECT coalesce(1 + ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0) AS est_pages,
coalesce(1 + ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0) AS est_pages_ff,
bs, nspname, tblname, idxname, relpages, fillfactor, is_na
FROM (
SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor,
(index_tuple_hdr_bm + maxalign - CASE WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign ELSE index_tuple_hdr_bm%maxalign END
+ nulldatawidth + maxalign - CASE WHEN nulldatawidth = 0 THEN 0 WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign ELSE nulldatawidth::integer%maxalign END
)::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na
FROM (
SELECT n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, i.idxoid, i.fillfactor,
current_setting('block_size')::numeric AS bs,
CASE WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS maxalign,
24 AS pagehdr,
16 AS pageopqdata,
CASE WHEN max(coalesce(s.null_frac,0)) = 0 THEN 8 ELSE 8 + ((32 + 8 - 1) / 8) END AS index_tuple_hdr_bm,
sum((1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 1024)) AS nulldatawidth,
max(CASE WHEN i.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END) > 0 AS is_na
FROM (
SELECT ct.relname AS tblname, ct.relnamespace, ic.idxname, ic.attpos, ic.indkey, ic.indkey[ic.attpos], ic.reltuples, ic.relpages, ic.tbloid, ic.idxoid, ic.fillfactor,
coalesce(a1.attnum, a2.attnum) AS attnum, coalesce(a1.attname, a2.attname) AS attname, coalesce(a1.atttypid, a2.atttypid) AS atttypid,
CASE WHEN a1.attnum IS NULL THEN ic.idxname ELSE ct.relname END AS attrelname
FROM (
SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, pg_catalog.generate_series(1,indnatts) AS attpos
FROM (
SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid, i.indexrelid AS idxoid,
coalesce(substring(array_to_string(ci.reloptions, ' ') from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor,
i.indnatts, pg_catalog.string_to_array(pg_catalog.textin(pg_catalog.int2vectorout(i.indkey)),' ')::int[] AS indkey
FROM pg_catalog.pg_index i
JOIN pg_catalog.pg_class ci ON ci.oid = i.indexrelid
WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree') AND ci.relpages > 0
) AS idx_data
) AS ic
JOIN pg_catalog.pg_class ct ON ct.oid = ic.tbloid
LEFT JOIN pg_catalog.pg_attribute a1 ON ic.indkey[ic.attpos] <> 0 AND a1.attrelid = ic.tbloid AND a1.attnum = ic.indkey[ic.attpos]
LEFT JOIN pg_catalog.pg_attribute a2 ON ic.indkey[ic.attpos] = 0 AND a2.attrelid = ic.idxoid AND a2.attnum = ic.attpos
) i
JOIN pg_catalog.pg_namespace n ON n.oid = i.relnamespace
JOIN pg_catalog.pg_stats s ON s.schemaname = n.nspname AND s.tablename = i.attrelname AND s.attname = i.attname
GROUP BY 1,2,3,4,5,6,7,8,9,10,11
) AS rows_data_stats
) AS rows_hdr_pdg_stats
) AS relation_stats
ORDER BY extra_pct desc;
Last Autovacuum of Indexes
select schemaname, relname, indexrelname, pg_stat_get_last_autovacuum_time(relid) as last_autovacuum
from pg_stat_user_indexes;
Indexes are autovacuumed along with the table:
inapp=> select t.relname, last_autovacuum, indexrelname, pg_stat_get_last_autovacuum_time(i.relid) as last_autovacuum_index
from pg_stat_user_tables t
join pg_stat_user_indexes i on t.relname = i.relname
where t.last_autovacuum is not null
order by last_autovacuum desc
limit 5;
relname | last_autovacuum | indexrelname | last_autovacuum_index
-----------------------------------+-------------------------------+---------------------------------------------------------+-------------------------------
campaign_contact_reference_423075 | 2024-08-28 12:35:42.459008+00 | campaign_contact_reference_423075_contact_reference_idx | 2024-08-28 12:35:42.459008+00
campaign | 2024-08-28 12:33:44.564136+00 | campaign_id_unique | 2024-08-28 12:33:44.564136+00
campaign | 2024-08-28 12:33:44.564136+00 | campaign_name_unique | 2024-08-28 12:33:44.564136+00
campaign | 2024-08-28 12:33:44.564136+00 | campaigns_pkey | 2024-08-28 12:33:44.564136+00
campaign | 2024-08-28 12:33:44.564136+00 | campaign_application_id_index | 2024-08-28 12:33:44.564136+00
Improvements In Postgres for Vacuuming
-
Version 14 Numerous performance improvements have been made for vacuuming, VACUUM automatically becomes more aggressive
-
Version 15 minor fixes
-
Version 16 minor fixes
I.e. because of the changes in Postgres 14 an upgrade is worthwhile.
Action Items
-
Increase autovacuum max workers because we have a lot of tables (currently 5,876). A database restart is required. Already increases from 3 to 6 on 2024/08/28.
-
Analyze all tables and sort by ratio of dead tuples. Note: autovacuum workers will automatically issue ANALYZE commands whenever the content of a table has changed sufficiently.
-
Install pgstattuple which provides various functions to obtain tuple-level statistics
-
Run table and index bloat queries regularly and alert when above a certain limit
-
When necessary adjust other global autovacuum settings
-
Look into pg_repack extension which lets you remove bloat from tables and indexes without holding an exclusive lock on the processed tables during processing
-
Look into
-
Update to Postgres 16