We have a product using PostgreSQL database server that is deployed at a couple of hundred clients. Some of them have gathered tens of gigabytes of data over th
OK, I worked my way through it.
I simplified and reworked the view, splitting it into the following two views:
-- bloat_datawidth: one row per ordinary table (relkind = 'r') with an estimate
-- of the average row data width, derived from the planner statistics in
-- pg_stats.
--
-- Columns:
--   schemaname  namespace of the table
--   relid       pg_class.oid of the table
--   relname     table name
--   datawidth   SUM((1 - null_frac) * avg_width) over the table's columns plus
--               MAX(null_frac) * 24; NULL when at least one live column has no
--               statistics yet (e.g. the table was never ANALYZEd).
--               NOTE(review): the "* 24" term is carried over unchanged from
--               the original formula; presumably a per-row overhead allowance
--               -- confirm before relying on absolute values.
--
-- Dropped columns are skipped: pg_attribute keeps a row for them, but pg_stats
-- never does, so they would otherwise force datawidth to NULL forever and hide
-- the table from any bloat report built on this view.
CREATE OR REPLACE VIEW bloat_datawidth AS
SELECT
    ns.nspname AS schemaname,
    tbl.oid AS relid,
    tbl.relname,
    CASE
        WHEN every(s.avg_width IS NOT NULL)
            THEN SUM((1 - s.null_frac) * s.avg_width) + MAX(s.null_frac) * 24
        ELSE NULL
    END AS datawidth
FROM
    pg_attribute att
JOIN
    pg_class tbl
    ON att.attrelid = tbl.oid
JOIN
    pg_namespace ns
    ON ns.oid = tbl.relnamespace
LEFT JOIN
    pg_stats s
    ON s.schemaname = ns.nspname
    AND s.tablename = tbl.relname
    AND s.inherited = false
    AND s.attname = att.attname
WHERE
    att.attnum > 0              -- user columns only (system columns are <= 0)
    AND NOT att.attisdropped    -- dropped columns have no pg_stats entry
    AND tbl.relkind = 'r'
GROUP BY
    ns.nspname,
    tbl.oid,
    tbl.relname;
And
-- bloat_tables: per-table bloat estimate built on bloat_datawidth and the
-- page/tuple counters in pg_class.
--
-- Columns:
--   schemaname, relname, datawidth   passed through from bloat_datawidth
--   reltuples      pg_class.reltuples cast to bigint
--   relpages       pg_class.relpages cast to bigint
--   expectedpages  pages needed if every row took exactly datawidth bytes
--   bloatpct       percentage of the current pages not accounted for by
--                  reltuples * datawidth
--   wastedpages    relpages - expectedpages; exposed as a column so consumers
--                  can filter and sort on it directly
--
-- Only tables with more than one page and a known datawidth are listed; the
-- relpages > 1 filter also keeps the bloatpct division away from zero.
CREATE OR REPLACE VIEW bloat_tables AS
SELECT
    bdw.schemaname,
    bdw.relname,
    bdw.datawidth,
    cc.reltuples::bigint,
    cc.relpages::bigint,
    ceil(cc.reltuples*bdw.datawidth/current_setting('block_size')::NUMERIC)::bigint AS expectedpages,
    100 - (cc.reltuples*100*bdw.datawidth)/(current_setting('block_size')::NUMERIC*cc.relpages) AS bloatpct,
    -- Appended last so CREATE OR REPLACE VIEW stays compatible with an
    -- already-installed version of this view.
    cc.relpages::bigint
        - ceil(cc.reltuples*bdw.datawidth/current_setting('block_size')::NUMERIC)::bigint AS wastedpages
FROM
    bloat_datawidth bdw
JOIN
    pg_class cc
    ON cc.oid = bdw.relid
WHERE
    cc.relpages > 1
    AND bdw.datawidth IS NOT NULL;
And the cron job:
#!/bin/bash
# Cron job: find the single most bloated table in $SCHEMA of $DATABASE
# (according to the bloat_tables view) and run VACUUM FULL on it.
# Must be run as the "postgres" OS user. All output is appended to $LOG_FILE.
#
# Exit status: 1 when not run as postgres or when the bloat query fails,
# 0 when nothing qualifies, otherwise the exit status of vacuumdb.

MIN_BLOAT=65            # minimum bloatpct for a table to qualify
MIN_WASTED_PAGES=100    # minimum (relpages - expectedpages) to qualify
LOG_FILE=/var/log/postgresql/bloat.log
DATABASE=unity-stationmaster
SCHEMA=public

if [[ "$(id -un)" != "postgres" ]]
then
    echo "You need to be user postgres to run this script."
    exit 1
fi

# The wasted page count is computed inline (relpages - expectedpages) so the
# ORDER BY only relies on columns the view defines. quote_ident() keeps
# mixed-case or otherwise unusual identifiers intact, and the schema prefix
# makes the name resolvable regardless of search_path.
QUERY="select quote_ident(schemaname) || '.' || quote_ident(relname)
from bloat_tables
where bloatpct > $MIN_BLOAT
and relpages - expectedpages > $MIN_WASTED_PAGES
and schemaname = '$SCHEMA'
order by relpages - expectedpages desc
limit 1"

# A failing query must not be mistaken for "nothing to do".
if ! TABLENAME=$(psql "$DATABASE" -t -A -c "$QUERY" 2>> "$LOG_FILE")
then
    echo "Bloat query failed." >> "$LOG_FILE"
    exit 1
fi

if [[ -z "$TABLENAME" ]]
then
    echo "No bloated tables." >> "$LOG_FILE"
    exit 0
fi

# vacuumdb -v reports its progress on stderr, so capture both streams.
vacuumdb -v -f -t "$TABLENAME" "$DATABASE" >> "$LOG_FILE" 2>&1