I\'m looking for a query that returns a result of the form for any database (see example below supposing total space used by the database is 40GB)
schema | si
https://www.depesz.com/2018/02/17/which-schema-is-using-the-most-disk-space/
shows a solution that counts the TOAST tabels as well. Tested on PG12:
WITH recursive all_elements AS (
SELECT 'base/' || l.filename AS path, x.*
FROM
pg_ls_dir('base/') AS l (filename),
LATERAL pg_stat_file( 'base/' || l.filename) AS x
UNION ALL
SELECT 'pg_tblspc/' || l.filename AS path, x.*
FROM
pg_ls_dir('pg_tblspc/') AS l (filename),
LATERAL pg_stat_file( 'pg_tblspc/' || l.filename) AS x
UNION ALL
SELECT
u.path || '/' || l.filename, x.*
FROM
all_elements u,
lateral pg_ls_dir(u.path) AS l(filename),
lateral pg_stat_file( u.path || '/' || l.filename ) AS x
WHERE
u.isdir
), all_files AS (
SELECT path, SIZE FROM all_elements WHERE NOT isdir
), interesting_files AS (
SELECT
regexp_replace(
regexp_replace(f.path, '.*/', ''),
'\.[0-9]*$',
''
) AS filename,
SUM( f.size )
FROM
pg_database d,
all_files f
WHERE
d.datname = current_database() AND
f.path ~ ( '/' || d.oid || E'/[0-9]+(\\.[0-9]+)?$' )
GROUP BY filename
)
SELECT
n.nspname AS schema_name,
SUM( f.sum ) AS total_schema_size
FROM
interesting_files f
JOIN pg_class c ON f.filename::oid = c.relfilenode
LEFT OUTER JOIN pg_class dtc ON dtc.reltoastrelid = c.oid AND c.relkind = 't'
JOIN pg_namespace n ON COALESCE( dtc.relnamespace, c.relnamespace ) = n.oid
GROUP BY
n.nspname
ORDER BY
total_schema_size DESC
Better solution:
WITH
schemas AS (
SELECT schemaname as name, sum(pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint as size FROM pg_tables
GROUP BY schemaname
),
db AS (
SELECT pg_database_size(current_database()) AS size
)
SELECT schemas.name,
pg_size_pretty(schemas.size) as absolute_size,
schemas.size::float / (SELECT size FROM db) * 100 as relative_size
FROM schemas;
The accepted answer solves the described problem, but the suggested query is not efficient. You can do EXPLAIN to see the difference:
EXPLAIN WITH
schemas AS (
SELECT schemaname as name, sum(pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint as size FROM pg_tables
GROUP BY schemaname
),
db AS (SELECT pg_database_size(current_database()) AS size)
SELECT schemas.name,
pg_size_pretty(schemas.size) as absolute_size,
schemas.size::float / (SELECT size FROM db) * 100 as relative_size
FROM schemas;
QUERY PLAN
------------------------------------------------------------------------------------------------------------
CTE Scan on schemas (cost=47100.79..47634.34 rows=16417 width=104)
CTE schemas
-> Finalize HashAggregate (cost=46854.50..47100.76 rows=16417 width=72)
Group Key: n.nspname
-> Gather (cost=43119.63..46608.25 rows=32834 width=96)
Workers Planned: 2
-> Partial HashAggregate (cost=42119.63..42324.85 rows=16417 width=96)
Group Key: n.nspname
-> Hash Left Join (cost=744.38..39763.93 rows=94228 width=128)
Hash Cond: (c.relnamespace = n.oid)
-> Parallel Seq Scan on pg_class c (cost=0.00..38772.14 rows=94228 width=72)
Filter: (relkind = ANY ('{r,p}'::"char"[]))
-> Hash (cost=539.17..539.17 rows=16417 width=68)
-> Seq Scan on pg_namespace n (cost=0.00..539.17 rows=16417 width=68)
CTE db
-> Result (cost=0.00..0.01 rows=1 width=8)
InitPlan 3 (returns $3)
-> CTE Scan on db (cost=0.00..0.02 rows=1 width=8)
vs
EXPLAIN SELECT schema_name,
pg_size_pretty(sum(table_size)::bigint),
(sum(table_size) / pg_database_size(current_database())) * 100
FROM (
SELECT pg_catalog.pg_namespace.nspname as schema_name,
pg_relation_size(pg_catalog.pg_class.oid) as table_size
FROM pg_catalog.pg_class
JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
) t
GROUP BY schema_name
ORDER BY schema_name;
QUERY PLAN
-------------------------------------------------------------------------------------------
GroupAggregate (cost=283636.24..334759.75 rows=1202906 width=128)
Group Key: pg_namespace.nspname
-> Sort (cost=283636.24..286643.51 rows=1202906 width=72)
Sort Key: pg_namespace.nspname
-> Hash Join (cost=744.38..51446.15 rows=1202906 width=72)
Hash Cond: (pg_class.relnamespace = pg_namespace.oid)
-> Seq Scan on pg_class (cost=0.00..44536.06 rows=1202906 width=8)
-> Hash (cost=539.17..539.17 rows=16417 width=68)
-> Seq Scan on pg_namespace (cost=0.00..539.17 rows=16417 width=68)
Try this:
SELECT schema_name, sum(table_size), (sum(table_size) / database_size) * 100 FROM ( SELECT pg_catalog.pg_namespace.nspname as schema_name, pg_relation_size(pg_catalog.pg_class.oid) as table_size, sum(pg_relation_size(pg_catalog.pg_class.oid)) over () as database_size FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid ) t GROUP BY schema_name, database_size
Edit: just noticed the workaround with summing up all tables to get the database size is not necessary:
SELECT schema_name, pg_size_pretty(sum(table_size)::bigint), (sum(table_size) / pg_database_size(current_database())) * 100 FROM ( SELECT pg_catalog.pg_namespace.nspname as schema_name, pg_relation_size(pg_catalog.pg_class.oid) as table_size FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid ) t GROUP BY schema_name ORDER BY schema_name