List of schemas with their sizes (relative and absolute) in a PostgreSQL database

前端 未结 3 801
闹比i
闹比i 2020-12-23 14:00

I'm looking for a query that, for any database, returns a result of the following form (see the example below, supposing the total space used by the database is 40 GB):

schema | si         


        
相关标签:
3条回答
  • 2020-12-23 14:37

    https://www.depesz.com/2018/02/17/which-schema-is-using-the-most-disk-space/

    shows a solution that counts the TOAST tables as well. Tested on PG12:

     -- Per-schema on-disk size of the current database, measured by walking the
     -- server's data directory rather than by calling the pg_*_size() functions.
     -- TOAST tables are credited to the schema of the table that owns them.
     --
     -- Result columns: schema_name, total_schema_size (bytes, largest first).
     --
     -- Only main-fork segment files (names like 16384 or 16384.1) are counted;
     -- the path filter below does not match other file names in the directory.
     -- NOTE(review): pg_ls_dir()/pg_stat_file() are restricted to privileged
     -- roles by default -- confirm the executing role has access.
     WITH RECURSIVE all_elements AS (
         -- Seed 1: entries directly under base/ (default tablespace).
         -- missing_ok = true: a file removed between listing and stat yields a
         -- NULL row instead of aborting the whole query.
         SELECT
             'base/' || l.filename AS path,
             x.size,
             x.isdir
         FROM pg_ls_dir('base/') AS l (filename)
         CROSS JOIN LATERAL pg_stat_file('base/' || l.filename, true) AS x
         UNION ALL
         -- Seed 2: entries directly under pg_tblspc/ (other tablespaces).
         SELECT
             'pg_tblspc/' || l.filename AS path,
             x.size,
             x.isdir
         FROM pg_ls_dir('pg_tblspc/') AS l (filename)
         CROSS JOIN LATERAL pg_stat_file('pg_tblspc/' || l.filename, true) AS x
         UNION ALL
         -- Recursive step: descend into every directory found so far.
         -- The isdir filter keeps pg_ls_dir() from being called on plain files;
         -- rows with NULL isdir (vanished files) are skipped as well.
         SELECT
             u.path || '/' || l.filename AS path,
             x.size,
             x.isdir
         FROM all_elements AS u
         CROSS JOIN LATERAL pg_ls_dir(u.path, true, false) AS l (filename)
         CROSS JOIN LATERAL pg_stat_file(u.path || '/' || l.filename, true) AS x
         WHERE u.isdir
     ), all_files AS (
         -- Plain files only, with their size in bytes.
         SELECT path, size
         FROM all_elements
         WHERE NOT isdir
     ), interesting_files AS (
         -- Files that live in the current database's directory, grouped by
         -- filenode: the directory prefix and the ".N" segment suffix are
         -- stripped so that all segments of one relation are summed together.
         SELECT
             regexp_replace(
                 regexp_replace(f.path, '.*/', ''),
                 '\.[0-9]*$',
                 ''
             ) AS filename,
             SUM(f.size) AS total_size
         FROM pg_database AS d
         INNER JOIN all_files AS f
             ON f.path ~ ('/' || d.oid || E'/[0-9]+(\\.[0-9]+)?$')
         WHERE d.datname = current_database()
         GROUP BY filename
     )
     SELECT
         n.nspname AS schema_name,
         SUM(f.total_size) AS total_schema_size
     FROM interesting_files AS f
     -- pg_relation_filenode() is used instead of pg_class.relfilenode because
     -- mapped catalogs store relfilenode = 0 and would otherwise never match.
     -- Shared catalogs are excluded: their files are not in this database's
     -- directory, so a match on their filenode would be a false positive.
     INNER JOIN pg_class AS c
         ON pg_relation_filenode(c.oid) = f.filename::oid
         AND NOT c.relisshared
     -- For TOAST tables, find the owning table so its schema is used below.
     LEFT JOIN pg_class AS dtc
         ON dtc.reltoastrelid = c.oid
         AND c.relkind = 't'
     INNER JOIN pg_namespace AS n
         ON n.oid = COALESCE(dtc.relnamespace, c.relnamespace)
     GROUP BY n.nspname
     ORDER BY total_schema_size DESC
    
    0 讨论(0)
  • 2020-12-23 14:54

    Better solution:

    -- Absolute and relative size of every schema in the current database.
    -- Result columns: name, absolute_size (human readable), relative_size (%).
    WITH
    db AS (
        -- Single row: total size of the current database in bytes.
        SELECT pg_database_size(current_database()) AS size
    ),
    schemas AS (
        -- One row per schema: summed pg_relation_size() of the tables that
        -- pg_tables lists for it. Names are quoted before being resolved.
        SELECT
            t.schemaname AS name,
            sum(
                pg_relation_size(
                    quote_ident(t.schemaname) || '.' || quote_ident(t.tablename)
                )
            )::bigint AS size
        FROM pg_tables AS t
        GROUP BY t.schemaname
    )
    SELECT
        schemas.name,
        pg_size_pretty(schemas.size) AS absolute_size,
        schemas.size::float / db.size * 100 AS relative_size
    FROM schemas
    CROSS JOIN db;
    

    The accepted answer solves the described problem, but its query is less efficient than the one above. Run EXPLAIN on both queries to see the difference:

    -- Ask the planner for the plan of the CTE-based per-schema size query.
    -- The query text is kept exactly as written because the plan printed below
    -- refers to its CTE names (schemas, db).
    EXPLAIN WITH 
    
    -- schemas: one row per schema with the summed pg_relation_size() of the
    -- tables listed in pg_tables.
    schemas AS (
    SELECT schemaname as name, sum(pg_relation_size(quote_ident(schemaname) || '.' || quote_ident(tablename)))::bigint as size FROM pg_tables
    GROUP BY schemaname
    ),
    
    -- db: single row holding the size of the current database.
    db AS (SELECT pg_database_size(current_database()) AS size)
    
    -- relative_size is the schema size as a percentage of the database size.
    SELECT schemas.name, 
           pg_size_pretty(schemas.size) as absolute_size,
           schemas.size::float / (SELECT size FROM db)  * 100 as relative_size
    FROM schemas;
    
                                                     QUERY PLAN
    ------------------------------------------------------------------------------------------------------------
     CTE Scan on schemas  (cost=47100.79..47634.34 rows=16417 width=104)
       CTE schemas
         ->  Finalize HashAggregate  (cost=46854.50..47100.76 rows=16417 width=72)
               Group Key: n.nspname
               ->  Gather  (cost=43119.63..46608.25 rows=32834 width=96)
                     Workers Planned: 2
                     ->  Partial HashAggregate  (cost=42119.63..42324.85 rows=16417 width=96)
                           Group Key: n.nspname
                           ->  Hash Left Join  (cost=744.38..39763.93 rows=94228 width=128)
                                 Hash Cond: (c.relnamespace = n.oid)
                                 ->  Parallel Seq Scan on pg_class c  (cost=0.00..38772.14 rows=94228 width=72)
                                       Filter: (relkind = ANY ('{r,p}'::"char"[]))
                                 ->  Hash  (cost=539.17..539.17 rows=16417 width=68)
                                       ->  Seq Scan on pg_namespace n  (cost=0.00..539.17 rows=16417 width=68)
       CTE db
         ->  Result  (cost=0.00..0.01 rows=1 width=8)
       InitPlan 3 (returns $3)
         ->  CTE Scan on db  (cost=0.00..0.02 rows=1 width=8)
    

    vs

    -- Ask the planner for the plan of the pg_class-based per-schema size query.
    -- The query text is kept exactly as written because the plan printed below
    -- refers to the unaliased pg_class / pg_namespace relations.
    EXPLAIN SELECT schema_name, 
           pg_size_pretty(sum(table_size)::bigint),
           -- Share of the whole database, as a percentage.
           (sum(table_size) / pg_database_size(current_database())) * 100
    FROM (
      -- One row per pg_class entry with its schema name and pg_relation_size().
      SELECT pg_catalog.pg_namespace.nspname as schema_name,
             pg_relation_size(pg_catalog.pg_class.oid) as table_size
      FROM   pg_catalog.pg_class
         JOIN pg_catalog.pg_namespace ON relnamespace = pg_catalog.pg_namespace.oid
    ) t
    GROUP BY schema_name
    ORDER BY schema_name; 
    
                                           QUERY PLAN
    -------------------------------------------------------------------------------------------
     GroupAggregate  (cost=283636.24..334759.75 rows=1202906 width=128)
       Group Key: pg_namespace.nspname
       ->  Sort  (cost=283636.24..286643.51 rows=1202906 width=72)
             Sort Key: pg_namespace.nspname
             ->  Hash Join  (cost=744.38..51446.15 rows=1202906 width=72)
                   Hash Cond: (pg_class.relnamespace = pg_namespace.oid)
                   ->  Seq Scan on pg_class  (cost=0.00..44536.06 rows=1202906 width=8)
                   ->  Hash  (cost=539.17..539.17 rows=16417 width=68)
                         ->  Seq Scan on pg_namespace  (cost=0.00..539.17 rows=16417 width=68)
    
    0 讨论(0)
  • 2020-12-23 14:55

    Try this:

    -- Per-schema size plus its share of the total, where the total is obtained
    -- by summing pg_relation_size() over every pg_class entry (window sum).
    WITH relation_sizes AS (
        -- One row per pg_class entry: its schema, its own size, and the grand
        -- total repeated on every row.
        SELECT
            ns.nspname AS schema_name,
            pg_relation_size(cls.oid) AS table_size,
            sum(pg_relation_size(cls.oid)) OVER () AS database_size
        FROM pg_catalog.pg_class AS cls
        INNER JOIN pg_catalog.pg_namespace AS ns
            ON ns.oid = cls.relnamespace
    )
    SELECT
        schema_name,
        sum(table_size),
        (sum(table_size) / database_size) * 100
    FROM relation_sizes
    -- database_size is identical on every row, so it does not split groups.
    GROUP BY schema_name, database_size


    Edit: just noticed the workaround with summing up all tables to get the database size is not necessary:

    -- Per-schema size (human readable) plus its percentage of the size reported
    -- by pg_database_size() for the current database, ordered by schema name.
    WITH relation_sizes AS (
        -- One row per pg_class entry with its schema name and size.
        SELECT
            ns.nspname AS schema_name,
            pg_relation_size(cls.oid) AS table_size
        FROM pg_catalog.pg_class AS cls
        INNER JOIN pg_catalog.pg_namespace AS ns
            ON ns.oid = cls.relnamespace
    )
    SELECT
        schema_name,
        pg_size_pretty(sum(table_size)::bigint),
        (sum(table_size) / pg_database_size(current_database())) * 100
    FROM relation_sizes
    GROUP BY schema_name
    ORDER BY schema_name
    
    0 讨论(0)
提交回复
热议问题