Node calling postgres function with temp tables causing “memory leak”

前端 未结 2 1400
别跟我提以往
别跟我提以往 2021-01-14 17:57

I have a Node.js program that calls a Postgres function, get_jobs (on an Amazon RDS micro instance), within a transaction, 18 times a second using the node-postgres library.

相关标签:
2条回答
  • 2021-01-14 18:51

    I used this to great effect with SQL Server and I don't trust any query optimiser now

    Then don't use them. You can still execute queries directly, as shown below.

    but please tell me if this is the wrong approach for Postgres!

    It is not a completely wrong approach; it's just a very awkward one, as you are trying to re-create something that others have already implemented in a much easier-to-use form. As a result, you are making many mistakes that can lead to many problems, including memory leaks.

    Compare to the simplicity of the exact same example that uses pg-promise:

    // Load pg-promise and pass it the connection string to obtain `db`.
    var pgp = require('pg-promise')();
    var conString = "postgres://username:password@server/database";
    var db = pgp(conString);
    
    // Calls the Postgres function get_jobs inside a transaction and
    // returns the resulting promise.
    function getJobs() {
        return db.tx(function (t) {
            return t.func('get_jobs');
        });
    }
    
    // Requests the jobs, then re-schedules itself to run again after 55 ms
    // (roughly 18 times a second).
    function poll() {
        getJobs()
            .then(function (jobs) {
                // process the jobs
            })
            .catch(function (error) {
                // error
            });
    
        // NOTE: the next poll is scheduled immediately, without waiting for
        // the current request to finish.
        setTimeout(poll, 55);
    }
    
    poll(); // start polling
    

    Gets even simpler when using ES6 syntax:

    // Load pg-promise and pass it the connection string to obtain `db`.
    var pgp = require('pg-promise')();
    var conString = "postgres://username:password@server/database";
    var db = pgp(conString);
    
    // Calls get_jobs inside a transaction, then re-schedules itself to run
    // again after 55 ms (roughly 18 times a second).
    function poll() {
        db.tx(t=>t.func('get_jobs'))
            .then(jobs=> {
                // process the jobs
            })
            .catch(error=> {
                // error
            });
    
        // NOTE: the next poll is scheduled immediately, without waiting for
        // the current request to finish.
        setTimeout(poll, 55);
    }
    
    poll(); // start polling
    

    The only thing that I didn't quite understand in your example is the use of a transaction to execute a single SELECT. This is not what transactions are generally for, as you are not changing any data. I assume you were trying to shrink a real piece of code that also changes some data.

    In case you don't need a transaction, your code can be further reduced to:

    // Load pg-promise and pass it the connection string to obtain `db`.
    var pgp = require('pg-promise')();
    var conString = "postgres://username:password@server/database";
    var db = pgp(conString);
    
    // Calls get_jobs directly on `db` (no transaction), then re-schedules
    // itself to run again after 55 ms (roughly 18 times a second).
    function poll() {
        db.func('get_jobs')
            .then(jobs=> {
                // process the jobs
            })
            .catch(error=> {
                // error
            });
    
        // NOTE: the next poll is scheduled immediately, without waiting for
        // the current request to finish.
        setTimeout(poll, 55);
    }
    
    poll(); // start polling
    

    UPDATE

    However, it is dangerous to schedule the next request without waiting for the previous one to finish, as this may also create memory/connection issues.

    A safe approach should be:

    // Polls get_jobs inside a transaction. The next poll is scheduled only
    // after the current request has settled, whether it succeeded or failed,
    // so requests never overlap.
    function poll() {
        // Queue the next run 55 ms from now.
        const scheduleNext = () => setTimeout(poll, 55);
    
        db.tx(t => t.func('get_jobs'))
            .then(jobs => {
                // process the jobs
    
                scheduleNext();
            })
            .catch(error => {
                // error
    
                scheduleNext();
            });
    }
    
    0 讨论(0)
  • 2021-01-14 18:53

    Use CTEs to create partial result sets instead of temp tables.

    -- get_jobs: returns the jobs that are currently due, joined with their
    -- related data. Intermediate result sets are built with CTEs (WITH ...)
    -- rather than temp tables.
    CREATE OR REPLACE FUNCTION get_jobs (
    ) RETURNS TABLE (
      ...
    ) AS 
    $BODY$
    DECLARE 
      _nowstamp bigint; 
    BEGIN
    
      -- take the current unix server time in ms
      _nowstamp := (select extract(epoch from now()) * 1000)::bigint;  
    
      RETURN query (
    
        --  1. get the jobs that are due
        --     (no semicolon after the inner select: it would terminate the
        --     statement inside the WITH clause and raise a syntax error)
        WITH jobs AS (
    
          select ...
          from really_big_table_1 
          where job_time < _nowstamp
    
        --  2. get other stuff attached to those jobs
        ), jobs_extra AS (
    
          select ...
          from really_big_table_2 r
            inner join jobs j on r.id = j.some_id
    
        ) 
    
        -- 3. return the final result with a join to a third big table
        select je.id, ...
        from jobs_extra je
          left join really_big_table_3 r on je.id = r.id
        group by je.id
    
      );
    
    END;
    $BODY$ LANGUAGE plpgsql VOLATILE;
    

    The planner will evaluate each block in sequence the way I wanted to achieve with temp tables.

    I know this doesn't directly solve the memory leak issue (I'm pretty sure there's something wrong with Postgres' implementation of temp tables, at least in the way they behave on the RDS configuration).

    However, the query works, it is planned the way I intended, the memory usage has been stable after 3 days of running the job, and my server no longer crashes.

    I didn't change the node code at all.

    0 讨论(0)
提交回复
热议问题