How to use stream.pipeline in aws nodejs lambda

霸气de小男生 submitted on 2020-05-17 06:24:07

Question


I am trying to stream data from a MongoDB cursor into an S3 file using a Node.js Lambda.

Following is a snippet of my code.

What I observe is that the Lambda does not wait for the pipeline to complete but exits, so the file is never written to S3.

But the same code works fine when I run it as a standalone Node.js script.

const logger = require('./logger').logger;
let s3Client = require('aws-sdk/clients/s3');
const stream = require('stream');
const util = require('util');
const pipeline = util.promisify(stream.pipeline);

exports.handler =  async (event, context) => {


    // `client` is assumed to be an already-connected MongoClient instance,
    // and `criteriaObj` the aggregation pipeline, both defined elsewhere
    await pipeline(
        client.db("somedb").collection("somecollection").aggregate(criteriaObj).stream({transform: x => `${JSON.stringify(x)}\n`}),
        uploadFromStream()
    )

};

let uploadFromStream =  () => {

    let pass = new stream.PassThrough();
    let s3 = new s3Client();


    let params = {Bucket: "bucketname", Key: "filename", Body: pass};

    s3.upload(params, function(err, data) {
        if (err) {
            logger.error(`Error uploading file ${params.Key}`, err);
        } else {
            logger.info(`Successfully uploaded file: ${params.Key}, result: ${JSON.stringify(data)}`);
        }

    });

    return pass;
};

Answer 1:


I ended up doing it without the async/await approach.

My code ended up looking like the snippet below. I have also written a blog post about it at: https://dev.to/anandsunderraman/copying-over-data-from-mongodb-to-s3-3j4g

const MongoClient = require('mongodb').MongoClient;
let s3Client = require('aws-sdk/clients/s3');
const stream = require('stream');
const pipeline = stream.pipeline;


//streams the aggregation cursor straight into an S3 upload
exports.copyData = (event, context, callback) => {

    MongoClient.connect(getDBURI(), {
            useNewUrlParser: true,
            useUnifiedTopology: true
    }).then((dbConnection) => {

        pipeline(
            dbConnection.db("<db-name>").collection("<collection-name>").aggregate(<aggregate-criteria>)
                                        .stream({transform: x => convertToNDJSON(x)}),
            uploadDataToS3(callback),
            (err) => {
                if (err) {
                    console.log('Pipeline failed.', err);
                } else {
                    console.log('Pipeline succeeded.');
                }
            }
        )

    })


}
/**
 * Construct the DB URI based on the environment
 * @returns {string}
 */
const getDBURI = () => {
    //best practice is to fetch the password from AWS Parameter store
    return "mongodb://<username>:<password>@<hostname>/<your-db-name>";
};

//converts each db record to ndjson => newline delimited json
let convertToNDJSON = (data) => {
    return JSON.stringify(data) + "\n";
};

let uploadDataToS3 =  (callback) => {
    //the environment flag is assumed to live in process.env.NODE_ENV
    let env = process.env.NODE_ENV;
    let s3 = null;
    let pass = new stream.PassThrough();
    if (env === 'local') {
        s3  = new s3Client({
            accessKeyId: 'minioadmin' ,
            secretAccessKey: 'minioadmin' ,
            endpoint: 'http://host.docker.internal:9000' ,
            s3ForcePathStyle: true, // needed with minio?
            signatureVersion: 'v4'
        });
    } else {
        s3 = new s3Client();
    }
    //using multipart upload to speed up the process
    let params = {Bucket: '<your-bucket-name>', Key: '<file-name>', Body: pass};
    let opts = {queueSize: 2, partSize: 1024 * 1024 * 10};

    s3.upload(params,opts, function(err, data) {
        if (err) {
            console.log(`Error uploading file ${params.Key}`, err);
        } else {
            console.log(`Successfully uploaded file: ${params.Key}, result: ${JSON.stringify(data)}`);
        }
        callback();

    });
    return pass;

};
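
For completeness, here is a minimal sketch (not part of the original answer) of how the async/await handler could be made to wait correctly, reusing the requires, getDBURI, and convertToNDJSON helpers and the placeholder names from the snippet above. The key idea is to await both the promisified pipeline into the PassThrough stream and the S3 upload itself via s3.upload(...).promise(), because the pipeline resolving only means the PassThrough has finished, not that S3 has stored the object.

const MongoClient = require('mongodb').MongoClient;
const s3Client = require('aws-sdk/clients/s3');
const stream = require('stream');
const { promisify } = require('util');
const pipelineAsync = promisify(stream.pipeline);

exports.handler = async (event, context) => {

    //connect to MongoDB (placeholder URI helper as in the answer above)
    const dbConnection = await MongoClient.connect(getDBURI(), {
        useNewUrlParser: true,
        useUnifiedTopology: true
    });

    const pass = new stream.PassThrough();
    const s3 = new s3Client();

    //upload().promise() resolves only once the object is fully stored in S3
    const uploadPromise = s3
        .upload({Bucket: '<your-bucket-name>', Key: '<file-name>', Body: pass})
        .promise();

    const cursorStream = dbConnection.db("<db-name>").collection("<collection-name>")
        .aggregate([ /* your aggregation stages */ ])
        .stream({transform: x => convertToNDJSON(x)});

    //wait for both the read side (pipeline) and the write side (S3) to finish
    await Promise.all([pipelineAsync(cursorStream, pass), uploadPromise]);
    await dbConnection.close();
};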


Source: https://stackoverflow.com/questions/61381335/how-to-use-stream-pipeline-in-aws-nodejs-lambda
