Node reading file in specified chunk size

后端 未结 3 1720
野性不改
野性不改 2020-12-30 10:22

The goal: Upload large files to AWS Glacier without holding the whole file in memory.

I'm currently uploading to Glacier using fs.readFileSync() and things are…

相关标签:
3条回答
  • 2020-12-30 10:36

    If nothing else you can just use fs.open(), fs.read(), and fs.close() manually. Example:

    // Number of bytes requested per fs.read() call; a single buffer of this
    // size is allocated once and reused for every chunk.
    const CHUNK_SIZE = 10 * 1024 * 1024; // 10MB
    const buffer = Buffer.alloc(CHUNK_SIZE);
    const filePath = '/tmp/foo';

    fs.open(filePath, 'r', (openErr, fd) => {
      if (openErr) throw openErr;

      // Called once a read reports zero bytes, i.e. the file is exhausted.
      const finish = () => {
        // done reading file, do any necessary finalization steps

        fs.close(fd, (closeErr) => {
          if (closeErr) throw closeErr;
        });
      };

      // Reads one chunk into the shared buffer and hands it to the consumer.
      const readNextChunk = () => {
        // position = null: read from the current file position
        fs.read(fd, buffer, 0, CHUNK_SIZE, null, (readErr, nread) => {
          if (readErr) throw readErr;

          if (nread === 0) {
            finish();
            return;
          }

          // On a short read expose only the bytes that were actually filled;
          // a full read hands out the shared buffer itself.
          const data = nread < CHUNK_SIZE ? buffer.slice(0, nread) : buffer;

          // do something with `data`, then call `readNextChunk();`
        });
      };

      readNextChunk();
    });
    
    0 讨论(0)
  • 2020-12-30 10:38

    You may consider using the snippet below, which reads the file in chunks of 1024 bytes:

    var fs = require('fs');
    
    // Accumulates every chunk so the complete text can be printed at the end.
    let data = '';

    // highWaterMark caps the size of each emitted chunk (1024 bytes here);
    // the utf8 encoding makes the stream emit strings instead of Buffers.
    const streamOptions = { highWaterMark: 1 * 1024, encoding: 'utf8' };
    const readStream = fs.createReadStream('/tmp/foo.txt', streamOptions);

    readStream.on('data', (chunk) => {
        data += chunk;
        console.log('chunk Data : ');
        console.log(chunk); // your processing chunk logic will go here
    });

    readStream.on('end', () => {
        console.log('###################');
        // here you see all data processed at end of file
        console.log(data);
    });
    

    Please note: highWaterMark is the option that sets the chunk size. Hope this helps!

    Web references: https://stackabuse.com/read-files-with-node-js/ and "Changing readstream chunksize".

    0 讨论(0)
  • 2020-12-30 10:56

    Based on mscdex's answer, here's a module that uses the synchronous alternative, with a StringDecoder to correctly parse UTF-8.

    The problem with a readable stream is that, in order to use it, you have to convert the entire project to use async emitters and callbacks. If you're coding something simple, like a small CLI in Node.js, that doesn't make sense.

    // usage: read ./myfile.txt in 1024-byte chunks until the reader closes itself at end of file
    const file = new UTF8FileReader();
    file.open('./myfile.txt', 1024);
    while (file.isOpen) {
        const stringData = file.readChunk();
        console.log(stringData);
    }
    
    
    //--------------------
    // UTF8FileReader.ts
    //--------------------
    import * as fs from 'fs';
    import { StringDecoder, NodeStringDecoder } from "string_decoder";
    
    /**
     * Synchronous, chunked reader that decodes a file as UTF-8 text.
     *
     * Bytes are read with fs.readSync into a reusable buffer and passed through
     * a StringDecoder, so a multibyte character that straddles two chunks is
     * returned whole by a later readChunk() call instead of being corrupted.
     */
    export class UTF8FileReader {

        /** Path given to the most recent successful open(); used in error messages. */
        filename: string;
        /** True from a successful open() until close() is called or end of file is reached. */
        isOpen: boolean = false;
        private chunkSize: number;
        private fd: number; //file descriptor returned by fs.openSync
        private readFilePos: number;
        private readBuffer: Buffer;

        // Typed from the StringDecoder value itself, so the class works whether the
        // typings expose StringDecoder as a class or as a constructor variable.
        private utf8decoder: InstanceType<typeof StringDecoder>

        /**
         * Open a file for chunked reading and reset the read position to 0.
         * If the reader already has a file open, that file is closed first.
         *
         * @param filename path of the file to read
         * @param chunkSize maximum number of bytes fetched per readChunk() call
         * @throws Error when the file cannot be opened (original error in `cause`);
         *         errors raised by Buffer.alloc for an invalid chunkSize propagate unchanged
         */
        open(filename, chunkSize: number = 16 * 1024) {

            // Allocate before touching the file system: an invalid chunkSize must
            // throw while no descriptor is held and isOpen is still false.
            const readBuffer = Buffer.alloc(chunkSize);

            // Re-opening must not leak the descriptor of a previously opened file.
            this.close();

            this.chunkSize = chunkSize;

            try {
                this.fd = fs.openSync(filename, 'r');
            }
            catch (e) {
                // Object.assign attaches the cause without requiring ES2022 typings.
                throw Object.assign(
                    new Error("opening " + filename + ", error:" + String(e)),
                    { cause: e });
            }

            this.filename = filename;
            this.isOpen = true;

            this.readBuffer = readBuffer;
            this.readFilePos = 0;

            //a StringDecoder is a buffered object that ensures complete UTF-8 multibyte decoding from a byte buffer
            this.utf8decoder = new StringDecoder('utf8')

        }

        /**
         * Read the next chunk from the file and return it decoded as UTF-8.
         *
         * Returns '' when the reader is not open. A returned '' while isOpen is
         * still true only means the chunk ended inside a multibyte character.
         * At end of file the reader closes itself and returns whatever the decoder
         * still holds (an incomplete trailing sequence is substituted by the decoder).
         *
         * @throws Error when the underlying read fails (original error in `cause`)
         * */
        readChunk(): string {

            if (!this.isOpen) {
                return '';
            }

            let readByteCount: number;
            try {
                readByteCount = fs.readSync(this.fd, this.readBuffer, 0, this.chunkSize, this.readFilePos);
            }
            catch (e) {
                throw Object.assign(
                    new Error("reading " + this.filename + ", error:" + String(e)),
                    { cause: e });
            }

            if (readByteCount === 0) {
                //read returns 0 => all bytes read; flush the decoder BEFORE closing,
                //otherwise bytes it is still holding would be silently dropped
                const tail = this.utf8decoder.end();
                this.close();
                return tail;
            }

            //some data read, advance readFilePos
            this.readFilePos += readByteCount;
            //decode only the bytes filled by this read (the last chunk is usually short)
            //"write" is the decoder's decode-and-return call
            return this.utf8decoder.write(this.readBuffer.slice(0, readByteCount));
        }

        /**
         * Close the file if it is open; does nothing otherwise.
         * Any bytes still buffered in the decoder are discarded.
         */
        close() {
            if (!this.isOpen) {
                return;
            }
            fs.closeSync(this.fd);
            this.isOpen = false;
            //resets the decoder's internal buffer
            this.utf8decoder.end();
        }

    }
    

    And here is the transpiled .js code if you don't have TypeScript yet:

    // UTF8FileReader.js
    "use strict";
    Object.defineProperty(exports, "__esModule", { value: true });
    exports.UTF8FileReader = void 0;
    //--------------------
    // UTF8FileReader
    //--------------------
    const fs = require("fs");
    const string_decoder_1 = require("string_decoder");
    /**
     * Synchronous, chunked reader that decodes a file as UTF-8 text.
     *
     * Bytes are read with fs.readSync into a reusable buffer and passed through
     * a StringDecoder, so a multibyte character that straddles two chunks is
     * returned whole by a later readChunk() call instead of being corrupted.
     */
    class UTF8FileReader {
        constructor() {
            // True from a successful open() until close() or end of file.
            this.isOpen = false;
        }
        /**
         * Open a file for chunked reading and reset the read position to 0.
         * If the reader already has a file open, that file is closed first.
         *
         * @param {string} filename path of the file to read
         * @param {number} [chunkSize=16384] maximum number of bytes fetched per readChunk() call
         * @throws {Error} when the file cannot be opened (original error in `cause`);
         *         errors raised by Buffer.alloc for an invalid chunkSize propagate unchanged
         */
        open(filename, chunkSize = 16 * 1024) {
            // Allocate before touching the file system: an invalid chunkSize must
            // throw while no descriptor is held and isOpen is still false.
            const readBuffer = Buffer.alloc(chunkSize);
            // Re-opening must not leak the descriptor of a previously opened file.
            this.close();
            this.chunkSize = chunkSize;
            try {
                this.fd = fs.openSync(filename, 'r');
            }
            catch (e) {
                throw new Error("opening " + filename + ", error:" + String(e), { cause: e });
            }
            this.filename = filename;
            this.isOpen = true;
            this.readBuffer = readBuffer;
            this.readFilePos = 0;
            //a StringDecoder is a buffered object that ensures complete UTF-8 multibyte decoding from a byte buffer
            this.utf8decoder = new string_decoder_1.StringDecoder('utf8');
        }
        /**
         * Read the next chunk from the file and return it decoded as UTF-8.
         *
         * Returns '' when the reader is not open. A returned '' while isOpen is
         * still true only means the chunk ended inside a multibyte character.
         * At end of file the reader closes itself and returns whatever the decoder
         * still holds (an incomplete trailing sequence is substituted by the decoder).
         *
         * @returns {string} the decoded text of this chunk
         * @throws {Error} when the underlying read fails (original error in `cause`)
         * */
        readChunk() {
            if (!this.isOpen) {
                return '';
            }
            let readByteCount;
            try {
                readByteCount = fs.readSync(this.fd, this.readBuffer, 0, this.chunkSize, this.readFilePos);
            }
            catch (e) {
                throw new Error("reading " + this.filename + ", error:" + String(e), { cause: e });
            }
            if (readByteCount === 0) {
                //read returns 0 => all bytes read; flush the decoder BEFORE closing,
                //otherwise bytes it is still holding would be silently dropped
                const tail = this.utf8decoder.end();
                this.close();
                return tail;
            }
            //some data read, advance readFilePos
            this.readFilePos += readByteCount;
            //decode only the bytes filled by this read (the last chunk is usually short)
            //"write" is the decoder's decode-and-return call
            return this.utf8decoder.write(this.readBuffer.slice(0, readByteCount));
        }
        /**
         * Close the file if it is open; does nothing otherwise.
         * Any bytes still buffered in the decoder are discarded.
         */
        close() {
            if (!this.isOpen) {
                return;
            }
            fs.closeSync(this.fd);
            this.isOpen = false;
            //resets the decoder's internal buffer
            this.utf8decoder.end();
        }
    }
    exports.UTF8FileReader = UTF8FileReader;
    
    0 讨论(0)
提交回复
热议问题