The goal: Upload large files to AWS Glacier without holding the whole file in memory.
I'm currently uploading to Glacier using fs.readFileSync(), and things are working for small archives — but reading the whole file into memory will not scale to large ones.
If nothing else you can just use fs.open(), fs.read(), and fs.close() manually. Example:
var CHUNK_SIZE = 10 * 1024 * 1024; // 10MB per read
var chunkBuffer = Buffer.alloc(CHUNK_SIZE);
var filePath = '/tmp/foo';

fs.open(filePath, 'r', function (err, fd) {
    if (err) throw err;

    // Reads one chunk; invoke again from your processing callback to continue.
    function readNextChunk() {
        fs.read(fd, chunkBuffer, 0, CHUNK_SIZE, null, function (err, bytesRead) {
            if (err) throw err;

            if (bytesRead === 0) {
                // done reading file, do any necessary finalization steps
                fs.close(fd, function (err) {
                    if (err) throw err;
                });
                return;
            }

            // The final read is usually short — expose only the bytes actually read.
            var data = bytesRead < CHUNK_SIZE
                ? chunkBuffer.slice(0, bytesRead)
                : chunkBuffer;

            // do something with `data`, then call `readNextChunk();`
        });
    }

    readNextChunk();
});
You may consider using the snippet below, where we read the file in chunks of 1024 bytes
var fs = require('fs');
var collected = '';
// highWaterMark caps how many bytes each 'data' event delivers (1 KiB here).
var readStream = fs.createReadStream('/tmp/foo.txt', { highWaterMark: 1 * 1024, encoding: 'utf8' });
readStream
    .on('data', function (chunk) {
        collected += chunk;
        console.log('chunk Data : ')
        console.log(chunk);// your processing chunk logic will go here
    })
    .on('end', function () {
        console.log('###################');
        console.log(collected);
        // every chunk has been appended to `collected` by the time 'end' fires
    });
Please note: highWaterMark is the option that controls the chunk size. Hope this helps!
Web reference: https://stackabuse.com/read-files-with-node-js/ — "Changing readstream chunksize"
Based on mscdex's answer, here is a module using the sync alternative, with a StringDecoder to correctly parse UTF-8.
The problem with readableStream is that in order to use it, you have to convert the entire project to use async emitters & callbacks. If you're coding something simple, like a small CLI in Node.js, it doesn't make sense.
//usage
const reader = new UTF8FileReader()
reader.open('./myfile.txt', 1024)
while (reader.isOpen) {
    const chunkText = reader.readChunk()
    console.log(chunkText)
}
//--------------------
// UTF8FileReader.ts
//--------------------
import * as fs from 'fs';
import { StringDecoder, NodeStringDecoder } from "string_decoder";
export class UTF8FileReader {

    filename: string;
    isOpen: boolean = false;

    private chunkSize: number;
    private fd: number; // file descriptor from fs.openSync
    private readFilePos: number;
    private readBuffer: Buffer;
    // StringDecoder buffers a trailing partial multi-byte sequence, so a
    // character split across two chunks is still decoded correctly.
    // (typed as StringDecoder: NodeStringDecoder is gone from modern @types/node)
    private utf8decoder: StringDecoder;

    /**
     * open the file | throw
     * @param filename path of the file to open
     * @param chunkSize bytes read per readChunk() call (default 16 KiB)
     * @throws Error if the file cannot be opened
     */
    open(filename: string, chunkSize: number = 16 * 1024): void {
        this.chunkSize = chunkSize;
        try {
            this.fd = fs.openSync(filename, 'r');
        }
        catch (e) {
            throw new Error("opening " + filename + ", error:" + e.toString());
        }
        this.filename = filename;
        this.isOpen = true;
        this.readBuffer = Buffer.alloc(this.chunkSize);
        this.readFilePos = 0;
        //a StringDecoder is a buffered object that ensures complete UTF-8 multibyte decoding from a byte buffer
        this.utf8decoder = new StringDecoder('utf8');
    }

    /**
     * read another chunk from the file and return it decoded as UTF-8.
     * Returns '' (and closes the file) once EOF is reached.
     * @throws Error if the underlying read fails
     * */
    readChunk(): string {
        let decodedString = ''; //return '' by default
        if (!this.isOpen) {
            return decodedString;
        }
        let readByteCount: number;
        try {
            readByteCount = fs.readSync(this.fd, this.readBuffer, 0, this.chunkSize, this.readFilePos);
        }
        catch (e) {
            throw new Error("reading " + this.filename + ", error:" + e.toString());
        }
        if (readByteCount) {
            //some data read, advance readFilePos
            this.readFilePos += readByteCount;
            //decode only the bytes actually read (the last chunk may be short);
            //yes, the api is called "write", but it decodes a string - it's a write-decode-and-return the string kind-of-thing :)
            decodedString = this.utf8decoder.write(this.readBuffer.slice(0, readByteCount));
        }
        else {
            //read returned 0 => EOF. Flush whatever the decoder still buffers
            //(non-empty only if the file ends mid-character) so it isn't lost,
            //then close the descriptor.
            decodedString = this.utf8decoder.end();
            this.close();
        }
        return decodedString;
    }

    /** close the file descriptor and reset the decoder; safe to call twice */
    close(): void {
        if (!this.isOpen) {
            return;
        }
        fs.closeSync(this.fd);
        this.isOpen = false;
        this.utf8decoder.end();
    }
}
and here is the .js transpiled code if you don't have typescript yet:
// UTF8FileReader.js
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.UTF8FileReader = void 0;
//--------------------
// UTF8FileReader
//--------------------
const fs = require("fs");
const string_decoder_1 = require("string_decoder");
class UTF8FileReader {
    constructor() {
        // no file is attached until open() succeeds
        this.isOpen = false;
    }
    /**
     * open the file | throw
     * @param filename
     */
    open(filename, chunkSize = 16 * 1024) {
        this.chunkSize = chunkSize;
        try {
            this.fd = fs.openSync(filename, 'r');
        }
        catch (err) {
            throw new Error("opening " + filename + ", error:" + err.toString());
        }
        this.filename = filename;
        this.isOpen = true;
        this.readBuffer = Buffer.alloc(this.chunkSize);
        this.readFilePos = 0;
        // the StringDecoder buffers the bytes of a multi-byte character that
        // straddles two chunks, so decoding is always complete
        this.utf8decoder = new string_decoder_1.StringDecoder('utf8');
    }
    /**
     * read another chunk from the file and return it decoded as UTF-8 (or throw)
     */
    readChunk() {
        if (!this.isOpen) {
            return '';
        }
        let bytesRead;
        try {
            bytesRead = fs.readSync(this.fd, this.readBuffer, 0, this.chunkSize, this.readFilePos);
        }
        catch (err) {
            throw new Error("reading " + this.filename + ", error:" + err.toString());
        }
        if (!bytesRead) {
            // a zero-byte read means EOF: release the descriptor
            this.close();
            return '';
        }
        this.readFilePos += bytesRead;
        // hand only the bytes actually read to the decoder (last chunk may be
        // short); decoder.write() returns the decoded string
        return this.utf8decoder.write(this.readBuffer.slice(0, bytesRead));
    }
    /** close the descriptor and flush the decoder; safe to call twice */
    close() {
        if (this.isOpen) {
            fs.closeSync(this.fd);
            this.isOpen = false;
            this.utf8decoder.end();
        }
    }
}
exports.UTF8FileReader = UTF8FileReader;