NodeJS: Merge two PDF files into one using the buffer obtained by reading them

前端 未结 3 565
轮回少年
轮回少年 2020-12-28 08:18

I am using the fill-pdf npm module to fill template PDFs. It creates a new file, which is read from disk and returned as a buffer to the callback. I have two files for which…

相关标签:
3条回答
  • 2020-12-28 08:56

    HummusJS supports combining PDFs using its appendPDFPagesFromPDF method

    Example using streams to work with buffers:

    const hummus = require('hummus');
    const memoryStreams = require('memory-streams');
    
    /**
     * Concatenate two PDFs held in Buffers.
     *
     * The first buffer is handed to hummus.createWriterToModify, the pages of the
     * second are appended with appendPDFPagesFromPDF, and the result is collected
     * from an in-memory writable stream.
     *
     * @param {Buffer} firstBuffer - PDF whose content comes first
     * @param {Buffer} secondBuffer - PDF whose pages are appended
     * @returns {Buffer} - a Buffer containing the concatenated PDFs
     * @throws {Error} with message 'Error during PDF combination: <reason>'; the
     *   underlying error is attached as `cause` so its stack is not lost
     */
    const combinePDFBuffers = (firstBuffer, secondBuffer) => {
        const outStream = new memoryStreams.WritableStream();

        try {
            const firstPDFStream = new hummus.PDFRStreamForBuffer(firstBuffer);
            const secondPDFStream = new hummus.PDFRStreamForBuffer(secondBuffer);

            const pdfWriter = hummus.createWriterToModify(firstPDFStream, new hummus.PDFStreamForResponse(outStream));
            pdfWriter.appendPDFPagesFromPDF(secondPDFStream);
            pdfWriter.end();

            // The buffer is read before the stream is ended (the finally block runs after this expression).
            return outStream.toBuffer();
        } catch (e) {
            // Keep the original error reachable instead of flattening it to a message string.
            throw new Error(`Error during PDF combination: ${e.message}`, { cause: e });
        } finally {
            // Single cleanup point for both the success and the failure path.
            outStream.end();
        }
    };
    
    // Example call. PDFBuffer1 and PDFBuffer2 are not defined in this snippet
    // (presumably Buffers holding the two PDFs — supply your own); the returned
    // Buffer is discarded here.
    combinePDFBuffers(PDFBuffer1, PDFBuffer2);
    
    0 讨论(0)
  • 2020-12-28 08:58

    Here's what we use in our Express server to merge a list of PDF blobs.

    const { PDFRStreamForBuffer, createWriterToModify, PDFStreamForResponse } = require('hummus');
    const { WritableStream } = require('memory-streams');
    
    /**
     * Merge the pages of several PDF blobs (JavaScript Buffers) into a single PDF blob.
     *
     * @param {Buffer[]} pdfBlobs - the PDFs to merge, in output order
     * @returns {Buffer} the merged PDF; for a one-element list, that element itself
     * @throws {Error} when the list is empty
     */
    const mergePdfs = (pdfBlobs) => {
      const blobCount = pdfBlobs.length;
      if (blobCount === 0) {
        throw new Error('mergePdfs called with empty list of PDF blobs');
      }
      // Not required for correctness; it just skips the writer setup below.
      if (blobCount === 1) {
        return pdfBlobs[0];
      }

      // Adapted from: https://stackoverflow.com/questions/36766234/nodejs-merge-two-pdf-files-into-one-using-the-buffer-obtained-by-reading-them?answertab=active#tab-top
      // Hummus is useful but its interfaces are awkward -- e.g. createWriterToModify shouldn't require any PDF stream.
      // Hummus also has many open issues: https://github.com/galkahana/HummusJS/issues
      const readers = pdfBlobs.map((blob) => new PDFRStreamForBuffer(blob));
      const sink = new WritableStream();
      // The writer is opened on the first PDF; every following PDF is appended to it.
      const writer = createWriterToModify(readers[0], new PDFStreamForResponse(sink));
      for (const reader of readers.slice(1)) {
        writer.appendPDFPagesFromPDF(reader);
      }
      writer.end();
      sink.end();
      return sink.toBuffer();
    };
    
    // CommonJS public API: both `module.exports` and the local `exports` alias
    // are pointed at the same object exposing mergePdfs.
    module.exports = exports = { mergePdfs: mergePdfs };
    
    0 讨论(0)
  • 2020-12-28 09:13

    As mentioned by @MechaCode, the creator has ended support for HummusJS.

    So I would like to give you 2 solutions.

    1. Using node-pdftk npm module

      The following sample code uses the node-pdftk npm module to combine two PDF buffers seamlessly.

      // Merge two PDF files with node-pdftk and write the result to merged.pdf.
      const fs = require('fs'); // the snippet uses fs, so it has to be required here
      const pdftk = require('node-pdftk');

      const pdfBuffer1 = fs.readFileSync("./pdf1.pdf");
      const pdfBuffer2 = fs.readFileSync("./pdf2.pdf");

      pdftk
          .input([pdfBuffer1, pdfBuffer2])
          .output()
          // The merged PDF arrives as `buf`; returning the write promise keeps
          // any write failure inside this chain.
          .then(buf => fs.promises.writeFile('merged.pdf', buf))
          .then(() => {
              // Only reached when the write actually succeeded.
              console.log('wrote the file successfully');
          })
          .catch(err => {
              // Covers both a PDFtk failure and a failed write.
              console.error('failed to merge or write the PDF:', err);
              process.exitCode = 1;
          });
      

      The node-pdftk npm module requires the PDFtk library to be installed on the machine. Some of you may find this extra step tedious, so here is another solution that uses the pdf-lib library instead.

    2. Using pdf-lib npm module

      // Merge two PDF files with pdf-lib and write the result to merged.pdf.
      const fs = require('fs'); // the snippet uses fs, so it has to be required here
      const PDFDocument = require('pdf-lib').PDFDocument;

      /**
       * Copy every page of each input PDF, in order, into a new document.
       *
       * @param {Array<Buffer|Uint8Array>} pdfsToMerge - raw bytes of the PDFs to merge
       * @returns {Promise<Uint8Array>} bytes of the merged PDF
       */
      async function mergePdfBuffers(pdfsToMerge) {
          const mergedPdf = await PDFDocument.create();
          for (const pdfBytes of pdfsToMerge) {
              const pdf = await PDFDocument.load(pdfBytes);
              const copiedPages = await mergedPdf.copyPages(pdf, pdf.getPageIndices());
              copiedPages.forEach((page) => {
                  mergedPdf.addPage(page);
              });
          }
          return mergedPdf.save();        // Uint8Array
      }

      /**
       * Read the two input files, merge them and write merged.pdf.
       *
       * `await` is only valid inside an async function in a CommonJS script,
       * which is why the steps live here rather than at the top level.
       */
      async function main() {
          const pdfBuffer1 = fs.readFileSync("./pdf1.pdf");
          const pdfBuffer2 = fs.readFileSync("./pdf2.pdf");

          const buf = await mergePdfBuffers([pdfBuffer1, pdfBuffer2]);

          const path = 'merged.pdf';
          await fs.promises.writeFile(path, buf);
          console.log('wrote the file successfully');
      }

      main().catch((err) => {
          // Report load, merge and write failures instead of ignoring them.
          console.error('failed to merge or write the PDF:', err);
          process.exitCode = 1;
      });
      
      

    Personally I prefer to use pdf-lib npm module.

    0 讨论(0)
提交回复
热议问题