I can\'t find any packages to do this. I know PHP has a ton of libraries for PDFs (like http://www.fpdf.org/) but anything for Node?
Here is an example showing how to download and extract text from a PDF using PDF.js:
import _ from 'lodash';
import superagent from 'superagent';
import pdf from 'pdfjs-dist';
const url = 'http://unec.edu.az/application/uploads/2014/12/pdf-sample.pdf';
const main = async () => {
const response = await superagent.get(url).buffer();
const data = response.body;
const doc = await pdf.getDocument({ data });
for (const i of _.range(doc.numPages)) {
const page = await doc.getPage(i + 1);
const content = await page.getTextContent();
for (const { str } of content.items) {
console.log(str);
}
}
};
main().catch(error => console.error(error));