问题
I am trying to read an entire .pdf Document using PDF.js and then render all the pages on a single canvas.
My idea: render each page onto a canvas and get the ImageData (context.getImageData()), clear the canvas do the next page. I store all the ImageDatas in an array and once all pages are in there I want to put all the ImageDatas from the array onto a single canvas.
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
//Prepare some things
var canvas = document.getElementById('cv');
var context = canvas.getContext('2d');
var scale = 1.5;
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
//Render all the pages on a single canvas
for(var i = 1; i <= pdf.numPages; i ++){
pdf.getPage(i).then(function getPage(page){
var viewport = page.getViewport(scale);
canvas.width = viewport.width;
canvas.height = viewport.height;
page.render({canvasContext: context, viewport: viewport});
pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
context.clearRect(0, 0, canvas.width, canvas.height);
p.Out("pre-rendered page " + i);
});
}
//Now we have all 'dem Pages in "pages" and need to render 'em out
canvas.height = 0;
var start = 0;
for(var i = 0; i < pages.length; i++){
if(canvas.width < pages[i].width) canvas.width = pages[i].width;
canvas.height = canvas.height + pages[i].height;
context.putImageData(pages[i], 0, start);
start += pages[i].height;
}
});
So from the way I understnad thing this should work, right? When I run this I end up with the canvas that is big enought to contain all the pages of the pdf but doesn't show the pdf...
Thank you for helping.
回答1:
I can’t speak to the part of your code that renders the pdf into a canvas, but I do see some problems.
- Every resetting canvas.width or canvas.height automatically clears the canvas contents. So in the top section, your clearRect is not needed because the canvas is cleared by canvas.width prior to your every page.render.
- More importantly, in the bottom section, all your previous pdf drawings are cleared by every canvas resizing (oops!).
- getImageData() gets an array where each pixel is represented by 4 consecutive elements of that array (red then green then blue then alpha). Since getImageData() is an array, so it doesn’t have a pages[i].width or pages[i].height—it only has a pages[i].length. That array length cannot be used to determine widths or heights.
So to get you started, I would start by changing your code to this (very, very untested!):
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
//Prepare some things
var canvas = document.getElementById('cv');
var context = canvas.getContext('2d');
var scale = 1.5;
var canvasWidth=0;
var canvasHeight=0;
var pageStarts=new Array();
pageStarts[0]=0;
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
//Render all the pages on a single canvas
for(var i = 1; i <= pdf.numPages; i ++){
pdf.getPage(i).then(function getPage(page){
var viewport = page.getViewport(scale);
// changing canvas.width and/or canvas.height auto-clears the canvas
canvas.width = viewport.width;
canvas.height = viewport.height;
page.render({canvasContext: context, viewport: viewport});
pages[i-1] = context.getImageData(0, 0, canvas.width, canvas.height);
// calculate the width of the final display canvas
if(canvas.width>maxCanvasWidth){
maxCanvasWidth=canvas.width;
}
// calculate the accumulated with of the final display canvas
canvasHeight+=canvas.height;
// save the "Y" starting position of this pages[i]
pageStarts[i]=pageStarts[i-1]+canvas.height;
p.Out("pre-rendered page " + i);
});
}
canvas.width=canvasWidth;
canvas.height = canvasHeight; // this auto-clears all canvas contents
for(var i = 0; i < pages.length; i++){
context.putImageData(pages[i], 0, pageStarts[i]);
}
});
Alternatively, here’s a more traditional way of accomplishing your task:
Use a single “display” canvas and allow the user to “page through” each desired page.
Since you already start by drawing each page into a canvas, why not keep a separate, hidden canvas for each page. Then when the user wants to see page#6, you just copy the hidden canvas#6 onto your display canvas.
The Mozilla devs use this approach in their pdfJS demo here: http://mozilla.github.com/pdf.js/web/viewer.html
You can check out the code for the viewer here: http://mozilla.github.com/pdf.js/web/viewer.js
回答2:
The PDF operations are asynchronous at all stages. This means you also need to catch the promise at the last render as well. If you not catch it you will only get a blank canvas as the rendering isn't finished before the loop continues to the next page.
Tip: I would also recommend that you use something else than getImageData
as this will store uncompressed bitmap, for example the data-uri instead which is compressed data.
Here is a slightly different approach eliminating the for-loop and uses the promises better for this purpose:
LIVE FIDDLE
var canvas = document.createElement('canvas'), // single off-screen canvas
ctx = canvas.getContext('2d'), // to render to
pages = [],
currentPage = 1,
url = 'path/to/document.pdf'; // specify a valid url
PDFJS.getDocument(url).then(iterate); // load PDF document
/* To avoid too many levels, which easily happen when using chained promises,
the function is separated and just referenced in the first promise callback
*/
function iterate(pdf) {
// init parsing of first page
if (currentPage <= pdf.numPages) getPage();
// main entry point/function for loop
function getPage() {
// when promise is returned do as usual
pdf.getPage(currentPage).then(function(page) {
var scale = 1.5;
var viewport = page.getViewport(scale);
canvas.height = viewport.height;
canvas.width = viewport.width;
var renderContext = {
canvasContext: ctx,
viewport: viewport
};
// now, tap into the returned promise from render:
page.render(renderContext).then(function() {
// store compressed image data in array
pages.push(canvas.toDataURL());
if (currentPage < pdf.numPages) {
currentPage++;
getPage(); // get next page
}
else {
done(); // call done() when all pages are parsed
}
});
});
}
}
When you then need to retrieve a page you simply create an image element and set the data-uri as source:
function drawPage(index, callback) {
var img = new Image;
img.onload = function() {
/* this will draw the image loaded onto canvas at position 0,0
at the optional width and height of the canvas.
'this' is current image loaded
*/
ctx.drawImage(this, 0, 0, ctx.canvas.width, ctx.canvas.height);
callback(); // invoke callback when we're done
}
img.src = pages[index]; // start loading the data-uri as source
}
Due to the image loading it will be asynchronous in nature as well which is why we need the callback. If you don't want the asynchronous nature then you could also do this step (creating and setting the image element) in the render promise above storing image elements instead of data-uris.
Hope this helps!
回答3:
You can pass the number page to the promises , get that page canvas data and render in the right order on canvas
var renderPageFactory = function (pdfDoc, num) {
return function () {
var localCanvas = document.createElement('canvas');
///return pdfDoc.getPage(num).then(renderPage);
return pdfDoc.getPage(num).then((page) => {
renderPage(page, localCanvas, num);
});
};
};
var renderPages = function (pdfDoc) {
var renderedPage = $q.resolve();
for (var num = 1; num <= pdfDoc.numPages; num++) {
// Wait for the last page t render, then render the next
renderedPage = renderedPage.then(renderPageFactory(pdfDoc, num));
}
};
renderPages(pdf);
Complete example
function renderPDF(url, canvas) {
var pdf = null;
PDFJS.disableWorker = true;
var pages = new Array();
var context = canvas.getContext('2d');
var scale = 1;
var canvasWidth = 256;
var canvasHeight = 0;
var pageStarts = new Array();
pageStarts[0] = 0;
var k = 0;
function finishPage(localCanvas, num) {
var ctx = localCanvas.getContext('2d');
pages[num] = ctx.getImageData(0, 0, localCanvas.width, localCanvas.height);
// calculate the accumulated with of the final display canvas
canvasHeight += localCanvas.height;
// save the "Y" starting position of this pages[i]
pageStarts[num] = pageStarts[num -1] + localCanvas.height;
if (k + 1 >= pdf.numPages)
{
canvas.width = canvasWidth;
canvas.height = canvasHeight; // this auto-clears all canvas contents
for (var i = 0; i < pages.length; i++) {
context.putImageData(pages[i+1], 0, pageStarts[i]);
}
var img = canvas.toDataURL("image/png");
$scope.printPOS(img);
}
k++;
}
function renderPage(page, localCanvas, num) {
var ctx = localCanvas.getContext('2d');
var viewport = page.getViewport(scale);
// var viewport = page.getViewport(canvas.width / page.getViewport(1.0).width);
// changing canvas.width and/or canvas.height auto-clears the canvas
localCanvas.width = viewport.width;
/// viewport.width = canvas.width;
localCanvas.height = viewport.height;
var renderTask = page.render({canvasContext: ctx, viewport: viewport});
renderTask.then(() => {
finishPage(localCanvas, num);
});
}
PDFJS.getDocument(url).then(function getPdfHelloWorld(_pdf) {
pdf = _pdf;
var renderPageFactory = function (pdfDoc, num) {
return function () {
var localCanvas = document.createElement('canvas');
///return pdfDoc.getPage(num).then(renderPage);
return pdfDoc.getPage(num).then((page) => {
renderPage(page, localCanvas, num);
});
};
};
var renderPages = function (pdfDoc) {
var renderedPage = $q.resolve();
for (var num = 1; num <= pdfDoc.numPages; num++) {
// Wait for the last page t render, then render the next
renderedPage = renderedPage.then(renderPageFactory(pdfDoc, num));
}
};
renderPages(pdf);
});
}
来源:https://stackoverflow.com/questions/15341010/render-pdf-to-single-canvas-using-pdf-js-and-imagedata