How to split a PDF into multiple documents

…衆ロ難τιáo~ 提交于 2019-12-29 08:25:13

问题


I have a large PDF that has been combined from multiple documents.

How can I split the PDF back into multiple documents with a keyword delimiter?


回答1:


As well as Adobe Reader you will need Adobe Acrobat.

Add the following script using the Action Wizard:

Paste in the following script and modify it to suit your needs. See the // comments in the script for help with customisation.

/* Extract Pages into Documents by Keyword */
// Scans every page for a keyword. Each page on which the keyword is found
// starts a new section that runs up to the page before the next match (or to
// the last page of the document). Every section is saved as its own PDF next
// to the source file, named <source path without .pdf><section index>.pdf.
// Pages that precede the first match are not written to any output file.

var pageArray = [];     // first page (0-based) of each section
var pageArrayEnd = [];  // last page (0-based) of each section, parallel to pageArray

var stringToSearchFor = app.response("This Action Script splits the document by a keyword on each X number of pages, please enter the keyword:");

// app.response returns null when the prompt is cancelled; without a keyword
// nothing can match, so skip the scan instead of walking every word.
if (stringToSearchFor != null) {
    for (var p = 0; p < this.numPages; p++) {
        // Query the page's word count once rather than on every iteration.
        var wordCount = this.getPageNumWords(p);
        // iterate over all words
        for (var n = 0; n < wordCount; n++) {
            // DEBUGGING HELP: uncomment the app.alert line below to see each word.
            // To match multiple words in a given order, extend the test, e.g.
            //   if ((this.getPageNthWord(p, n) == stringToSearchFor) && (this.getPageNthWord(p, n + 1) == stringToSearchForTWO)) {...
            // Also add a prompt for the second search word and iterate one less:
            //   for (var n = 0; n < wordCount - 1; n++) ...
            //app.alert("Word is " + this.getPageNthWord(p, n));
            if (this.getPageNthWord(p, n) == stringToSearchFor) {
                //app.alert("Found word on page " + p + " word number " + n, 3);
                // A new match closes the previous section on the page before it.
                if (pageArray.length > 0) {
                    pageArrayEnd.push(p - 1);
                }
                pageArray.push(p);
                // One match is enough to mark this page as a section start.
                break;
            }
        }
    }
}

// The final section always runs to the last page of the document.
pageArrayEnd.push(this.numPages - 1);
//app.alert("Number of sub documents " + pageArray.length, 3);
if (pageArray.length > 0) {
    // Strip only a trailing ".pdf" (any letter case). A plain string replace
    // would remove the first ".pdf" found anywhere in the path, including
    // inside a folder name, and would miss an upper-case ".PDF" extension.
    var basePath = this.path.replace(/\.pdf$/i, "");

    // write each section (keyword page up to the page before the next keyword page) to a new document
    for (var n = 0; n < pageArray.length; n++) {
        var d = app.newDoc();    // this will add a blank page - we need to remove that once we are done
        //app.alert("New Doc using pages " + pageArray[n] + " to " + pageArrayEnd[n], 3);
        d.insertPages({
            nPage: d.numPages - 1,
            cPath: this.path,
            nStart: pageArray[n],
            nEnd: pageArrayEnd[n]
        });
        // remove the first (blank) page
        d.deletePages(0);
        d.saveAs({ cPath: basePath + n + ".pdf" });
        d.closeDoc(true);
    }
}



回答2:


Please have a look at this guide on how to split a PDF into multiple files:

// Registers all DLL assemblies.
WorkRegistry.Reset();

// Source document and the base name shared by every output file.
string inputFilePath = Program.RootPath + "\\" + "1.pdf";
string outputFileName = "Output";

// Split positions. Valid value for each index: 1 to (Page Count - 1).
int[] splitIndex = { 1, 3, 5 };

// N split positions produce N + 1 output files, named <outputFileName>_<n>.pdf.
string outputPrefix = Program.RootPath + "\\" + outputFileName + "_";
string[] outputFilePaths = new string[splitIndex.Length + 1];
for (int fileNo = 0; fileNo < outputFilePaths.Length; fileNo++)
{
    outputFilePaths[fileNo] = outputPrefix + fileNo.ToString() + ".pdf";
}

// Split input PDF file to 4 files:
// File 0: page 0.
// File 1: page 1 ~ 2.
// File 2: page 3 ~ 4.
// File 3: page 5 ~ the last page.
PDFDocument.SplitDocument(inputFilePath, splitIndex, outputFilePaths);


来源:https://stackoverflow.com/questions/27831572/how-to-split-a-pdf-into-multiple-documents

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!