Replace Specific Document in PDF

做~自己de王妃 提交于 2019-12-24 16:10:04

问题


Includes:

using Ghostscript.NET;
using Ghostscript.NET.Processor;
using Ghostscript.NET.Rasterizer;

Right now, I am using Ghostscript.NET to merge several individual PDFs into a single document:

/// <summary>
/// Merges the given input files, in order, into a single PDF using Ghostscript's <c>pdfwrite</c> device.
/// </summary>
/// <param name="fileNames">String[]. Full paths of the files to merge, in the order they should appear in the output</param>
/// <param name="outputPath">String. Full Path to where Ghostscript will write the PDF</param>
/// <exception cref="ArgumentNullException"><paramref name="fileNames"/>, one of its elements, or <paramref name="outputPath"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="fileNames"/> is empty.</exception>
public static void GhostscriptNetJoin(String[] fileNames, String outputPath)
{
    if (fileNames == null)
    {
        throw new ArgumentNullException("fileNames");
    }
    if (outputPath == null)
    {
        throw new ArgumentNullException("outputPath");
    }
    if (fileNames.Length == 0)
    {
        throw new ArgumentException("At least one input file is required.", "fileNames");
    }

    var gsArgs = new List<String>();
    gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
    gsArgs.Add("-dBATCH");
    gsArgs.Add("-q");
    gsArgs.Add("-dNOPAUSE");
    gsArgs.Add("-dNOPROMPT");
    gsArgs.Add("-sDEVICE=pdfwrite");
    gsArgs.Add("-dPDFSETTINGS=/prepress");
    // The arguments are handed to Ghostscript as an argv array, not as a shell command line,
    // so paths containing spaces must NOT be wrapped in quotes: the quotes would become
    // part of the file name.
    gsArgs.Add("-sOutputFile=" + outputPath);
    // Every input file is its own argv element; a single space-joined string would be
    // interpreted as one (non-existent) file name.
    foreach (var fileName in fileNames)
    {
        if (fileName == null)
        {
            throw new ArgumentNullException("fileNames", "Input file names must not be null.");
        }
        gsArgs.Add(fileName);
    }

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        processor.Process(gsArgs.ToArray());
    }
}

How could I come back later to REPLACE or UPDATE page N?

I have stubbed out a routine that has my plan, but at this time I do not know how to complete it. Can I supply arg values or is there a different tool I should be using?

/// <summary>
/// Replace Specific Document from source PDF file.
/// This is an unfinished stub: the first Ghostscript step is not implemented and throws,
/// so the merge code after it only records the intended plan and is never reached.
/// </summary>
/// <param name="source">String. Full path to the multi-page PDF</param>
/// <param name="documentN">String. Full path to the document to insert</param>
/// <param name="indexN">int. Page Index where the new document should be inserted</param>
/// <exception cref="NotImplementedException">Thrown from inside the first processor block; the split step has not been written.</exception>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    // Intended to hold one file path per page of the source document; nothing below fills it yet.
    var list = new List<String>();
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        // what arguments are needed?
        // Open question: the arguments that would split `source` into per-page files are unknown.
        throw new NotImplementedException("I don't know how to code for this yet.");
        // Unreachable: kept to show where the split call would go.
        processor.Process(gsArgs.ToArray());
    }
    // Plan: swap the entry at indexN for the replacement document.
    // `list` is never populated above, so RemoveAt would fail on an empty list if this were reached.
    list.RemoveAt(indexN);
    list.Insert(indexN, documentN);
    // Build one space-separated string of inputs, quoting any path that contains a space.
    var sb = new StringBuilder();
    foreach (var fileName in list)
    {
        var fmtSource = (fileName.IndexOf(' ') == -1) ? fileName : String.Format("\"{0}\"", fileName);
        sb.Append(fmtSource + " ");
    }
    // The merged result is written back to the `source` path itself.
    var output_file = (source.IndexOf(' ') == -1) ? source : String.Format("\"{0}\"", source);
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        gsArgs.Add("-empty"); // first argument is ignored. REF: http://stackoverflow.com/q/25202577/153923
        gsArgs.Add("-dBATCH");
        gsArgs.Add("-q");
        gsArgs.Add("-dNOPAUSE");
        gsArgs.Add("-dNOPROMPT");
        gsArgs.Add("-sDEVICE=pdfwrite");
        gsArgs.Add("-dPDFSETTINGS=/prepress");
        gsArgs.Add(String.Format(@"-sOutputFile={0}", output_file));
        // NOTE(review): all inputs are passed as one argument string here — confirm Ghostscript.NET splits it as intended.
        gsArgs.Add(sb.ToString());
        processor.Process(gsArgs.ToArray());
    }
}

回答1:


You might be able to do something like this (unable to test code right now, but the principle of it checks out based on the Ghostscript.NET repo):

String prcPath = "PATH"; // location where the temporary split files are written
int pageCount = GetPDFPageCount(source);
List<String> list = SplitPDFatIndex(source, prcPath, indexN);

/// <summary>
/// Splits a PDF around one page: the pages before it and the pages after it are each
/// written to a temporary PDF, and a placeholder is left for the page itself.
/// </summary>
/// <param name="pathToFile">Full path of the PDF to split.</param>
/// <param name="tempPath">Location handed to <c>SlicePDFatIndex</c> for the temporary files.</param>
/// <param name="index">Page position to split around.</param>
/// <returns>
/// A three-element list: the path of the "before" slice, a <c>null</c> placeholder for the
/// page at <paramref name="index"/>, and the path of the "after" slice.
/// </returns>
private static List<String> SplitPDFatIndex(String pathToFile, String tempPath, int index)
{
    var outList = new List<String>();
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, true));
    outList.Add(null); // Alternatively modify the slice method to permit pulling page N
    outList.Add(SlicePDFatIndex(pathToFile, tempPath, index, false));

    return outList;
}

/// <summary>
/// Writes either the pages before <paramref name="index"/> or the pages after it to a
/// temporary PDF using Ghostscript's <c>pdfwrite</c> device.
/// </summary>
/// <param name="pathToFile">Full path of the PDF to slice.</param>
/// <param name="tempPath">Directory in which the temporary PDF is created.</param>
/// <param name="index">Page number to cut around; treated as 1-based because the left slice starts at page 1.</param>
/// <param name="lessThanIndex">
/// <c>true</c> to extract pages 1..index-1 into <c>temp_left.pdf</c>;
/// <c>false</c> to extract pages index+1..last into <c>temp_right.pdf</c>.
/// </param>
/// <returns>
/// Full path of the temporary PDF, or <c>null</c> when the requested side contains no pages
/// (for example the left side when <paramref name="index"/> is 1).
/// </returns>
private static String SlicePDFatIndex(String pathToFile, String tempPath, int index, bool lessThanIndex)
{
    int pageFrom;
    int pageTo;
    String name;

    if (lessThanIndex)
    {
        pageFrom = 1;
        pageTo = index - 1;
        name = System.IO.Path.Combine(tempPath, "temp_left.pdf");
    }
    else
    {
        pageFrom = index + 1;
        pageTo = GetPDFPageCount(pathToFile);
        name = System.IO.Path.Combine(tempPath, "temp_right.pdf");
    }

    // Nothing to extract on this side; do not ask Ghostscript for an empty page range.
    if (pageFrom > pageTo)
    {
        return null;
    }

    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    using (var processor = new GhostscriptProcessor(version, false))
    {
        var gsArgs = new List<String>();
        gsArgs.Add("-empty"); // first argument is ignored by Ghostscript
        gsArgs.Add("-dBATCH");
        gsArgs.Add("-q");
        gsArgs.Add("-dNOPAUSE");
        gsArgs.Add("-dNOPROMPT");
        gsArgs.Add("-sDEVICE=pdfwrite");
        gsArgs.Add("-dPDFSETTINGS=/prepress");
        gsArgs.Add("-dFirstPage=" + pageFrom.ToString());
        gsArgs.Add("-dLastPage=" + pageTo.ToString());
        gsArgs.Add("-sOutputFile=" + name);
        // Switches only affect files that follow them, so the input file goes last.
        gsArgs.Add("-f");
        gsArgs.Add(pathToFile);
        processor.Process(gsArgs.ToArray());
    }

    return name;
}

/// <summary>
/// Returns the number of pages in a PDF by opening it with a Ghostscript.NET viewer
/// and reading the viewer's last page number.
/// </summary>
/// <param name="pathToFile">Full path of the PDF to inspect.</param>
/// <returns>The viewer's <c>LastPageNumber</c> for the opened file.</returns>
private static int GetPDFPageCount(String pathToFile)
{
    var viewer = new Ghostscript.NET.Viewer.GhostscriptViewer();
    try
    {
        // No rendering is needed just to read the page count.
        viewer.ShowPageAfterOpen = false;
        viewer.ProgressiveUpdate = false;
        viewer.Open(pathToFile); // try (pathToFile, version, false) or (pathToFile, version, true) if for some reason it hangs up here
        return viewer.LastPageNumber;
    }
    finally
    {
        // Close the viewer even when Open throws.
        viewer.Close();
    }
}



回答2:


I'm going to add an answer based on what I read in baaron's post here:

Convert PDF to JPG / Images without using a specific C# Library

I modified his code, and I think it will satisfy my needs. Like KenS posted in a comment above, though, this will continue to lose quality each time it is run.

/// <summary>
/// Replaces document at provided index with new document.
/// Every other page of <paramref name="source"/> is rasterized to a temporary JPEG at 300 dpi,
/// then all pieces are merged back into <paramref name="source"/> with Ghostscript's pdfwrite device.
/// Use with Caution! If you continuously cycle using the output as the input,
/// then you run repeated risks of information or quality loss.
/// </summary>
/// <param name="source">String. Full File Path to Source; it is overwritten with the merged result</param>
/// <param name="documentN">String. Full File Path to new document; this file is not deleted</param>
/// <param name="indexN">int. Zero-based index where file needs to go; pass the page count to append at the end</param>
public static void GhostscriptNetReplace(String source, String documentN, int indexN)
{
    var list = new List<String>();      // merge inputs, in page order
    var tempFiles = new List<String>(); // only the files created here, so cleanup never touches documentN
    var version = GhostscriptVersionInfo.GetLastInstalledVersion();
    // Temp images go next to the source file; combining with the file path itself
    // would produce a path "inside" a file.
    var tempDir = Path.GetDirectoryName(Path.GetFullPath(source));
    try
    {
        using (var rasterizer = new Ghostscript.NET.Rasterizer.GhostscriptRasterizer())
        {
            rasterizer.Open(source, version, false);
            var pageCount = rasterizer.PageCount;
            for (var index = 0; index < pageCount; index++)
            {
                if (index == indexN)
                {
                    list.Add(documentN);
                    continue;
                }
                var extracted = Path.Combine(tempDir, String.Format("~1_{0}.jpg", index));
                if (File.Exists(extracted))
                {
                    File.Delete(extracted);
                }
                // Register before saving so a partially written file is still cleaned up.
                tempFiles.Add(extracted);
                // The rasterizer numbers pages from 1, while index is zero-based.
                using (var img = rasterizer.GetPage(300, 300, index + 1))
                {
                    img.Save(extracted, ImageFormat.Jpeg);
                }
                list.Add(extracted);
            }
            if (indexN == pageCount) // occurs if adding a page to the end
            {
                list.Add(documentN);
            }
        }
        using (var processor = new GhostscriptProcessor(version, false))
        {
            var gsArgs = new List<String>();
            gsArgs.Add("-empty"); // first argument is ignored. REF: https://stackoverflow.com/q/25202577/153923
            gsArgs.Add("-dBATCH");
            gsArgs.Add("-q");
            gsArgs.Add("-dNOPAUSE");
            gsArgs.Add("-dNOPROMPT");
            gsArgs.Add("-sDEVICE=pdfwrite");
            gsArgs.Add("-dPDFSETTINGS=/prepress");
            // Arguments are passed as an argv array, so paths with spaces need no quoting;
            // added quotes would become part of the file name.
            gsArgs.Add("-sOutputFile=" + source);
            // NOTE(review): the page inputs are JPEG files; confirm Ghostscript accepts them
            // directly as input (it may require the viewjpeg.ps helper) — TODO verify.
            foreach (var fileName in list)
            {
                gsArgs.Add(fileName);
            }
            processor.Process(gsArgs.ToArray());
        }
    }
    finally
    {
        foreach (var tempFile in tempFiles) // delete the temp files
        {
            File.Delete(tempFile);
        }
    }
}

Work has decided to put this off for now because they are not ready to risk losing information quality.

This code, then, is put out there as untested.

In theory, it should work.

If it helps, please let me know. I hate following up with answers to my own questions if no one ever looks at them.




回答3:


From my related post:

You could use the PDF Toolkit PDFtk:

Example:

pdftk A=inA.pdf B=inB.pdf cat A1-12 B3 A14-end output out1.pdf

The output consists of the first 12 pages of inA.pdf, followed by page 3 of inB.pdf and then pages 14 until end of inA.pdf.

Many Linux distributions provide a PDFtk package you can download and install using their package manager.



来源:https://stackoverflow.com/questions/34748511/replace-specific-document-in-pdf

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!