# https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share
# -*- coding: utf-8 -*-
"""
Created on Wed Feb  3 09:32:22 2016

Single PDF file extraction test.

@author: Administrator
"""
import os
import sys
import threading
import time

import openpyxl
import PyPDF2
# NOTE(review): newer openpyxl releases expose these helpers from
# ``openpyxl.utils`` -- confirm against the installed version.
from openpyxl.cell import get_column_letter, column_index_from_string

# PDF document used for the extraction test.
pdf_test = "20160607_2.pdf"


def single_Pdf_extract(filename, max_pages=30):
    """Return the concatenated text of the leading pages of a PDF file.

    Args:
        filename: Path of the PDF file to read; it is opened in binary mode.
        max_pages: Upper bound on the number of pages to extract, counted
            from the first page. Defaults to 30, the cap the original
            script hard-coded. Pass ``None`` to extract every page.

    Returns:
        The text of the extracted pages joined together in page order, with
        no separator between pages.

    Raises:
        OSError: If ``filename`` cannot be opened.
        Any exception raised by PyPDF2 while parsing propagates unchanged;
        the file handle is still closed in that case.
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # legacy PyPDF2 names; later PyPDF2 releases renamed them (PdfReader,
    # len(reader.pages), extract_text) -- confirm the installed version.
    #
    # The ``with`` block guarantees the handle is released even when the
    # reader or a page extraction raises. The text is built inside the block
    # because the reader pulls page data from the open file on demand.
    with open(filename, 'rb') as pdfFileObj:
        pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
        # Page count of the document. (Original author's note: constructing
        # the reader "also fails at row 4100" -- the context for that row is
        # not shown in this file.)
        pages = pdfReader.numPages
        if max_pages is not None:
            pages = min(pages, max_pages)
        # Join once instead of growing a string with ``+=`` per page.
        return "".join(
            pdfReader.getPage(page).extractText() for page in range(pages)
        )


content = single_Pdf_extract(pdf_test)
# Source: https://www.cnblogs.com/webRobot/p/5883914.html