单个 PDF 文件提取测试
sklearn实战-乳腺癌细胞数据挖掘 https://study.163.com/course/introduction.htm?courseId=1005269003&utm_campaign=commission&utm_source=cp-400000000398149&utm_medium=share # -*- coding: utf-8 -*- """ Created on Wed Feb 3 09:32:22 2016 pdf单个文件提取测试 @author: Administrator """ import PyPDF2,os,openpyxl,sys,time,threading from openpyxl.cell import get_column_letter,column_index_from_string #测试的pdf提取文档 pdf_test="20160607_2.pdf" def single_Pdf_extract(filename): pdfFileObj=open(filename,'rb') pdfReader=PyPDF2.PdfFileReader(pdfFileObj) pages=pdfReader.numPages #显示页数 在第4100行时读取pdfReader也会出错 if pages>30: pages=30