import sys
# Replace the interpreter's entire module search path with a fixed list of
# Windows directories under C:\Users\ASUS (a Python 3.9 Start Menu folder
# plus an Anaconda installation and its site-packages).
# NOTE(review): these paths are machine-specific, so imports below will
# presumably fail on any other machine — confirm whether this override is
# still needed.
sys.path=['', 'C:\\Users\\ASUS\\AppData\\Roaming\\Microsoft\\Windows\\Start Menu\\Programs\\Python 3.9', 'C:\\Users\\ASUS\\anaconda3\\python38.zip', 'C:\\Users\\ASUS\\anaconda3\\DLLs', 'C:\\Users\\ASUS\\anaconda3\\lib', 'C:\\Users\\ASUS\\anaconda3', 'C:\\Users\\ASUS\\anaconda3\\lib\\site-packages', 'C:\\Users\\ASUS\\anaconda3\\lib\\site-packages\\locket-0.2.1-py3.8.egg', 'C:\\Users\\ASUS\\anaconda3\\lib\\site-packages\\win32', 'C:\\Users\\ASUS\\anaconda3\\lib\\site-packages\\win32\\lib', 'C:\\Users\\ASUS\\anaconda3\\lib\\site-packages\\Pythonwin']
import re
import fitz
# Input PDFs, resolved relative to the current working directory. Going by
# the file names, these are the English and Chinese editions of the same
# 2019 annual report.
pdf = "顺丰控股:2019年年度报告(英文版).PDF"
pdf_cn = "顺丰控股:2019年年度报告.PDF"
# Open the English edition and read its table of contents (bookmarks).
# NOTE(review): this handle is never closed in the visible code.
doc = fitz.open(pdf)
toc_bookmarks = doc.get_toc()
def getText(page, pdf):
    """Return the text of a single page of a PDF file.

    Args:
        page: Index passed to ``doc[page]`` to select the page.
            Original author's note: to read "page 12", pass ``page=13``.
            NOTE(review): the offset presumably comes from how the report
            numbers its own pages — confirm against the document.
        pdf: Path of the PDF file to open.

    Returns:
        The text that ``get_text()`` extracts from the selected page.
    """
    # The ``with`` block closes the document as soon as the text has been
    # read, so repeated calls do not accumulate open file handles.
    with fitz.open(pdf) as doc:
        return doc[page].get_text()
# Extract the text at index 13 of the English PDF and index 14 of the
# Chinese PDF.
# NOTE(review): each variable name is one less than the index passed in;
# confirm which numbering the names are meant to follow.
page12 = getText(13,pdf)
page13 = getText(14,pdf_cn)
def count_num(text):
    """Return the length of the last English word in ``text``.

    A word is a maximal run of the ASCII letters a-z / A-Z; digits,
    punctuation, whitespace and non-ASCII characters all act as separators.

    Args:
        text: The string to scan.

    Returns:
        The number of letters in the last word, or 0 when ``text`` contains
        no ASCII letters at all.
    """
    # ``+`` matches only non-empty runs, so no empty matches need filtering.
    words = re.findall(r"[a-zA-Z]+", text)
    if not words:
        return 0
    return len(words[-1])
# Measure the last word on the English page once and reuse the result for
# both names. The printed label translates to
# "The length of the last word is:".
num12 = count_num(page12)
b = num12
print("最后一个单词的长度为:", b)
def count_num_cn(text):
    """Return the length of the last sentence in ``text``.

    The text is split at every sentence terminator: the ASCII characters
    ``.``, ``!`` and ``?``, the Chinese full stop ``。`` and the full-width
    ``!`` and ``?``. Segments that are empty or contain only whitespace
    (for example the newline left after the final terminator, or the gaps
    inside a run of dots) are ignored.

    Args:
        text: The string to scan.

    Returns:
        The number of characters in the last non-blank segment, with
        surrounding whitespace removed, or 0 when there is no such segment.
    """
    # A character class keeps every terminator literal; a bare ``?`` used as
    # an alternative would be rejected by ``re`` as a quantifier with nothing
    # to repeat.
    pieces = re.split(r"[.!?。!?]", text)
    # Dropping blank segments first means the last sentence is always at
    # index -1, whether or not the text ends with trailing whitespace.
    sentences = [piece.strip() for piece in pieces if piece.strip()]
    if not sentences:
        return 0
    return len(sentences[-1])
# Run count_num_cn on the Chinese page text and print the result; the label
# translates to "The length of the last sentence is:".
c = count_num_cn(page13)
print("最后一句的长度为:",c)