# encoding=utf8
#-*-coding:utf-8 -*-
#pip install pypdf2 -i https://pypi.tuna.tsinghua.edu.cn/simple
import PyPDF2
from io import StringIO
# Text extracted from each page of the PDF, in page order.
content_all_list = []

# Open the PDF in binary mode and wrap it in a PyPDF2 reader.
with open('Scrum-Guide-Chinese-Simplified.pdf', 'rb') as fp:
    pdf_reader = PyPDF2.PdfReader(fp)
    # Total number of pages.
    page_nums = len(pdf_reader.pages)
    # Walk every page (not just the first one) and extract its text.
    # This stays inside the `with` block so the file is still open
    # while the pages are being read.
    for page in pdf_reader.pages:
        text_content = page.extract_text()
        print(text_content)
        content_all_list.append(text_content)

print(content_all_list)

# Write the collected page texts to a txt file.
# Original author's note: the default encoding on Windows 7 is gbk, which
# cannot represent every character, so gb18030 is used here instead.
# NOTE(review): mode 'a' appends, so running the script more than once
# duplicates the text in the output file; switch to 'w' if a fresh file
# per run is what is wanted.
with open('Scrum-Guide-Chinese-Simplified.txt', 'a', encoding='gb18030') as txt_file:
    # One batched call; writes the page texts back to back with no
    # separator added, exactly as writing them one by one would.
    txt_file.writelines(content_all_list)