Python 实现 PDF 转为图片

少杨 / 2023-08-10 / 原文

 1 import fitz
 2 import os
 3 from PIL import Image
 4 
 5 def convert_pdf2img(file_relative_path):
 6 
 7     page_num = 1
 8     filename = file_relative_path.split('.')[-2]
 9     if not os.path.exists(filename):
10         os.makedirs(filename)
11 
12     pdf = fitz.open(file_relative_path)
13     num_page = len(pdf)
14     print(num_page)
15     # image width , height
16     image_width = 960
17     image_height = 540
18     images = []
19 
20     for page in pdf:
21         rotate = int(0)
22         zoom_x = 2
23         zoom_y = 2
24         mat = fitz.Matrix(zoom_x,zoom_y)
25         pixmap = page.get_pixmap(matrix=mat, alpha=False)
26 
27         # image = Image.fromqpixmap(pixmap)
28         # image = Image.open(image_file)
29         # image = image.resize((image_width,image_height))
30         # images.append(image)
31         image_file = f"{filename}/{page_num}.png"
32         pixmap.pil_save(image_file)
33 
34         image = Image.open(image_file)
35         image = image.resize((image_width,image_height))
36         images.append(image)
37         print(f"第{page_num}保存图片完成")
38         page_num += 1
39 
40 
41     new_image = Image.new('RGB',(image_width,num_page*image_height))
42 
43     for index in range (0,num_page):
44     
45         start_height = index * image_height
46         print(index,start_height)
47         # new_image.paste(images[index],(0,0))
48         new_image.paste(images[index],(0,start_height))
49         # break
50 
51     new_image.save(f"{filename}.png")
52 
53 
if __name__ == "__main__":
    # Sample run: convert the demo PDF that sits next to this script.
    # Pass a different path (e.g. "精利王能量饮.pdf") to convert another file.
    convert_pdf2img("天旰宝.pdf")

图片处理库（Pillow，代码中通过 `from PIL import Image` 导入）

pip install pillow

PDF 处理库（PyMuPDF，代码中通过 `import fitz` 导入）

pip install PyMuPDF

 

参考:https://zhuanlan.zhihu.com/p/570390414

https://blog.csdn.net/weixin_49030835/article/details/129158440