pdf_comp.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188
  1. # -*- coding:utf-8 -*-
  2. # author: peng
  3. # file: mypdf.py
  4. # time: 2021/9/8 17:47
  5. # desc: Compress a PDF. Works best on image-only PDFs; PDFs that contain text may come out blurry, so the high-quality setting is recommended.
  6. import fitz
  7. from PIL import Image
  8. import os
  9. from shutil import copyfile, rmtree
  10. from math import ceil
  11. from time import strftime, localtime, time
  12. import easygui as g
  13. from functools import wraps
  def runtime(func):
      """Decorator that reports when ``func`` started and how long it ran.

      Before the call it prints the local time as ``YYYY-MM-DD HH:MM:SS``.
      After the call it prints the function name, up to two trailing
      positional arguments (last one first) and the elapsed seconds.

      The wrapper returns whatever ``func`` returns, so a decorated function
      that produces a value still hands it back to its caller.
      """

      @wraps(func)
      def wrapper(*args, **kwargs):
          print(strftime("%Y-%m-%d %H:%M:%S", localtime()))
          start = time()
          func_return = func(*args, **kwargs)
          end = time()
          # Slice instead of indexing: args[-1] / args[-2] raised IndexError
          # when the call had fewer than two positional arguments.  For two
          # or more arguments this yields exactly (args[-1], args[-2]).
          trailing = args[:-3:-1]
          print(func.__name__, *trailing, " spend time ", end - start, " sec")
          return func_return

      return wrapper
  class Luban(object):
      """Shrink a single image file: downscale first, then re-encode.

      Typical use is ``setPath(path)`` followed by ``compress()``.  The result
      is written to ``<directory of path>/target/c_<file name of path>``.
      """

      def __init__(self, quality, ignoreBy=102400):
          """
          :param quality: value forwarded as ``quality=`` to ``Image.save``
          :param ignoreBy: size threshold in bytes; files at or below it are
              copied to the target unchanged instead of being re-encoded
          """
          self.ignoreBy = ignoreBy
          self.quality = quality

      def setPath(self, path):
          """Select the image file that the next ``compress()`` call works on."""
          self.path = path

      def setTargetDir(self, foldername="target"):
          """Derive (and create if needed) the output directory and output path.

          :param foldername: name of the sub-directory, created next to the
              source file, that receives the compressed copy
          """
          self.dir, self.filename = os.path.split(self.path)
          self.targetDir = os.path.join(self.dir, foldername)
          # exist_ok avoids the check-then-create race of exists() + makedirs().
          os.makedirs(self.targetDir, exist_ok=True)
          self.targetPath = os.path.join(self.targetDir, "c_" + self.filename)

      def load(self):
          """Open ``self.path`` and pick the output format from the image mode.

          RGB images are saved as JPEG, RGBA images as PNG; every other mode
          is converted to RGB and saved as JPEG.
          """
          self.img = Image.open(self.path)
          if self.img.mode == "RGB":
              self.type = "JPEG"
          elif self.img.mode == "RGBA":
              self.type = "PNG"
          else:  # any other mode is converted so it can be stored as JPEG
              self.img = self.img.convert("RGB")
              self.type = "JPEG"

      def computeScale(self):
          """Return the integer factor by which both image sides are divided.

          Both sides are first rounded up to the next even number; the factor
          then depends on the short/long side ratio and on the long side.
          """
          srcWidth, srcHeight = self.img.size
          srcWidth = srcWidth + 1 if srcWidth % 2 == 1 else srcWidth
          srcHeight = srcHeight + 1 if srcHeight % 2 == 1 else srcHeight
          longSide = max(srcWidth, srcHeight)
          shortSide = min(srcWidth, srcHeight)
          scale = shortSide / longSide
          if 0.5625 < scale <= 1:
              if longSide < 1664:
                  return 1
              elif longSide < 4990:
                  return 2
              # NOTE: a long side of exactly 4990 matches neither this branch
              # nor the previous one and falls through to the division below.
              elif 4990 < longSide < 10240:
                  return 4
              else:
                  return max(1, longSide // 1280)
          elif 0.5 < scale <= 0.5625:
              return max(1, longSide // 1280)
          else:
              return ceil(longSide / (1280.0 / scale))

      def compress(self):
          """Write the compressed copy of ``self.path`` to ``self.targetPath``.

          Files no larger than ``ignoreBy`` bytes are copied verbatim.  Larger
          ones are resized by ``computeScale()`` and then saved with
          ``self.quality`` (size first, quality second).
          """
          self.setTargetDir()
          if os.path.getsize(self.path) <= self.ignoreBy:
              copyfile(self.path, self.targetPath)
          else:
              self.load()
              scale = self.computeScale()
              srcWidth, srcHeight = self.img.size
              # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the
              # same resampling filter under its current name.
              cache = self.img.resize((srcWidth // scale, srcHeight // scale),
                                      Image.LANCZOS)
              cache.save(self.targetPath, self.type, quality=self.quality)
  def covert2pic(doc, totaling, zooms=None):
      """Render PDF pages to image files inside a fresh ``.pdf`` directory.

      Page ``n`` (1-based) is written to ``.pdf/<n>.jpg``.  The file content
      is PNG data even though the name ends in ``.jpg``.  ``doc`` is closed
      before returning.

      :param doc: an open PyMuPDF document
      :param totaling: number of pages to render, starting at the first page
      :param zooms: two floats, the x and y zoom factors; larger values give a
          higher resolution and a sharper image.  Defaults to ``[2.0, 2.0]``.
      :return: None
      """
      if zooms is None:
          zooms = [2.0, 2.0]
      if os.path.exists('.pdf'):  # the scratch directory must start out empty
          rmtree('.pdf')
      os.mkdir('.pdf')
      print(f"pdf页数为 {totaling} \n创建临时文件夹.....")
      # The matrix is the same for every page, so build it once.  The former
      # ``.preRotate(0)`` rotated by zero degrees (a no-op) and relied on a
      # deprecated camelCase name, so it is dropped.
      trans = fitz.Matrix(*zooms)
      for pg in range(totaling):
          page = doc[pg]
          print(f"\r{page}", end="")
          # Prefer the current snake_case API; fall back to the legacy name
          # on old PyMuPDF releases.
          render = getattr(page, "get_pixmap", None) or page.getPixmap
          pm = render(matrix=trans, alpha=False)
          lurl = '.pdf/%s.jpg' % str(pg + 1)
          if hasattr(pm, "save"):
              # Force PNG so the bytes on disk match what writePNG produced,
              # regardless of the ".jpg" file name.
              pm.save(lurl, output="png")
          else:
              pm.writePNG(lurl)
      doc.close()
  def pic2pdf(obj, ratio, totaling):
      """Compress the page images in ``.pdf/`` and assemble them into a PDF.

      For each page number ``n`` from 1 to ``totaling``, ``.pdf/<n>.jpg`` is
      compressed with ``Luban``, the result ``.pdf/target/c_<n>.jpg`` is
      turned into a one-page PDF and appended, and the compressed image is
      deleted.  An existing file at ``obj`` is replaced.

      :param obj: path of the PDF file to write
      :param ratio: quality value handed to ``Luban``
      :param totaling: number of page images to process
      :return: None
      """
      doc = fitz.open()
      try:
          compressor = Luban(quality=ratio)
          for pg in range(totaling):
              path = '.pdf/%s.jpg' % str(pg + 1)
              compressor.setPath(path)
              compressor.compress()
              print(f"\r 插入图片 {pg + 1}/{totaling} 中......", end="")
              img = '.pdf/target/c_%s.jpg' % str(pg + 1)
              imgdoc = fitz.open(img)  # open the image as a document
              try:
                  # Current snake_case name first, legacy camelCase as fallback.
                  to_pdf = (getattr(imgdoc, "convert_to_pdf", None)
                            or imgdoc.convertToPDF)
                  pdfbytes = to_pdf()  # one-page PDF built from the image
              finally:
                  # Close before deleting the file it was opened from.
                  imgdoc.close()
              os.remove(img)
              imgpdf = fitz.open("pdf", pdfbytes)
              try:
                  insert = getattr(doc, "insert_pdf", None) or doc.insertPDF
                  insert(imgpdf)  # append this page to the output document
              finally:
                  imgpdf.close()
          if os.path.exists(obj):  # remove a previous output file first
              os.remove(obj)
          doc.save(obj)
      finally:
          doc.close()
  @runtime
  def pdfz(doc, obj, ratio, totaling):
      """Run the two compression stages: pages to images, then images to PDF.

      :param doc: open source document, handed to ``covert2pic``
      :param obj: output PDF path, handed to ``pic2pdf``
      :param ratio: image quality value, handed to ``pic2pdf``
      :param totaling: number of pages, handed to both stages
      """
      covert2pic(doc=doc, totaling=totaling)
      pic2pdf(obj=obj, ratio=ratio, totaling=totaling)
  def pic_quality():
      """Ask the user for a compression level and return the quality value.

      :return: 40 for input ``1``, 20 for ``2``, 10 for ``3``; any other
          input also yields 10
      """
      print("输入压缩等级1~3:")
      comp_level = input("压缩等级(1=高画质50%,2=中画质70%,3=低画质80%):(输入数字并按回车键)")
      # A dict stands in for a switch statement; input() returns str, so the
      # keys are strings.
      ratio = {'1': 40, '2': 20, '3': 10}
      # Strip surrounding whitespace so " 1" is not silently treated as an
      # unknown level; unknown levels fall back to the lowest quality.
      return ratio.get(comp_level.strip(), 10)
  if __name__ == "__main__":
      # Interactive loop: pick a PDF, choose a quality level, compress it to
      # "new_<file name>" in the current working directory, report the sizes,
      # then start over.
      print("请选择需要压缩的PDF文件")
      while True:
          # Open the file-selection dialog.
          filepath = g.fileopenbox(title=u"选择PDF", filetypes=['*.pdf'])
          if filepath is None:
              input("还未选择文件,输入任意键继续.......")
              continue
          filename = os.path.basename(filepath)
          print(u'已选中文件【%s】' % (filename))
          # Compare case-insensitively so "X.PDF" is accepted as well.
          if not filename.lower().endswith(".pdf"):
              input("选择的文件类型不对,输入任意键继续.......")
              continue
          ratio = pic_quality()
          obj = "new_" + filename
          doc = fitz.open(filepath)
          # len() works on every PyMuPDF version; ``pageCount`` is a
          # deprecated camelCase alias.
          totaling = len(doc)
          pdfz(doc, obj, ratio, totaling)
          rmtree('.pdf')
          oldsize = os.stat(filepath).st_size
          newsize = os.stat(obj).st_size
          print('压缩结果 %.2f M >>>> %.2f M'%(oldsize/(1024 * 1024),newsize/(1024 * 1024)))
          # ``obj`` is a relative path, so the result lands in the current
          # working directory; report that directory, not the source file's.
          filedir = os.path.dirname(os.path.abspath(obj))
          input(f"压缩已完成,文件保存在改程序目录下{filedir},如需继续压缩请按任意键")