# -*- coding:utf-8 -*-
# author: peng
# file: mypdf.py
# time: 2021/9/8 17:47
# desc: Compress a PDF by rendering every page to an image, shrinking the
#       images and rebuilding a PDF from them. Works well for image-only PDFs;
#       PDFs with text content may come out blurry, so the high-quality
#       setting is recommended for those.
import os
from functools import wraps
from math import ceil
from shutil import copyfile, rmtree
from time import strftime, localtime, time

import easygui as g
import fitz
from PIL import Image


def runtime(func):
    """Decorator that prints a start timestamp and the elapsed time of *func*.

    The wrapped function's return value is passed through unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()))
        start = time()
        func_return = func(*args, **kwargs)
        end = time()
        # Echo up to the last two positional arguments (last one first).
        # Slicing instead of indexing keeps this safe for functions that are
        # called with fewer than two positional arguments.
        print(func.__name__, *reversed(args[-2:]), " spend time ", end - start, " sec")
        return func_return

    return wrapper


class Luban(object):
    """Image compressor: downscale by a size-dependent factor, then re-encode.

    Typical use: ``setPath(path)`` followed by ``compress()``; the result is
    written to ``<dir of path>/target/c_<filename>``.
    """

    def __init__(self, quality, ignoreBy=102400):
        """
        :param quality: ``quality`` value passed to ``Image.save``
        :param ignoreBy: files of at most this many bytes are copied unchanged
        """
        self.ignoreBy = ignoreBy
        self.quality = quality

    def setPath(self, path):
        """Set the path of the image that the next ``compress()`` works on."""
        self.path = path

    def setTargetDir(self, foldername="target"):
        """Create ``foldername`` next to the source image and derive the output path."""
        self.dir, self.filename = os.path.split(self.path)
        self.targetDir = os.path.join(self.dir, foldername)
        if not os.path.exists(self.targetDir):
            os.makedirs(self.targetDir)
        self.targetPath = os.path.join(self.targetDir, "c_" + self.filename)

    def load(self):
        """Open the image at ``self.path`` and choose the output format.

        RGB images are saved as JPEG and RGBA images as PNG; every other mode
        is converted to RGB and saved as JPEG.
        """
        self.img = Image.open(self.path)
        if self.img.mode == "RGB":
            self.type = "JPEG"
        elif self.img.mode == "RGBA":
            self.type = "PNG"
        else:
            # Any other mode is converted so that it can be stored as JPEG.
            self.img = self.img.convert("RGB")
            self.type = "JPEG"

    def computeScale(self):
        """Return the integer factor by which both image sides are divided."""
        srcWidth, srcHeight = self.img.size
        # Round odd dimensions up to the next even number before comparing.
        srcWidth = srcWidth + 1 if srcWidth % 2 == 1 else srcWidth
        srcHeight = srcHeight + 1 if srcHeight % 2 == 1 else srcHeight

        longSide = max(srcWidth, srcHeight)
        shortSide = min(srcWidth, srcHeight)
        scale = shortSide / longSide  # aspect ratio in (0, 1]

        if 0.5625 < scale <= 1:
            if longSide < 1664:
                return 1
            elif longSide < 4990:
                return 2
            # NOTE(review): longSide == 4990 matches neither "< 4990" nor
            # "> 4990" and therefore falls through to the last branch
            # (4990 // 1280 == 3); confirm whether that is intended.
            elif 4990 < longSide < 10240:
                return 4
            else:
                return max(1, longSide // 1280)
        elif 0.5 < scale <= 0.5625:
            return max(1, longSide // 1280)
        else:
            return ceil(longSide / (1280.0 / scale))

    def compress(self):
        """Write the compressed image to ``self.targetPath``.

        Files no larger than ``self.ignoreBy`` bytes are copied as they are.
        Larger files are resized first and then saved with ``self.quality``.
        """
        self.setTargetDir()
        if os.path.getsize(self.path) <= self.ignoreBy:
            copyfile(self.path, self.targetPath)
            return

        self.load()
        scale = self.computeScale()
        srcWidth, srcHeight = self.img.size
        # Never request a zero-sized side; resize() rejects that.
        newSize = (max(1, srcWidth // scale), max(1, srcHeight // scale))
        # Image.ANTIALIAS was an alias of Image.LANCZOS and was removed in
        # Pillow 10; LANCZOS is the same filter and exists in old releases too.
        cache = self.img.resize(newSize, Image.LANCZOS)
        cache.save(self.targetPath, self.type, quality=self.quality)


# Render the PDF pages to images
def covert2pic(doc, totaling, zooms=None):
    '''
    Render the first ``totaling`` pages of ``doc`` into the temporary folder
    ``.pdf`` as ``1.jpg``, ``2.jpg``, ... and close ``doc`` afterwards.

    :param doc: opened fitz document; it is closed before this function returns
    :param totaling: number of pages of the PDF
    :param zooms: list of two floats, the zoom factor per dimension; larger
                  values give a higher resolution and a sharper result.
                  Defaults to twice the resolution.
    :return:
    '''
    if zooms is None:
        zooms = [2.0, 2.0]
    if os.path.exists('.pdf'):  # the temporary folder has to start out empty
        rmtree('.pdf')
    os.mkdir('.pdf')
    print(f"pdf页数为 {totaling} \n创建临时文件夹.....")
    # NOTE(review): preRotate / getPixmap / writePNG are the legacy camelCase
    # PyMuPDF names; confirm they exist in the installed PyMuPDF version.
    try:
        for pg in range(totaling):
            page = doc[pg]
            print(f"\r{page}", end="")
            trans = fitz.Matrix(*zooms).preRotate(0)  # 0 is the rotation angle
            pm = page.getPixmap(matrix=trans, alpha=False)
            lurl = '.pdf/%s.jpg' % str(pg + 1)
            # PNG data is written under a .jpg file name.
            pm.writePNG(lurl)
    finally:
        # Close the document even when rendering a page fails.
        doc.close()


# Combine the images into a PDF
def pic2pdf(obj, ratio, totaling):
    """Compress the rendered page images and assemble them into the PDF ``obj``.

    :param obj: path of the PDF to write; an existing file is replaced
    :param ratio: quality value handed to the ``Luban`` compressor
    :param totaling: number of page images expected in the ``.pdf`` folder
    """
    doc = fitz.open()
    try:
        compressor = Luban(quality=ratio)
        for pg in range(totaling):
            path = '.pdf/%s.jpg' % str(pg + 1)
            compressor.setPath(path)
            compressor.compress()

            print(f"\r 插入图片 {pg + 1}/{totaling} 中......", end="")
            img = '.pdf/target/c_%s.jpg' % str(pg + 1)
            imgdoc = fitz.open(img)  # open the image as a document
            try:
                pdfbytes = imgdoc.convertToPDF()  # single-page PDF built from the image
            finally:
                # Close before deleting so that no open handle to the file remains.
                imgdoc.close()
            os.remove(img)

            imgpdf = fitz.open("pdf", pdfbytes)
            try:
                doc.insertPDF(imgpdf)  # append the page to the output document
            finally:
                imgpdf.close()

        if os.path.exists(obj):  # remove an existing output file first
            os.remove(obj)
        doc.save(obj)
    finally:
        doc.close()


@runtime
def pdfz(doc, obj, ratio, totaling):
    """Compress ``doc`` into the file ``obj``: render the pages, then rebuild the PDF."""
    covert2pic(doc, totaling)
    pic2pdf(obj, ratio, totaling)


def pic_quality():
    """Ask for a compression level on stdin and return the matching quality value.

    '1' -> 40, '2' -> 20, '3' -> 10; any other input also gives 10.
    """
    print("输入压缩等级1~3:")
    comp_level = input("压缩等级(1=高画质50%,2=中画质70%,3=低画质80%):(输入数字并按回车键)")
    # A dict stands in for a switch statement; note that the keys are str.
    ratio = {'1': 40, '2': 20, '3': 10}
    # Unknown input falls back to the low-quality setting. Surrounding
    # whitespace is ignored so that " 1" still selects level 1.
    return ratio.get(comp_level.strip(), 10)


def main():
    """Interactive loop: pick a PDF, pick a quality, write ``new_<name>``.

    The output file is written to the current working directory.
    """
    print("请选择需要压缩的PDF文件")
    while True:
        # Open the file selection dialog.
        filepath = g.fileopenbox(title=u"选择PDF", filetypes=['*.pdf'])
        if filepath is None:
            input("还未选择文件,输入任意键继续.......")
            continue

        filename = os.path.basename(filepath)
        print(u'已选中文件【%s】' % (filename))
        # Case-insensitive so that e.g. "SCAN.PDF" is accepted as well.
        if not filename.lower().endswith(".pdf"):
            input("选择的文件类型不对,输入任意键继续.......")
            continue

        ratio = pic_quality()
        obj = "new_" + filename
        doc = fitz.open(filepath)
        totaling = doc.pageCount
        try:
            pdfz(doc, obj, ratio, totaling)
        finally:
            # Remove the temporary folder even when compression fails.
            if os.path.exists('.pdf'):
                rmtree('.pdf')

        oldsize = os.stat(filepath).st_size
        newsize = os.stat(obj).st_size
        print('压缩结果 %.2f M >>>> %.2f M'%(oldsize/(1024 * 1024),newsize/(1024 * 1024)))
        # Report the directory the file was really written to (the working
        # directory), not the directory of the source file.
        outdir = os.path.dirname(os.path.abspath(obj))
        input(f"压缩已完成,文件保存在改程序目录下{outdir},如需继续压缩请按任意键")


if __name__ == "__main__":
    main()