| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- # -*- coding:utf-8 -*-
- # author: peng
- # file: mypdf.py
- # time: 2021/9/8 17:47
- # desc: Compress a PDF. Works well on image-only PDFs; PDFs that contain text may come out blurry, so high-quality compression is recommended.
- import fitz
- from PIL import Image
- import os
- from shutil import copyfile, rmtree
- from math import ceil
- from time import strftime, localtime, time
- import easygui as g
- from functools import wraps
# Timing decorator. The wrapper hands back the wrapped function's return
# value, so decorated functions keep their result.
def runtime(func):
    """Decorate ``func`` so every call prints a timestamp and its duration.

    Before the call the current local time is printed.  After the call one
    line is printed with the function name, up to two trailing positional
    arguments (last one first) and the elapsed wall-clock seconds.

    :param func: callable to wrap.
    :return: a wrapper that forwards all arguments to ``func`` and returns
        its result unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        print(strftime("%Y-%m-%d %H:%M:%S", localtime()))
        start = time()
        func_return = func(*args, **kwargs)
        elapsed = time() - start
        # Slice rather than index args[-1] / args[-2]: indexing raised
        # IndexError whenever fewer than two positional arguments were
        # passed.  With two or more arguments the output is unchanged.
        trailing = reversed(args[-2:])
        print(func.__name__, *trailing, " spend time ", elapsed, " sec")
        return func_return
    return wrapper
class Luban(object):
    """Image compressor: shrink by a size-dependent factor, then re-encode.

    Usage is ``setPath(path)`` followed by ``compress()``.  The result is
    written to ``<directory of path>/target/c_<file name>``.
    """

    def __init__(self, quality, ignoreBy=102400):
        """Store the compression settings.

        :param quality: value passed as ``quality`` when the image is saved.
        :param ignoreBy: files of at most this many bytes are copied
            unchanged instead of being recompressed.
        """
        self.ignoreBy = ignoreBy
        self.quality = quality

    def setPath(self, path):
        """Set the path of the image that ``compress()`` will process."""
        self.path = path

    def setTargetDir(self, foldername="target"):
        """Create the output folder beside the source and set ``targetPath``.

        :param foldername: name of the sub-folder created in the source
            file's directory.
        """
        self.dir, self.filename = os.path.split(self.path)
        self.targetDir = os.path.join(self.dir, foldername)
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.targetDir, exist_ok=True)
        self.targetPath = os.path.join(self.targetDir, "c_" + self.filename)

    def load(self):
        """Open ``self.path`` and choose the output format from its mode.

        RGB images are saved as JPEG and RGBA images as PNG; every other
        mode is converted to RGB and saved as JPEG.
        """
        self.img = Image.open(self.path)
        if self.img.mode == "RGB":
            self.type = "JPEG"
        elif self.img.mode == "RGBA":
            self.type = "PNG"
        else:
            # Any other mode is converted so it can be written as JPEG.
            self.img = self.img.convert("RGB")
            self.type = "JPEG"

    def computeScale(self):
        """Return the integer factor both image sides are divided by.

        Reads ``self.img.size``.  Odd side lengths are rounded up to the
        next even number before the short/long ratio is evaluated.

        :return: shrink factor (an ``int``).
        """
        srcWidth, srcHeight = self.img.size
        srcWidth += srcWidth % 2
        srcHeight += srcHeight % 2
        longSide = max(srcWidth, srcHeight)
        shortSide = min(srcWidth, srcHeight)
        scale = shortSide / longSide
        if 0.5625 < scale <= 1:
            if longSide < 1664:
                return 1
            elif longSide < 4990:
                return 2
            # NOTE(review): longSide == 4990 matches neither this branch nor
            # the previous one and falls through to the generic formula;
            # kept as-is to preserve existing output - confirm it is intended.
            elif 4990 < longSide < 10240:
                return 4
            else:
                return max(1, longSide // 1280)
        elif 0.5 < scale <= 0.5625:
            return max(1, longSide // 1280)
        else:
            return ceil(longSide / (1280.0 / scale))

    def compress(self):
        """Write the compressed copy of ``self.path`` to ``self.targetPath``.

        Files no larger than ``ignoreBy`` bytes are copied verbatim.  Larger
        files are resized first and then saved with ``self.quality``.
        """
        self.setTargetDir()
        if os.path.getsize(self.path) <= self.ignoreBy:
            copyfile(self.path, self.targetPath)
            return
        # Resize first, then apply the quality setting while saving.
        self.load()
        scale = self.computeScale()
        srcWidth, srcHeight = self.img.size
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it
        # was an alias for, so the result is the same.
        cache = self.img.resize((srcWidth // scale, srcHeight // scale),
                                Image.LANCZOS)
        cache.save(self.targetPath, self.type, quality=self.quality)
# Render every page of the PDF to an image file.
def covert2pic(doc, totaling, zooms=None):
    """Render the pages of ``doc`` into the temporary folder ``.pdf``.

    The folder is recreated empty on every call.  Page ``n`` (1-based) is
    written as PNG data to ``.pdf/<n>.jpg``; the ``.jpg`` name is what
    ``pic2pdf`` expects.  ``doc`` is closed before returning, also when
    rendering fails.

    :param doc: opened PyMuPDF document to render.
    :param totaling: number of pages to render.
    :param zooms: two floats, the zoom factor for each dimension; larger
        values give a higher resolution.  Defaults to ``[2.0, 2.0]``.
    :return: None
    """
    if zooms is None:
        zooms = [2.0, 2.0]
    if os.path.exists('.pdf'):  # the temporary folder must start out empty
        rmtree('.pdf')
    os.mkdir('.pdf')
    print(f"pdf页数为 {totaling} \n创建临时文件夹.....")
    # The matrix is the same for every page, so build it once.  The original
    # additionally applied a 0-degree rotation, which is a no-op.
    trans = fitz.Matrix(*zooms)
    try:
        for pg in range(totaling):
            page = doc[pg]
            print(f"\r{page}", end="")
            # Current PyMuPDF exposes snake_case names; fall back to the
            # legacy camelCase names on old releases.
            render = getattr(page, "get_pixmap", None) or page.getPixmap
            pm = render(matrix=trans, alpha=False)
            lurl = '.pdf/%s.jpg' % str(pg + 1)
            if hasattr(pm, "save"):
                # Force PNG so the written data matches the old writePNG().
                pm.save(lurl, output="png")
            else:
                pm.writePNG(lurl)
    finally:
        doc.close()
# Assemble the compressed page images into a new PDF.
def pic2pdf(obj, ratio, totaling):
    """Compress the rendered page images and combine them into one PDF.

    For each page ``n`` (1-based) the image ``.pdf/<n>.jpg`` is compressed
    with ``Luban`` into ``.pdf/target/c_<n>.jpg``, converted to a one-page
    PDF and appended to the output document.  The compressed image is
    deleted once it has been converted.

    :param obj: path of the PDF to write; an existing file is replaced.
    :param ratio: quality value handed to ``Luban``.
    :param totaling: number of page images to process.
    :return: None
    """
    doc = fitz.open()
    compressor = Luban(quality=ratio)
    # Current PyMuPDF exposes snake_case names; fall back to the legacy
    # camelCase names on old releases.
    insert = getattr(doc, "insert_pdf", None) or doc.insertPDF
    try:
        for pg in range(totaling):
            path = '.pdf/%s.jpg' % str(pg + 1)
            compressor.setPath(path)
            compressor.compress()
            print(f"\r 插入图片 {pg + 1}/{totaling} 中......", end="")
            img = '.pdf/target/c_%s.jpg' % str(pg + 1)
            imgdoc = fitz.open(img)  # open the image as a document
            try:
                to_pdf = (getattr(imgdoc, "convert_to_pdf", None)
                          or imgdoc.convertToPDF)
                pdfbytes = to_pdf()  # one-page PDF built from the image
            finally:
                # Close before deleting so the file is no longer held open.
                imgdoc.close()
            os.remove(img)
            imgpdf = fitz.open("pdf", pdfbytes)
            try:
                insert(imgpdf)  # append the page to the output document
            finally:
                imgpdf.close()
        if os.path.exists(obj):  # remove an existing output file first
            os.remove(obj)
        doc.save(obj)
    finally:
        doc.close()
@runtime
def pdfz(doc, obj, ratio, totaling):
    """Run the two compression stages for one PDF.

    The pages of ``doc`` are first rendered to images, then the images are
    compressed and assembled into the PDF at ``obj``.

    :param doc: opened source document, passed to ``covert2pic``.
    :param obj: output path, passed to ``pic2pdf``.
    :param ratio: quality value, passed to ``pic2pdf``.
    :param totaling: page count, passed to both stages.
    """
    covert2pic(doc=doc, totaling=totaling)
    pic2pdf(obj=obj, ratio=ratio, totaling=totaling)
def pic_quality():
    """Ask for a compression level on stdin and return its quality value.

    Levels ``'1'``, ``'2'`` and ``'3'`` map to 40, 20 and 10.  Any other
    answer yields 10, the lowest quality.

    :return: quality value (``int``).
    """
    print("输入压缩等级1~3:")
    # input() returns a str.  Strip it so stray spaces around the digit do
    # not silently select the default level.
    comp_level = input("压缩等级(1=高画质50%,2=中画质70%,3=低画质80%):(输入数字并按回车键)").strip()
    # NOTE(review): the percentages shown in the prompt are not the values
    # returned here - confirm which of the two is intended.
    ratio = {'1': 40, '2': 20, '3': 10}
    # Unknown answers fall back to the lowest quality.
    return ratio.get(comp_level, 10)
if __name__ == "__main__":
    # Interactive loop: pick a PDF, compress it into "new_<name>" in the
    # current working directory, report the sizes, then start over.
    print("请选择需要压缩的PDF文件")
    while True:
        # Open the file-selection dialog.
        filepath = g.fileopenbox(title=u"选择PDF", filetypes=['*.pdf'])
        if filepath is None:
            input("还未选择文件,输入任意键继续.......")
            continue
        filename = os.path.basename(filepath)
        print(u'已选中文件【%s】' % (filename))
        # Compare case-insensitively so "*.PDF" files are accepted as well.
        if not filename.lower().endswith(".pdf"):
            input("选择的文件类型不对,输入任意键继续.......")
            continue
        ratio = pic_quality()
        obj = "new_" + filename
        doc = fitz.open(filepath)
        # Current PyMuPDF names this page_count; old releases pageCount.
        totaling = doc.page_count if hasattr(doc, "page_count") else doc.pageCount
        try:
            pdfz(doc, obj, ratio, totaling)
        finally:
            # Best-effort cleanup so a failed run does not leave the
            # temporary folder behind.
            rmtree('.pdf', ignore_errors=True)
        oldsize = os.stat(filepath).st_size
        newsize = os.stat(obj).st_size
        print('压缩结果 %.2f M >>>> %.2f M'%(oldsize/(1024 * 1024),newsize/(1024 * 1024)))
        # obj is a relative path, so the result lands in the current working
        # directory.  Report that directory; the original printed the source
        # file's folder instead.
        outdir = os.path.dirname(os.path.abspath(obj))
        input(f"压缩已完成,文件保存在改程序目录下{outdir},如需继续压缩请按任意键")
|