merge_file.py 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import PyPDF2
  4. import re
  5. import shutil
  6. def main():
  7. path = './'
  8. pdf_list = []
  9. all_file_list = []
  10. for file_name in os.listdir(path):
  11. if '.pdf' in file_name:
  12. pdf_list.append(file_name)
  13. if '.pdf' in file_name or '.xlsx' in file_name:
  14. all_file_list.append(file_name)
  15. if not pdf_list:
  16. return -1
  17. for pdf_name in pdf_list:
  18. pdffile = open('./' + pdf_name, 'rb')
  19. pdfreader = PyPDF2.PdfFileReader(pdffile)
  20. page0 = pdfreader.getPage(0)
  21. pdf_text = page0.extractText()
  22. code = re.findall('款号:(.*?)颜色', pdf_text)
  23. if code:
  24. code = code[0]
  25. folder_name = './' + code
  26. if not os.path.exists(folder_name):
  27. os.mkdir(folder_name)
  28. target = './' + folder_name + '/' + pdf_name
  29. source = './' + pdf_name
  30. shutil.copyfile(source, target)
  31. excel_name = pdf_name.split('.')[0] + '.xlsx'
  32. excel_source = './' + excel_name
  33. excel_target = './' + folder_name + '/' + excel_name
  34. shutil.copyfile(excel_source, excel_target)
  35. pdffile.close
  36. for f in all_file_list:
  37. os.remove(f)
  38. main()
  39. print('ok')