main.go 1.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. package main
  2. import (
  3. "bytes"
  4. "fmt"
  5. "github.com/ledongthuc/pdf"
  6. "io/ioutil"
  7. "regexp"
  8. "strings"
  9. )
  10. func GetAllFile(pathname string, pdf_list []string, all_file_list []string) ([]string, []string, error) {
  11. rd, err := ioutil.ReadDir(pathname)
  12. if err != nil {
  13. fmt.Println("read dir fail:", err)
  14. return pdf_list, all_file_list, err
  15. }
  16. for _, fi := range rd {
  17. if !fi.IsDir() {
  18. fullName := fi.Name()
  19. if strings.Index(fullName, ".pdf") != -1 {
  20. pdf_list = append(pdf_list, fullName)
  21. }
  22. if strings.Index(fullName, ".xlsx") != -1 && strings.Index(fullName, ".pdf") != -1 {
  23. all_file_list = append(all_file_list, fullName)
  24. }
  25. }
  26. }
  27. return pdf_list, all_file_list, nil
  28. }
  29. func ReadPdf(path string) (string, error) {
  30. f, r, err := pdf.Open(path)
  31. // remember close file
  32. defer f.Close()
  33. if err != nil {
  34. return "", err
  35. }
  36. var buf bytes.Buffer
  37. b, err := r.GetPlainText()
  38. if err != nil {
  39. return "", err
  40. }
  41. buf.ReadFrom(b)
  42. return buf.String(), nil
  43. }
  44. func re(str string) (re_str string) {
  45. reg := regexp.MustCompile(`款号:(.+?)颜色`)
  46. re_str = reg.FindString(str)
  47. //re_str = reg.FindAllString(str, -1)
  48. if len(re_str) > 0 {
  49. return re_str
  50. } else {
  51. return
  52. }
  53. }
  54. func main() {
  55. var pdf_list []string
  56. var all_file_list []string
  57. // 读取文件夹内的所有 pdf && pdf&excel
  58. pdf_list, all_file_list, _ = GetAllFile(".", pdf_list, all_file_list)
  59. // 循环读取 pdf, 获取所有款号
  60. // var code []string
  61. for _, p := range pdf_list {
  62. str, _ := ReadPdf(p)
  63. re_str := re(str)
  64. fmt.Println(re_str)
  65. }
  66. }