| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- package main
- import (
- "bytes"
- "fmt"
- "github.com/ledongthuc/pdf"
- "io/ioutil"
- "regexp"
- "strings"
- )
// GetAllFile scans the directory pathname (non-recursively) and appends the
// names of the non-directory entries it finds to the two supplied slices.
//
// Entries whose name ends in ".pdf" are appended to pdf_list; entries whose
// name ends in ".pdf" or ".xlsx" are appended to all_file_list. Extensions are
// matched case-insensitively. Only the bare entry name is stored (it is not
// joined with pathname), and subdirectories are skipped.
//
// If the directory cannot be read, the error is printed and the slices are
// returned unchanged together with that error.
func GetAllFile(pathname string, pdf_list []string, all_file_list []string) ([]string, []string, error) {
	entries, err := ioutil.ReadDir(pathname)
	if err != nil {
		fmt.Println("read dir fail:", err)
		return pdf_list, all_file_list, err
	}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		// Compare on the lower-cased name so "REPORT.PDF" is recognised too,
		// and test the suffix so "notes.pdf.bak" is not mistaken for a PDF.
		lower := strings.ToLower(name)
		isPDF := strings.HasSuffix(lower, ".pdf")
		isExcel := strings.HasSuffix(lower, ".xlsx")

		if isPDF {
			pdf_list = append(pdf_list, name)
		}
		// A file belongs to the combined list when it is a PDF OR an Excel
		// workbook; requiring both extensions at once would never match.
		if isPDF || isExcel {
			all_file_list = append(all_file_list, name)
		}
	}
	return pdf_list, all_file_list, nil
}
- func ReadPdf(path string) (string, error) {
- f, r, err := pdf.Open(path)
- // remember close file
- defer f.Close()
- if err != nil {
- return "", err
- }
- var buf bytes.Buffer
- b, err := r.GetPlainText()
- if err != nil {
- return "", err
- }
- buf.ReadFrom(b)
- return buf.String(), nil
- }
// styleCodePattern matches the text from the "款号:" (style number) label up
// to and including the next "颜色" (color) label on the same line. It is
// compiled once at package initialisation rather than on every call.
var styleCodePattern = regexp.MustCompile(`款号:(.+?)颜色`)

// re returns the first substring of str matched by styleCodePattern, or the
// empty string when there is no match.
//
// The returned value is the whole match, including the surrounding "款号:"
// and "颜色" labels, not just the captured group.
// NOTE(review): if only the style number itself is wanted, use
// FindStringSubmatch and return group 1 — confirm with callers first.
func re(str string) string {
	return styleCodePattern.FindString(str)
}
- func main() {
- var pdf_list []string
- var all_file_list []string
- // 读取文件夹内的所有 pdf && pdf&excel
- pdf_list, all_file_list, _ = GetAllFile(".", pdf_list, all_file_list)
- // 循环读取 pdf, 获取所有款号
- // var code []string
- for _, p := range pdf_list {
- str, _ := ReadPdf(p)
- re_str := re(str)
- fmt.Println(re_str)
- }
- }
|