// Command main scans the current directory for PDF files and prints, for each
// one, the first "款号:…颜色" fragment found in its extracted plain text.
package main

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"os"
	"regexp"
	"strings"

	"github.com/ledongthuc/pdf"
)

// styleCodePattern matches the shortest run of text that starts with "款号:"
// (style number) and ends with "颜色" (colour). It is compiled once at package
// scope so that re does not recompile it on every call.
var styleCodePattern = regexp.MustCompile(`款号:(.+?)颜色`)

// GetAllFile lists the regular (non-directory) entries directly inside
// pathname and sorts their names into two slices.
//
// Names containing ".pdf" are appended to pdfList. Names containing ".pdf" or
// ".xlsx" are appended to allFileList. Only the bare entry names are returned;
// they are not joined with pathname. Sub-directories are skipped and not
// descended into.
//
// The extended slices are returned. If the directory cannot be read, both
// slices are returned unchanged together with a wrapped error.
func GetAllFile(pathname string, pdfList []string, allFileList []string) ([]string, []string, error) {
	entries, err := ioutil.ReadDir(pathname)
	if err != nil {
		return pdfList, allFileList, fmt.Errorf("reading directory %q: %w", pathname, err)
	}

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		isPDF := strings.Contains(name, ".pdf")
		isExcel := strings.Contains(name, ".xlsx")

		if isPDF {
			pdfList = append(pdfList, name)
		}
		// The combined list holds PDFs and Excel workbooks alike, so either
		// marker qualifies. Requiring both at once would exclude every
		// ordinary "x.pdf" and "x.xlsx" file.
		if isPDF || isExcel {
			allFileList = append(allFileList, name)
		}
	}
	return pdfList, allFileList, nil
}

// ReadPdf opens the PDF file at path and returns its plain-text content.
//
// It returns an empty string and a wrapped error if the file cannot be
// opened, if text extraction fails, or if reading the extracted text fails.
func ReadPdf(path string) (string, error) {
	f, r, err := pdf.Open(path)
	if err != nil {
		return "", fmt.Errorf("opening pdf %q: %w", path, err)
	}
	// Register the close only after the open succeeded, so it is never
	// invoked on a nil file handle.
	defer f.Close()

	text, err := r.GetPlainText()
	if err != nil {
		return "", fmt.Errorf("extracting text from %q: %w", path, err)
	}

	var buf bytes.Buffer
	if _, err := buf.ReadFrom(text); err != nil {
		return "", fmt.Errorf("reading text of %q: %w", path, err)
	}
	return buf.String(), nil
}

// re returns the first substring of str that matches styleCodePattern,
// including the leading "款号:" and trailing "颜色" markers. It returns the
// empty string when str contains no match.
func re(str string) string {
	return styleCodePattern.FindString(str)
}

// main lists the PDF files in the current directory and prints the fragment
// extracted by re from each one, one per line.
func main() {
	// GetAllFile also builds a combined PDF/Excel list; only the PDF list is
	// consumed here, so the second result is discarded.
	pdfList, _, err := GetAllFile(".", nil, nil)
	if err != nil {
		fmt.Fprintln(os.Stderr, "read dir fail:", err)
		os.Exit(1)
	}

	// Read every PDF in turn and print the style number fragment it contains.
	for _, p := range pdfList {
		text, err := ReadPdf(p)
		if err != nil {
			// One unreadable file should not stop the remaining ones.
			fmt.Fprintln(os.Stderr, "read pdf fail:", err)
			continue
		}
		fmt.Println(re(text))
	}
}