Python 合并pdf 文件

演技担当黄晓明 · 2023-09-21 21:43:27

Files.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import pandas as pd
import os


def get_abs_path(relative_path: str) -> str:
    """Return ``relative_path`` joined onto the current working directory.

    The result is not normalised. Because ``os.path.join`` is used, an
    already-absolute ``relative_path`` is returned as-is.
    """
    return os.path.join(os.getcwd(), relative_path)


def find_file(search_path, readsub=False, include_str=None, filter_strs=None):
    """
    Collect the absolute paths of the files found under ``search_path``.

    :param search_path: directory to scan
    :param readsub: when true, descend into sub-directories at every depth;
        when false, only files directly inside ``search_path`` are returned
    :param include_str: if given, keep only files whose name contains this
        substring
    :param filter_strs: substrings to exclude; a file whose name contains any
        of them is skipped
    :return: list of absolute file paths, in the order ``os.listdir`` yields
        the entries (files of a sub-directory appear at that directory's
        position)
    """
    if filter_strs is None:
        filter_strs = []

    files = []
    for name in os.listdir(search_path):
        abs_path = os.path.abspath(os.path.join(search_path, name))
        if os.path.isfile(abs_path):
            # Keep only names containing include_str, when one was requested.
            if include_str is not None and include_str not in name:
                continue
            # Drop names containing any of the excluded substrings.
            if any(filter_str in name for filter_str in filter_strs):
                continue
            files.append(abs_path)
        elif readsub and os.path.isdir(abs_path):
            # Forward readsub so the walk continues below the first level;
            # the isdir check skips broken symlinks and other non-directories.
            files += find_file(
                abs_path,
                readsub,
                include_str=include_str,
                filter_strs=filter_strs,
            )

    return files


def get_info(str_path1: str) -> dict:
    """Split a PDF file name into its ``Date`` and ``Money`` fields.

    Only the base name of ``str_path1`` is used. The markers ``.早.``,
    ``.午.`` and ``.晚.`` are rewritten so that they end with a ``-``
    separator, every ``.pdf`` occurrence is removed, and the result is split
    on ``-``.

    :return: ``{"Date": ..., "Money": ...}`` built from the first two pieces
        (dots in the date become dashes), or an empty dict when the name
        yields fewer than two pieces
    """
    stem = os.path.basename(str_path1)

    # Applied in this order; the last entry removes the ".pdf" text.
    rewrites = (
        (".早.", "早-"),
        (".午.", "午-"),
        (".晚.", "晚-"),
        (".pdf", ""),
    )
    for old, new in rewrites:
        stem = stem.replace(old, new)

    pieces = stem.split("-")
    if len(pieces) <= 1:
        return {}

    return {
        "Date": pieces[0].replace(".", "-"),
        "Money": pieces[1],
    }

pdfcombine.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os

from PyPDF2 import PdfReader, PdfWriter

import Files

# Merge every PDF located directly in the current directory into a single
# document written to ./output/all.pdf.
merger = PdfWriter()

# The helper returns files in os.listdir order, which is arbitrary; sort the
# paths so the page order of the merged document is reproducible.
files = sorted(Files.find_file("./", False, ".pdf"))
for pdf in files:
    pdf_reader = PdfReader(pdf)
    for page in pdf_reader.pages:
        merger.add_page(page)

# Build the path with os.path.join: a hard-coded ".\\output\\all.pdf" is only
# a nested path on Windows; elsewhere it creates a file with that literal name
# in the current directory, which a later run would pick up as an input.
output_path = os.path.join(".", "output", "all.pdf")
# The output directory may not exist yet on a first run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
merger.write(output_path)
merger.close()

WhyCan Forum(哇酷开发者社区)

楼主 # 2023-09-21 21:43:27 分享评论

Python 合并pdf 文件

页脚