import os
from pathlib import Path


def _extract_pdf_text(pdf_path: Path) -> str:
    """Return the text of every page of *pdf_path*, pages separated by a blank line."""
    # Imported lazily: input validation and the skip-if-exists path then work
    # without the dependency, and a missing install surfaces as a reported
    # conversion failure instead of an import-time crash.
    import PyPDF2

    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages)
        # Pages that yield no text (e.g. image-only pages) are skipped entirely.
        return "".join(text + "\n\n" for text in page_texts if text)


def _write_text_atomically(target: Path, text: str) -> None:
    """Write *text* (UTF-8) to *target* via a sibling temp file and a rename.

    Because an existing TXT is treated as "already converted", a partially
    written file must never appear under the final name.
    """
    tmp_path = target.with_name(target.name + ".part")
    try:
        with open(tmp_path, "w", encoding="utf-8") as out:
            out.write(text)
        os.replace(tmp_path, target)
    finally:
        # After a successful replace the temp file is already gone; on any
        # failure this removes the leftover partial file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass


def specified_pdf_to_txt(pdf_file_path: str, save_dir: str) -> bool:
    """Convert a single PDF into a UTF-8 text file inside *save_dir*.

    The TXT keeps the PDF's file name (only the suffix changes). If a TXT with
    that name already exists in *save_dir*, the conversion is skipped.

    :param pdf_file_path: full path of the PDF to convert
    :param save_dir: directory where the TXT file is saved (created if missing)
    :return: True if the TXT was written or already existed; False if the
        input is not an existing ``.pdf`` file, the directory cannot be
        created, or the conversion fails. Problems are reported with
        ``print`` rather than raised.
    """
    # 1. Validate the input. is_file() (not exists()) so that a directory
    #    that merely ends in ".pdf" is rejected up front.
    pdf_path = Path(pdf_file_path)
    if not pdf_path.is_file() or pdf_path.suffix.lower() != ".pdf":
        print(f"错误:文件不存在或不是PDF格式 → {pdf_file_path}")
        return False

    # 2. Create the output directory if needed; an unusable directory is
    #    reported like any other conversion failure instead of raising.
    try:
        os.makedirs(save_dir, exist_ok=True)
    except OSError as e:
        print(f"❌ 转换失败:{str(e)}")
        return False

    # 3. Target path: same stem as the PDF, ".txt" suffix.
    txt_filename = pdf_path.stem + ".txt"
    txt_save_path = Path(save_dir) / txt_filename

    # 4. An existing TXT with the same name means there is nothing to do.
    if txt_save_path.exists():
        print(f"跳过:{txt_filename} 已存在,无需重复转换")
        return True

    # 5. Extract and write. The broad except is deliberate: this function is
    #    the reporting boundary and PDF parsers raise many exception types.
    try:
        full_text = _extract_pdf_text(pdf_path)
        _write_text_atomically(txt_save_path, full_text)
    except Exception as e:
        print(f"❌ 转换失败:{str(e)}")
        return False

    print(f"✅ 转换成功:{pdf_path.name} → {txt_save_path}")
    return True


if __name__ == "__main__":
    # Full path of the PDF to convert.
    TARGET_PDF = "./前后端/陈盼良简历(2).pdf"
    # Directory where the TXT file is saved.
    SAVE_DIRECTORY = "./output/前后端"
    # Run the conversion.
    specified_pdf_to_txt(TARGET_PDF, SAVE_DIRECTORY)