上传文件至「/」

求职者人物画像
This commit is contained in:
2026-04-02 11:40:54 +08:00
commit 131e6295e7
5 changed files with 526 additions and 0 deletions

View File

@@ -0,0 +1,57 @@
import PyPDF2
import os
from pathlib import Path
def specified_pdf_to_txt(pdf_file_path: str, save_dir: str) -> bool:
    """Extract the text of one PDF into a same-named ``.txt`` file.

    The output file keeps the PDF's base name and only swaps the suffix
    for ``.txt``. If that TXT file already exists in ``save_dir`` the
    conversion is skipped and nothing is rewritten.

    :param pdf_file_path: Full path of the PDF to convert.
    :param save_dir: Directory the TXT file is written to; created when
        it does not exist yet.
    :return: ``True`` when the TXT was written or already existed,
        ``False`` when the input is not a regular ``.pdf`` file or the
        conversion failed. Errors are reported via ``print``, not raised.
    """
    # 1. Validate the input. is_file() (rather than exists()) also rejects
    #    a directory that merely happens to be named "*.pdf".
    pdf_path = Path(pdf_file_path)
    if not pdf_path.is_file() or pdf_path.suffix.lower() != ".pdf":
        print(f"错误文件不存在或不是PDF格式 → {pdf_file_path}")
        return False

    # 2. Make sure the destination directory exists.
    os.makedirs(save_dir, exist_ok=True)

    # 3. Target path: original PDF file name with the suffix replaced.
    txt_filename = pdf_path.stem + ".txt"
    txt_save_path = Path(save_dir) / txt_filename

    # 4. An existing TXT of the same name means the work is already done.
    if txt_save_path.exists():
        print(f"跳过:{txt_filename} 已存在,无需重复转换")
        return True

    # 5. Convert. The text goes to a sibling ".part" file first and is
    #    renamed into place only once fully written, so a failed run never
    #    leaves a truncated TXT that step 4 would later mistake for a
    #    finished conversion.
    partial_path = txt_save_path.with_name(txt_filename + ".part")
    try:
        with open(pdf_path, "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Extract while the file is still open.
            page_texts = [page.extract_text() for page in reader.pages]

        # Pages that yield no text are skipped; each remaining page is
        # followed by a blank line.
        full_text = "".join(text + "\n\n" for text in page_texts if text)

        with open(partial_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(full_text)
        os.replace(partial_path, txt_save_path)
    except Exception as e:
        # Deliberate catch-all: any read/parse/write failure is reported
        # and turned into a False return instead of propagating.
        try:
            os.remove(partial_path)
        except OSError:
            # Best-effort cleanup; usually the partial file was never created.
            pass
        print(f"❌ 转换失败:{e}")
        return False

    print(f"✅ 转换成功:{pdf_path.name} → {txt_save_path}")
    return True
if __name__ == "__main__":
    # Script entry point: convert one hard-coded PDF.
    # Full path of the PDF to convert.
    TARGET_PDF = "./前后端/陈盼良简历(2).pdf"
    # Directory that receives the generated TXT file.
    SAVE_DIRECTORY = "./output/前后端"

    specified_pdf_to_txt(
        pdf_file_path=TARGET_PDF,
        save_dir=SAVE_DIRECTORY,
    )