上传文件至「data」

This commit is contained in:
2026-04-02 10:50:57 +08:00
commit 71ff2ee5f6
3 changed files with 931 additions and 0 deletions

60
data/object_convert.py Normal file
View File

@@ -0,0 +1,60 @@
# Convert the role codes at the start of each line: 0 -> 家长 (parent), 1 -> 销售 (sales)
import os
import re
# Directory that holds the original TXT files (point this at your data folder).
INPUT_DIR = "./data_new/processed_txt_files"
# Directory the processed files are written to (created automatically; each
# output file keeps the name of its input file).
OUTPUT_DIR = "./data_role_replaced"
# Matches a "0:" or "1:" role marker at the very start of a line, so only the
# role marker is rewritten and the rest of the text is left untouched.
ROLE_PATTERN = re.compile(r'^([01]):', re.MULTILINE) # re.MULTILINE lets ^ match at the start of every line
def replace_role_in_txt(txt_path, output_path):
    """Rewrite the role markers of one TXT file and save the result.

    Every line that starts with ``0:`` gets that marker replaced by ``家长:``
    and every line that starts with ``1:`` gets it replaced by ``销售:``; all
    other text is copied unchanged.

    :param txt_path: path of the original TXT file (UTF-8, with or without BOM)
    :param output_path: path the processed TXT file is written to
    """
    # 1. Read the whole file. "utf-8-sig" decodes plain UTF-8 exactly like
    #    "utf-8" but also drops a leading byte-order mark; with plain "utf-8"
    #    the BOM stays in front of the first line and hides that line's role
    #    marker from the ^-anchored pattern.
    with open(txt_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # 2. Swap each matched role code for its label: "0" is the parent,
    #    anything else the pattern captured (i.e. "1") is the salesperson.
    replaced_content = ROLE_PATTERN.sub(
        lambda match: "家长:" if match.group(1) == "0" else "销售:",
        content,
    )

    # 3. Save the processed text and report progress.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(replaced_content)
    print(f"处理完成:{os.path.basename(txt_path)}")
def batch_replace_all_txt():
    """Run the role replacement on every TXT file directly inside INPUT_DIR.

    Each result is written to OUTPUT_DIR under the same file name. Prints a
    hint and returns without processing anything when INPUT_DIR is missing or
    contains no TXT files.
    """
    # 1. Create the output directory (no-op when it already exists).
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # 2. Collect the TXT files. A missing input directory is treated like an
    #    empty one so the user gets the "check the path" hint below instead of
    #    a traceback from os.listdir.
    if os.path.isdir(INPUT_DIR):
        txt_files = sorted(
            name
            for name in os.listdir(INPUT_DIR)
            # Accept ".TXT" as well, and skip directories whose name merely
            # ends in ".txt" -- opening one of those would raise.
            if name.lower().endswith('.txt')
            and os.path.isfile(os.path.join(INPUT_DIR, name))
        )
    else:
        txt_files = []

    if not txt_files:
        print(f"未在 {INPUT_DIR} 目录找到TXT文件请检查路径")
        return

    # 3. Process the files one by one, in sorted (deterministic) order.
    print(f"共发现 {len(txt_files)} 个TXT文件开始批量替换角色...")
    for txt_filename in txt_files:
        input_path = os.path.join(INPUT_DIR, txt_filename)
        output_path = os.path.join(OUTPUT_DIR, txt_filename)
        replace_role_in_txt(input_path, output_path)
    print(f"\n全部处理完成!文件已保存至:{os.path.abspath(OUTPUT_DIR)}")
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    batch_replace_all_txt()