# Source listing metadata: 60 lines, 2.2 KiB, Python
# Convert the role codes that start each line: 0 -> 家长 (parent), 1 -> 销售 (sales).
import os
import re

# Directory holding the original TXT files (the path of your data folder).
INPUT_DIR = "./data_new/processed_txt_files"

# Directory the converted files are written to; results keep the original
# file names.
OUTPUT_DIR = "./data_role_replaced"

# Matches "0:" or "1:" (ASCII colon) at the start of a line, so only the role
# marker is touched and never the text that follows it. Group 1 captures the
# digit. re.MULTILINE makes ^ match at the start of every line instead of only
# at the start of the whole string.
ROLE_PATTERN = re.compile(r'^([01]):', re.MULTILINE)


def replace_role_in_txt(txt_path: str, output_path: str) -> None:
    """Rewrite one TXT file with its line-leading role codes replaced by names.

    Every match of the module-level ``ROLE_PATTERN`` is substituted: a
    captured ``"0"`` becomes ``"家长:"`` and any other captured code becomes
    ``"销售:"``. Text outside the matches is written through unchanged.

    :param txt_path: path of the original TXT file
    :param output_path: path the converted text is saved to
    """
    # 1. Read the original content. utf-8-sig decodes plain UTF-8 as usual but
    #    also strips an optional byte-order mark; with plain utf-8 the BOM
    #    would stay in front of the first line and keep a line-start pattern
    #    from matching there.
    with open(txt_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # 2. Map each matched role code to its display name.
    def replace_match(match):
        role_code = match.group(1)  # the captured "0" or "1"
        return "家长:" if role_code == "0" else "销售:"

    replaced_content = ROLE_PATTERN.sub(replace_match, content)

    # 3. Save the converted text.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(replaced_content)

    print(f"处理完成:{os.path.basename(txt_path)}")


def batch_replace_all_txt() -> None:
    """Convert every TXT file found directly inside ``INPUT_DIR``.

    Each file is passed to ``replace_role_in_txt`` and the result is saved
    under the same file name in ``OUTPUT_DIR``. If no TXT file is found (or
    ``INPUT_DIR`` is not a directory) a hint is printed and nothing else is
    done.
    """
    # 1. Create the output directory if it does not exist yet.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # 2. Collect the TXT files. A missing input directory is treated like an
    #    empty one so the user gets the "check the path" hint below instead of
    #    a traceback from os.listdir.
    if os.path.isdir(INPUT_DIR):
        txt_files = sorted(  # sorted: os.listdir order is arbitrary
            name
            for name in os.listdir(INPUT_DIR)
            # Accept ".TXT" as well, and skip directories that merely have a
            # .txt-like name, since they cannot be opened as files.
            if name.lower().endswith('.txt')
            and os.path.isfile(os.path.join(INPUT_DIR, name))
        )
    else:
        txt_files = []

    if not txt_files:
        print(f"未在 {INPUT_DIR} 目录找到TXT文件,请检查路径!")
        return

    # 3. Process the files one by one.
    print(f"共发现 {len(txt_files)} 个TXT文件,开始批量替换角色...")
    for txt_filename in txt_files:
        input_path = os.path.join(INPUT_DIR, txt_filename)
        output_path = os.path.join(OUTPUT_DIR, txt_filename)
        replace_role_in_txt(input_path, output_path)

    print(f"\n全部处理完成!文件已保存至:{os.path.abspath(OUTPUT_DIR)}")


# Run the batch conversion only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    batch_replace_all_txt()