上传文件至「data」
This commit is contained in:
60
data/object_convert.py
Normal file
60
data/object_convert.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# Convert speaker role codes at the start of transcript lines: 0 -> 家长 (parent), 1 -> 销售 (salesperson)
|
||||
import os
|
||||
import re
|
||||
|
||||
# Directory that holds the original TXT files (point this at your data folder).
INPUT_DIR = "./data_new/processed_txt_files"
# Directory the converted files are written to; it is created automatically
# and each output file keeps the name of its source file.
OUTPUT_DIR = "./data_role_replaced"
|
||||
# Matches a role code ("0" or "1") immediately followed by an ASCII colon at
# the start of a line. re.MULTILINE makes ^ anchor at every line start, so
# only the leading role marker is rewritten and the utterance text itself is
# never touched.
ROLE_PATTERN = re.compile(r'^([01]):', re.MULTILINE)


def replace_role_in_txt(txt_path, output_path):
    """Rewrite the role prefixes of a single TXT file.

    Every line that starts with ``0:`` gets that prefix replaced by ``家长:``
    (parent) and every line that starts with ``1:`` by ``销售:`` (salesperson).
    Note that the emitted prefix ends with a full-width colon. All other text
    is copied unchanged. A progress line is printed when the file is done.

    :param txt_path: path of the source TXT file (UTF-8, with or without BOM)
    :param output_path: path the converted text is written to (UTF-8, no BOM)
    """
    # 1. Read the whole source file. "utf-8-sig" behaves like "utf-8" but
    #    drops a leading byte-order mark; with plain "utf-8" the BOM would stay
    #    in front of the first line and stop ^ from matching its role prefix.
    with open(txt_path, 'r', encoding='utf-8-sig') as f:
        content = f.read()

    # 2. Map the captured role code to its label: "0" -> parent, "1" -> sales.
    def replace_match(match):
        return "家长:" if match.group(1) == "0" else "销售:"

    replaced_content = ROLE_PATTERN.sub(replace_match, content)

    # 3. Write the converted text to the output path.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(replaced_content)

    print(f"处理完成:{os.path.basename(txt_path)}")
|
||||
|
||||
|
||||
def batch_replace_all_txt(input_dir=None, output_dir=None):
    """Convert the role prefixes of every TXT file in a directory.

    Each regular file in ``input_dir`` whose name ends with ``.txt`` is passed
    to ``replace_role_in_txt`` and written to ``output_dir`` under the same
    file name. Files are processed in sorted name order so runs are
    reproducible. If the input directory is missing, is not a directory, or
    contains no matching files, a hint is printed and nothing is converted.

    :param input_dir: directory to read from; defaults to ``INPUT_DIR``
    :param output_dir: directory to write to; defaults to ``OUTPUT_DIR``
    """
    # Resolve the defaults lazily so the module constants stay the single
    # source of truth when the function is called without arguments.
    input_dir = INPUT_DIR if input_dir is None else input_dir
    output_dir = OUTPUT_DIR if output_dir is None else output_dir

    # 1. Make sure the output directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # 2. Collect the TXT files. A missing or invalid input path is treated
    #    like an empty directory so the user gets the "check the path" hint
    #    instead of a traceback.
    try:
        entries = os.listdir(input_dir)
    except (FileNotFoundError, NotADirectoryError):
        entries = []

    # Skip sub-directories that merely happen to be named "*.txt"; opening
    # them as files would fail.
    txt_files = sorted(
        name
        for name in entries
        if name.endswith('.txt') and os.path.isfile(os.path.join(input_dir, name))
    )
    if not txt_files:
        print(f"未在 {input_dir} 目录找到TXT文件,请检查路径!")
        return

    # 3. Convert the files one by one.
    print(f"共发现 {len(txt_files)} 个TXT文件,开始批量替换角色...")
    for txt_filename in txt_files:
        input_path = os.path.join(input_dir, txt_filename)
        output_path = os.path.join(output_dir, txt_filename)
        replace_role_in_txt(input_path, output_path)

    print(f"\n全部处理完成!文件已保存至:{os.path.abspath(output_dir)}")
|
||||
|
||||
|
||||
# Script entry point: run the batch conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    batch_replace_all_txt()
|
||||
Reference in New Issue
Block a user