上传文件至「Chat_history」

洋葱聊天记录
This commit is contained in:
2026-04-02 10:40:41 +08:00
commit 8419ed0ffb
13 changed files with 35004 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
import os
# Directory scanned for the .txt files to clean (path is relative to the
# current working directory).
input_dir = "./output/目标条目"
# Directory that receives the cleaned copies; batch_remove_asterisk() creates
# it if it does not exist yet.
output_dir = "./output/清洗结果"
def remove_asterisk(file_content: str) -> str:
    """Return ``file_content`` with every ASCII asterisk removed.

    Only the ``*`` character (U+002A) is deleted; all other characters,
    including whitespace and line breaks, are left exactly as they were.

    Args:
        file_content: The text to clean.

    Returns:
        A new string identical to ``file_content`` minus all ``*`` characters.
    """
    return file_content.replace("*", "")
def batch_remove_asterisk() -> None:
    """Strip ``*`` from every ``.txt`` file in ``input_dir``.

    Each file whose name ends with ``.txt`` is read as UTF-8, passed through
    ``remove_asterisk`` and written under the same file name into
    ``output_dir`` (created if missing). A file that cannot be read, decoded
    or written is reported on stdout and skipped so the rest of the batch
    still runs.

    Raises:
        OSError: If ``output_dir`` cannot be created or ``input_dir`` cannot
            be listed (for example, it does not exist).
    """
    os.makedirs(output_dir, exist_ok=True)
    # os.listdir() returns names in arbitrary order; sort so the processing
    # and failure-report order is reproducible between runs.
    txt_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".txt"))
    for file_name in txt_files:
        input_path = os.path.join(input_dir, file_name)
        output_path = os.path.join(output_dir, file_name)
        try:
            # newline="" disables newline translation on both sides, so the
            # original line endings (LF / CRLF) are preserved byte-for-byte
            # and the only change to the text is the removed asterisks.
            with open(input_path, "r", encoding="utf-8", newline="") as src:
                content = src.read()
            processed_content = remove_asterisk(content)
            with open(output_path, "w", encoding="utf-8", newline="") as dst:
                dst.write(processed_content)
        except (OSError, UnicodeError) as e:
            # File-system and decoding problems are per-file failures: report
            # (message truncated to 60 characters) and move on to the next file.
            print(f"处理失败:{file_name} | 错误:{str(e)[:60]}")
if __name__ == "__main__":
    # Run the batch clean-up only when executed as a script, not on import.
    batch_remove_asterisk()