from data_process import build_dataloader
from model import TransClassifier
import torch
import torch.nn as nn
from transformers import AutoTokenizer
import pandas as pd
import numpy as np
import gc
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore")


class EarlyStopping:
    """Stop training once validation loss fails to improve for `patience` epochs."""

    def __init__(self, patience=5, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


def train(backbone_dir, deal_folder, not_deal_folder, batch_size,
          initial_lr=1e-5, max_epochs=100,
          best_ckpt_path="best_ckpt.pth", final_ckpt_path="final_ckpt.pth",
          device="cuda"):
    data_dict = build_dataloader(deal_folder, not_deal_folder, batch_size)
    train_loader = data_dict["train"]
    val_loader = data_dict["val"]

    tokenizer = AutoTokenizer.from_pretrained(backbone_dir)
    model = TransClassifier(backbone_dir, device)
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=initial_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    loss_func = nn.CrossEntropyLoss()
    early_stopping = EarlyStopping(path=best_ckpt_path)
    history = {"train_loss": [], "val_loss": [], "epoch": []}

    for epoch in range(max_epochs):
        model.train()
        total_loss = 0.0
        train_steps = 0

        # Unfreeze the backbone after two warm-up epochs (TransClassifier
        # presumably constructs it frozen) so the whole model is fine-tuned.
        if epoch == 2:
            for param in model.backbone.parameters():
                param.requires_grad = True
            print("Unfroze backbone parameters")

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{max_epochs} [Train]')
        for batch_idx, (ids, texts, labels) in enumerate(pbar):
            labels = labels.to(device)
            texts = tokenizer.apply_chat_template(
                texts, tokenize=False, add_generation_prompt=True,
                enable_thinking=False)
            inputs = tokenizer(texts, padding=True, truncation=True,
                               max_length=2048, return_tensors="pt").to(device)

            # Forward pass under bfloat16 autocast; bf16 needs no GradScaler.
            with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
                outputs = model(inputs)
                loss = loss_func(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_loss += loss.item()
            train_steps += 1
            pbar.set_postfix({"train_loss": total_loss / train_steps})

            # Release batch tensors eagerly to keep GPU memory pressure low.
            del texts, labels, outputs, loss
            torch.cuda.empty_cache()
            gc.collect()

        # Anneal the learning rate once per epoch, so the cosine schedule
        # spans T_max=10 epochs rather than cycling every 10 batches.
        scheduler.step()

        val_loss = val(val_loader, model, loss_func, tokenizer, device)
        train_loss = total_loss / len(train_loader)
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["epoch"].append(epoch + 1)
        print(f"Epoch {epoch+1}/{max_epochs}, "
              f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    torch.save(model.state_dict(), final_ckpt_path)
    print(f"Final model saved to {final_ckpt_path}")

    history_df = pd.DataFrame(history)
    history_df.to_csv("training_history.csv", index=False)
    print("Training history saved to training_history.csv")
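# Note that EarlyStopping writes the best-scoring weights to best_ckpt_path,
# whereas final_ckpt_path holds whatever state the model had when training
# stopped. A minimal sketch (an assumption, not part of this script) of
# restoring the best checkpoint for inference:
#
#   model = TransClassifier(backbone_dir, device)
#   model.load_state_dict(torch.load("best_ckpt.pth", map_location=device))
#   model.to(device)
#   model.eval()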
def val(val_loader, model, loss_func, tokenizer, device):
    """Return the mean cross-entropy loss over the validation loader."""
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_idx, (ids, texts, labels) in enumerate(val_loader):
            labels = labels.to(device)
            texts = tokenizer.apply_chat_template(
                texts, tokenize=False, add_generation_prompt=True,
                enable_thinking=False)
            inputs = tokenizer(texts, padding=True, truncation=True,
                               max_length=2048, return_tensors="pt").to(device)
            with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
                outputs = model(inputs)
                loss = loss_func(outputs, labels)
            val_loss += loss.item()
    return val_loss / len(val_loader)


if __name__ == "__main__":
    backbone_dir = r"C:\Users\GA\Desktop\models\Qwen3-1.7B"
    deal_folder = "deal"
    not_deal_folder = "not_deal"
    batch_size = 8
    device = "cuda"
    train(backbone_dir, deal_folder, not_deal_folder, batch_size, device=device)
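# Assumed contract for build_dataloader (defined in data_process.py, which is
# not shown here): it returns {"train": DataLoader, "val": DataLoader}, and
# each batch unpacks as (ids, texts, labels), where `texts` is a batch of
# chat-format conversations, e.g. [[{"role": "user", "content": "..."}], ...];
# tokenizer.apply_chat_template expects message dicts, so raw strings would
# fail at that call.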