from data_process import build_dataloader
from model import TransClassifier

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer

import pandas as pd
import numpy as np
import os
import json
from datetime import datetime
import gc
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

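# Assumed interfaces of the local modules (not defined in this file; inferred
# from how they are used below):
#   - build_dataloader(deal_folder, not_deal_folder, batch_size) returns a dict
#     with "train" and "val" DataLoaders yielding (ids, texts, labels) batches,
#     where texts are chat-formatted message lists.
#   - TransClassifier(backbone_dir, device) wraps a Hugging Face backbone plus a
#     classification head, exposes `.backbone`, and maps tokenized inputs to
#     class logits compatible with nn.CrossEntropyLoss.
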
class EarlyStopping:
    """Stop training when validation loss has not improved for `patience`
    consecutive epochs, saving the best model weights to `path` along the way."""

    def __init__(self, patience=5, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss


def train(backbone_dir, deal_folder, not_deal_folder,
          batch_size, initial_lr=1e-5, max_epochs=100,
          best_ckpt_path="best_ckpt.pth", final_ckpt_path="final_ckpt.pth", device="cuda"):

    data_dict = build_dataloader(deal_folder, not_deal_folder, batch_size)
    train_loader = data_dict["train"]
    val_loader = data_dict["val"]

    tokenizer = AutoTokenizer.from_pretrained(backbone_dir)
    model = TransClassifier(backbone_dir, device)
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=initial_lr)
    # Note: the scheduler is stepped once per batch, so T_max=10 covers 10 optimizer steps.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

    loss_func = nn.CrossEntropyLoss()

    early_stopping = EarlyStopping(path=best_ckpt_path)
    history = {"train_loss": [], "val_loss": [], "epoch": []}

    for epoch in range(max_epochs):
        model.train()
        total_loss = 0.0
        train_steps = 0

        # Unfreeze the backbone after two epochs of training the classification head
        # (assumes TransClassifier freezes the backbone at construction time).
        if epoch == 2:
            for param in model.backbone.parameters():
                param.requires_grad = True
            print("Unfreeze backbone parameters")

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{max_epochs} [Train]')
        for batch_idx, (ids, texts, labels) in enumerate(pbar):
            labels = labels.to(device)

            # texts are expected to be chat-formatted messages; render them to plain
            # strings with the chat template (Qwen3 thinking mode disabled).
            texts = tokenizer.apply_chat_template(texts, tokenize=False, add_generation_prompt=True, enable_thinking=False)
            inputs = tokenizer(texts, padding=True, truncation=True, max_length=2048, return_tensors="pt").to(device)
            with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
                outputs = model(inputs)
                loss = loss_func(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()
            train_steps += 1

            train_loss = total_loss / train_steps
            pbar.set_postfix({"train_loss": train_loss})

        # Release the last batch's tensors and cached GPU memory before validation.
        del texts, labels, outputs, loss
        torch.cuda.empty_cache()
        gc.collect()

        val_loss = val(val_loader, model, loss_func, tokenizer, device)
        history["train_loss"].append(total_loss / len(train_loader))
        history["val_loss"].append(val_loss)
        history["epoch"].append(epoch+1)

        print(f"Epoch {epoch+1}/{max_epochs}, Train Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss:.4f}")

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    torch.save(model.state_dict(), final_ckpt_path)
    print(f"Final model saved to {final_ckpt_path}")

    history_df = pd.DataFrame(history)
    history_df.to_csv("training_history.csv", index=False)
    print("Training history saved to training_history.csv")


def val(val_loader, model, loss_func, tokenizer, device):
    """Return the mean cross-entropy loss over the validation set."""
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_idx, (ids, texts, labels) in enumerate(val_loader):
            labels = labels.to(device)

            texts = tokenizer.apply_chat_template(texts, tokenize=False, add_generation_prompt=True, enable_thinking=False)
            inputs = tokenizer(texts, padding=True, truncation=True, max_length=2048, return_tensors="pt").to(device)
            with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
                outputs = model(inputs)
                loss = loss_func(outputs, labels)

            val_loss += loss.item()
    return val_loss / len(val_loader)


if __name__ == "__main__":
|
|
backbone_dir = r"C:\Users\GA\Desktop\models\Qwen3-1.7B"
|
|
deal_folder = "deal"
|
|
not_deal_folder = "not_deal"
|
|
batch_size = 8
|
|
device = "cuda"
|
|
|
|
|
|
train(backbone_dir, deal_folder, not_deal_folder, batch_size, device=device) |
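
# Reloading the best weights for inference later, as a minimal sketch (assumes the
# same TransClassifier constructor used above and the default best_ckpt_path):
#
#     model = TransClassifier(backbone_dir, device)
#     model.load_state_dict(torch.load("best_ckpt.pth", map_location=device))
#     model.to(device)
#     model.eval()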