Upload files to "/"
149  train.py  Normal file
@@ -0,0 +1,149 @@
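"""Fine-tune the Transformer classifier defined in model.py to separate
documents in the `deal` folder from those in `not_deal`.

Training runs with bf16 autocast, gradient clipping and a cosine LR schedule,
early-stops on validation loss, and writes the best and final checkpoints plus
a CSV of per-epoch losses (training_history.csv).
"""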
from data_process import build_dataloader
from model import TransClassifier

import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoTokenizer

import pandas as pd
import numpy as np
import os
import json
from datetime import datetime
import gc
from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore")

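# Early stopping on validation loss: training halts once the loss has failed to
# improve by more than `delta` for `patience` consecutive epochs; the best
# weights seen so far are checkpointed to `path` along the way.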
class EarlyStopping:
    def __init__(self, patience=5, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

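# The two project-local dependencies are assumed to behave roughly as follows
# (hypothetical signatures inferred from how they are used below; the actual
# implementations live in data_process.py and model.py):
#
#   data_dict = build_dataloader(deal_folder, not_deal_folder, batch_size)
#   #   -> {"train": DataLoader, "val": DataLoader}, each yielding (ids, texts, labels)
#
#   model = TransClassifier(backbone_dir, device)
#   #   -> wraps a Hugging Face backbone (exposed as model.backbone, initially
#   #      frozen) plus a classification head; model(inputs) takes the tokenizer's
#   #      BatchEncoding and returns class logits suitable for CrossEntropyLoss.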
def train(backbone_dir, deal_folder, not_deal_folder,
          batch_size, initial_lr=1e-5, max_epochs=100,
          best_ckpt_path="best_ckpt.pth", final_ckpt_path="final_ckpt.pth", device="cuda"):

    data_dict = build_dataloader(deal_folder, not_deal_folder, batch_size)
    train_loader = data_dict["train"]
    val_loader = data_dict["val"]

    tokenizer = AutoTokenizer.from_pretrained(backbone_dir)
    model = TransClassifier(backbone_dir, device)
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=initial_lr)
    # Note: scheduler.step() is called once per batch below, so with T_max=10 the
    # cosine annealing period is measured in optimizer steps, not epochs.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

    loss_func = nn.CrossEntropyLoss()

    early_stopping = EarlyStopping(path=best_ckpt_path)
    history = {"train_loss": [], "val_loss": [], "epoch": []}

    for epoch in range(max_epochs):
        model.train()
        total_loss = 0.0
        train_steps = 0

        # After two warm-up epochs, unfreeze the backbone so the whole model is fine-tuned.
        if epoch == 2:
            for param in model.backbone.parameters():
                param.requires_grad = True
            print("Unfreeze backbone parameters")

        pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{max_epochs} [Train]')
        for batch_idx, (ids, texts, labels) in enumerate(pbar):
            labels = labels.to(device)

            # `texts` is expected to already be a batch of chat-format conversations;
            # `enable_thinking` is a chat-template argument supported by Qwen3-style tokenizers.
            texts = tokenizer.apply_chat_template(texts, tokenize=False, add_generation_prompt=True, enable_thinking=False)
            inputs = tokenizer(texts, padding=True, truncation=True, max_length=2048, return_tensors="pt").to(device)
            with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
                outputs = model(inputs)
                loss = loss_func(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()

            total_loss += loss.item()
            train_steps += 1

            train_loss = total_loss / train_steps
            pbar.set_postfix({"train_loss": train_loss})

        # Free the last batch's tensors before validation.
        del texts, labels, outputs, loss
        torch.cuda.empty_cache()
        gc.collect()

        val_loss = val(val_loader, model, loss_func, tokenizer, device)
        history["train_loss"].append(total_loss / len(train_loader))
        history["val_loss"].append(val_loss)
        history["epoch"].append(epoch+1)

        print(f"Epoch {epoch+1}/{max_epochs}, Train Loss: {total_loss / len(train_loader):.4f}, Val Loss: {val_loss:.4f}")

        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # Save the final weights and the per-epoch loss history.
    torch.save(model.state_dict(), final_ckpt_path)
    print(f"Final model saved to {final_ckpt_path}")

    history_df = pd.DataFrame(history)
    history_df.to_csv("training_history.csv", index=False)
    print("Training history saved to training_history.csv")

def val(val_loader, model, loss_func, tokenizer, device):
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch_idx, (ids, texts, labels) in enumerate(val_loader):
            labels = labels.to(device)

            texts = tokenizer.apply_chat_template(texts, tokenize=False, add_generation_prompt=True, enable_thinking=False)
            inputs = tokenizer(texts, padding=True, truncation=True, max_length=2048, return_tensors="pt").to(device)
            with torch.amp.autocast(device_type="cuda", dtype=torch.bfloat16):
                outputs = model(inputs)
                loss = loss_func(outputs, labels)

            val_loss += loss.item()
    return val_loss / len(val_loader)

if __name__ == "__main__":
    backbone_dir = r"C:\Users\GA\Desktop\models\Qwen3-1.7B"
    deal_folder = "deal"
    not_deal_folder = "not_deal"
    batch_size = 8
    device = "cuda"

    train(backbone_dir, deal_folder, not_deal_folder, batch_size, device=device)
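# Usage (assuming data_process.py and model.py are importable and the "deal" /
# "not_deal" folders sit next to this script):
#
#   python train.py
#
# Checkpoints are written to best_ckpt.pth / final_ckpt.pth and the loss curve
# to training_history.csv.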