Update model/modelling.py

This commit is contained in:
2026-02-27 11:37:55 +08:00
parent daa5d12fcd
commit 14ce733d36

View File

@@ -1,52 +1,57 @@
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from transformers import AutoModel from transformers import AutoModel
class TransClassifier(nn.Module):
    """Frozen transformer backbone + trainable MLP head for sequence classification.

    The backbone is loaded from ``model_dir`` and kept in eval mode with all of
    its parameters frozen; only ``token_proj`` and ``classifier`` are trainable.
    """

    def __init__(self, model_dir: str, output_classes: int, device: str = "cuda"):
        """
        Args:
            model_dir: path or hub id passed to ``AutoModel.from_pretrained``.
            output_classes: number of classification logits produced.
            device: torch device string the model is moved to.
        """
        super().__init__()
        # NOTE(review): flash_attention_2 requires the flash-attn package to be
        # installed in the target environment — confirm before deploying.
        self.backbone = AutoModel.from_pretrained(
            model_dir,
            dtype="bfloat16",
            attn_implementation="flash_attention_2",
        ).to(device).eval()
        self.device = device
        self.torch_dtype = torch.bfloat16
        self.hidden_size = self.backbone.config.hidden_size

        # Trainable per-token projection applied before mean pooling.
        self.token_proj = nn.Linear(self.hidden_size, self.hidden_size).to(
            device=device, dtype=self.torch_dtype
        )
        self.classifier = nn.Sequential(
            nn.LayerNorm(self.hidden_size),
            nn.Linear(self.hidden_size, self.hidden_size // 2),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(self.hidden_size // 2, self.hidden_size // 4),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(self.hidden_size // 4, output_classes),
        ).to(device=device, dtype=self.torch_dtype)

        # Freeze the backbone: only the projection and classifier head train.
        for param in self.backbone.parameters():
            param.requires_grad = False

    def forward(self, model_inputs: dict):
        """Mean-pool projected token states over valid positions, then classify.

        Args:
            model_inputs: tokenizer output passed through to the backbone; must
                contain ``'attention_mask'`` (1 = real token, 0 = padding).

        Returns:
            Logits of shape ``(batch, output_classes)``.
        """
        outputs = self.backbone(**model_inputs)
        proj_states = self.token_proj(outputs.last_hidden_state)

        # (batch, seq, 1) mask in the activation dtype. Broadcasting replaces
        # the previous expand_as() + full-width count: same math, but no
        # materialized (batch, seq, hidden) mask tensor.
        mask = model_inputs['attention_mask'].unsqueeze(-1).to(proj_states.dtype)
        sum_states = (proj_states * mask).sum(dim=1)
        valid_tokens = mask.sum(dim=1)  # (batch, 1); broadcasts over hidden dim

        # clamp guards against a division by zero on an all-padding row.
        pooled = sum_states / valid_tokens.clamp(min=1e-9)
        return self.classifier(pooled)
if __name__ == "__main__":
    model_dir = r"C:\Users\GA\Desktop\models\Qwen3-1.7B"
    device = "cuda"
    # BUG FIX: the constructor signature is (model_dir, output_classes, device);
    # calling TransClassifier(model_dir, device) passed the string "cuda" as
    # output_classes. Supply a class count and pass the device by keyword.
    model = TransClassifier(model_dir, 2, device=device)
    print(model.hidden_size)
    print(model)

    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"总参数量: {total_params:,}")
    print(f"可训练参数量: {trainable_params:,}")