Image classifier — 2 binary heads | SWA + TTA | 22 ablations | 訓練日期:2026-05-05 | 5090-2 dual-GPU autonomous agent | 來源:cvat2 project 7 (raicvat #12 移植) — 4536 train / 1170 val / 1765 test
import torch, timm, torch.nn as nn
class GenericClassifier(nn.Module):
    """Image classifier: a timm backbone followed by dropout and one linear layer.

    The backbone is created through ``timm.create_model`` with
    ``num_classes=0`` and ``global_pool="avg"``. Its output goes through
    dropout and then a single ``nn.Linear`` producing ``n_attr`` raw logits;
    no activation is applied inside this module.

    Args:
        backbone: Model name passed to ``timm.create_model``.
        n_attr: Number of outputs of the linear layer.
        feat_dim: Input width of the linear layer. It must match the width of
            the backbone's output; this is not validated here.
        drop_rate: Dropout probability applied before the linear layer.
            Defaults to 0.3, the value that was previously hard-coded.
        pretrained: Forwarded to ``timm.create_model``. Defaults to False,
            the value that was previously hard-coded.
    """

    def __init__(
        self,
        backbone: str,
        n_attr: int,
        feat_dim: int,
        *,
        drop_rate: float = 0.3,
        pretrained: bool = False,
    ) -> None:
        super().__init__()
        # Attribute names are kept as-is: they determine the state_dict keys.
        self.backbone = timm.create_model(
            backbone, pretrained=pretrained, num_classes=0, global_pool="avg"
        )
        self.dropout = nn.Dropout(drop_rate)
        self.cls = nn.Linear(feat_dim, n_attr)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the linear layer's output for ``x`` (backbone -> dropout -> linear)."""
        features = self.backbone(x)
        return self.cls(self.dropout(features))
# SECURITY NOTE: weights_only=False lets torch.load unpickle arbitrary objects,
# which can execute code. Only load this checkpoint from a trusted source.
# map_location="cpu" makes loading independent of the device the checkpoint
# was saved from, so it also works on hosts without a matching GPU.
ckpt = torch.load("hatch_swa_v20260505.pt", map_location="cpu", weights_only=False)
# Values noted for this checkpoint:
#   backbone_name = "convnext_tiny.fb_in1k", attrs = ["has_open", "has_close"]
#   feat_dim = 768, img_size = 384
#   thresholds = {"has_open": 0.50, "has_close": 0.76} (best.pt) or 0.52/0.76 (best_tta.pt)
model = GenericClassifier(ckpt["backbone_name"], len(ckpt["attrs"]), ckpt["feat_dim"])
model.load_state_dict(ckpt["model_state"])
model.eval()  # inference mode (disables dropout)
# Inference: 384x384 input -> ImageNet normalization -> sigmoid over the 2 outputs
# TTA (applies to best_tta.pt): add one horizontal flip and average the predictions
Generated 2026-05-05 | rai-vision-training | kaggle-reports.pages.dev | 8hr autonomous research on 5090-2