# ai-trash-can/train_mobilenetv2.py
#
# Listing metadata: 133 lines, 4.1 KiB, Python

import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm
if __name__=="__main__":
start_time = time.time()
# 设置随机种子,以确保结果可重复
torch.manual_seed(114514)
# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 数据增强和标准化
transform = transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# 数据加载
train_dir = './train_data/1/train'
test_dir = 'train_data/1/test'
# 训练轮数
num_epochs = 20
#加载数据集
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = datasets.ImageFolder(test_dir, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=False)
# 构建MobileNetV2模型
model = models.mobilenet_v2(pretrained=True)
num_classes = len(train_dataset.classes)
model.classifier[1] = nn.Linear(in_features=1280, out_features=num_classes)
# 将模型移动到设备上
model = model.to(device)
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# 添加ReduceLROnPlateau调度器
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, verbose=False)
print(f"train data:{len(train_loader)}")
print(f"test data:{len(test_loader)}")
print(f"epochs:{num_epochs}")
# 训练模型
print("start training")
min_loss = 10000.0
max_accuracy = 0
#temp = time.time()
for epoch in range(num_epochs):
train_start_time = time.time()
print(f"turn {epoch + 1}:")
current_lr = optimizer.param_groups[0]['lr']
print(f"Current learning rate: {current_lr}")
model.train()
running_loss = 0.0
for inputs, labels in tqdm(train_loader, desc="training", unit="item", ncols=100):
inputs, labels = inputs.to(device), labels.to(device)
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
train_end_time = time.time()
print(f"Epoch {epoch + 1}/{num_epochs} Train loss: {running_loss / len(train_loader)} Train cost:{train_end_time -train_start_time}")
# 在测试集上评估模型
test_start_time = time.time()
model.eval()
correct = 0
total = 0
val_loss = 0.0
with torch.no_grad():
for inputs, labels in test_loader:
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
val_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
val_loss /= len(test_loader)
# 更新学习率
scheduler.step(val_loss)
test_end_time = time.time()
accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}% Test loss:{ val_loss } test cost:{ test_end_time - test_start_time }")
# 保存模型
if val_loss < min_loss or max_accuracy < accuracy:
min_loss = val_loss
max_accuracy = accuracy
torch.save(model.state_dict(), f"./model/1/epochs{epoch + 1} {accuracy:.2f}.pt")
print("model saved")
print("all finish")
'''
torch.save(model.state_dict(), f"./model/1/epochs{epoch + 1} {accuracy:.2f}.pt")
print("final model save")
'''
print(f"time:{time.time()-start_time}")