Part 26: Training Neural Networks, Regularization Techniques

by digitaltech2.com

Regularization techniques help prevent overfitting by adding constraints or penalties to the model’s learning process. This section covers common regularization methods used in neural networks.

L2 Regularization (Weight Decay)

L2 regularization adds a penalty proportional to the square of the magnitude of the weights. This encourages the model to keep the weights small, helping to prevent overfitting.

  • Usage:
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

In this example, weight_decay is the L2 penalty coefficient. The optimizer applies this penalty to the weights during the update step.
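
If you want the penalty to appear explicitly in the loss (for example, to log it), you can add the L2 term yourself instead of relying on weight_decay. The sketch below is only a rough equivalent for plain SGD, assuming model, loss_fn, inputs, and targets are defined as in the later examples; PyTorch's weight_decay adds weight_decay * w directly to the gradient, so the factor-of-2 convention differs slightly, and for adaptive optimizers such as Adam the two approaches are not identical (see AdamW). The name l2_lambda is just an illustrative choice.

import torch
import torch.optim as optim

l2_lambda = 1e-4  # penalty coefficient, mirroring weight_decay=1e-4 above
optimizer = optim.SGD(model.parameters(), lr=0.01)  # no built-in weight decay

outputs = model(inputs)
loss = loss_fn(outputs, targets)

# Add lambda * sum of squared weights to the loss before backpropagation
l2_penalty = sum((p ** 2).sum() for p in model.parameters())
loss = loss + l2_lambda * l2_penalty

optimizer.zero_grad()
loss.backward()
optimizer.step()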

Dropout

Dropout is a technique where randomly selected neurons are ignored (dropped out) during training. This prevents the model from relying too heavily on any individual neuron, promoting more robust learning.

  • Dropout Layer Example:
import torch
import torch.nn as nn

class DropoutNN(nn.Module):
    def __init__(self):
        super(DropoutNN, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.dropout = nn.Dropout(p=0.5)  # 50% dropout
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = DropoutNN()

In this example, a Dropout layer with a dropout probability of 0.5 is added between the first and second fully connected layers.
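
One detail to keep in mind: the dropout layer is only active while the model is in training mode; calling model.eval() disables it, and because nn.Dropout uses inverted dropout (activations are scaled by 1/(1 - p) during training), no extra rescaling is needed at evaluation time. A quick way to see the difference, assuming an input of shape (1, 10) for the model above:

import torch

x = torch.randn(1, 10)

model.train()   # dropout active: repeated calls give different outputs
print(model(x), model(x))

model.eval()    # dropout disabled: repeated calls give identical outputs
print(model(x), model(x))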

Early Stopping

Early stopping is a technique where training is halted once the model’s performance on a validation set stops improving. This helps prevent overfitting by stopping before the model starts to memorize the training data.

  • Early Stopping Example
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss - self.min_delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

# Example neural network model
class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model, loss function, optimizer, and dataset
model = SimpleNN()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
early_stopping = EarlyStopping(patience=10, min_delta=0.01)

dataset = TensorDataset(torch.randn(100, 10), torch.randn(100, 1))
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

val_inputs = torch.randn(32, 10)
val_targets = torch.randn(32, 1)

for epoch in range(100):
    model.train()
    for batch_inputs, batch_targets in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = loss_fn(outputs, batch_targets)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_outputs = model(val_inputs)
        val_loss = loss_fn(val_outputs, val_targets)

    early_stopping(val_loss.item())
    if early_stopping.early_stop:
        print(f'Early stopping at epoch {epoch}')
        break

    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/100], Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

print("Training completed.")
Example: Combining Regularization Techniques

Here is an example of combining L2 regularization, dropout, and early stopping in a single neural network model.

  • Combined Regularization Example
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class CombinedNN(nn.Module):
    def __init__(self):
        super(CombinedNN, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = CombinedNN()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
early_stopping = EarlyStopping(patience=10, min_delta=0.01)  # reuses the EarlyStopping class defined above

dataset = TensorDataset(torch.randn(100, 10), torch.randn(100, 1))
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

val_inputs = torch.randn(32, 10)
val_targets = torch.randn(32, 1)

for epoch in range(100):
    model.train()
    for batch_inputs, batch_targets in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = loss_fn(outputs, batch_targets)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_outputs = model(val_inputs)
        val_loss = loss_fn(val_outputs, val_targets)

    early_stopping(val_loss.item())
    if early_stopping.early_stop:
        print(f'Early stopping at epoch {epoch}')
        break

    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/100], Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

print("Training completed.")

This example combines L2 regularization (weight decay), dropout, and early stopping to train a neural network effectively.

Monitoring and Debugging Training

Monitoring and debugging the training process are essential for ensuring that your neural network is learning correctly and efficiently. This section covers techniques and tools to help you keep track of training progress and identify issues.

Logging Training Progress

Logging the training and validation loss at regular intervals helps you understand how well the model is learning and if it’s overfitting or underfitting.

  • Simple Logging Example
for epoch in range(100):
    model.train()
    for batch_inputs, batch_targets in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = loss_fn(outputs, batch_targets)
        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            val_outputs = model(val_inputs)
            val_loss = loss_fn(val_outputs, val_targets)
        print(f'Epoch [{epoch}/100], Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')
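
Beyond printing, it is often useful to collect the losses in lists so you can plot the curves afterwards and spot overfitting (validation loss rising while training loss keeps falling). A small sketch, assuming matplotlib is installed and the training and validation steps are the same as above:

import matplotlib.pyplot as plt

train_losses, val_losses = [], []

for epoch in range(100):
    # ... training step as above ...
    train_losses.append(loss.item())

    # ... validation step as above ...
    val_losses.append(val_loss.item())

plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.savefig('loss_curves.png')
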
Using TensorBoard for Visualization

TensorBoard is a powerful visualization tool that allows you to track and visualize various metrics during training, such as loss, accuracy, and histograms of weights and gradients.

  • TensorBoard Example
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/experiment1')

for epoch in range(100):
    model.train()
    for batch_inputs, batch_targets in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = loss_fn(outputs, batch_targets)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_outputs = model(val_inputs)
        val_loss = loss_fn(val_outputs, val_targets)
    
    writer.add_scalar('Training Loss', loss.item(), epoch)
    writer.add_scalar('Validation Loss', val_loss.item(), epoch)

    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/100], Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')

writer.close()

To visualize the training progress, run the following command in your terminal:

tensorboard --logdir=runs

Then, open your browser and navigate to http://localhost:6006.
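
TensorBoard can also track how the weight and gradient distributions evolve, which helps diagnose problems such as vanishing or exploding gradients. A short sketch of what you could add inside the epoch loop above, after the optimizer step:

# Log a histogram of each parameter and its gradient once per epoch
for name, param in model.named_parameters():
    writer.add_histogram(name, param, epoch)
    if param.grad is not None:
        writer.add_histogram(f'{name}.grad', param.grad, epoch)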

Debugging Tools

PyTorch provides several debugging aids, such as torch.autograd.profiler for profiling (newer releases also offer the torch.profiler module); the third-party torchviz package can be used to visualize computation graphs.

  • Using torch.autograd.profiler:
import torch.autograd.profiler as profiler

with profiler.profile(record_shapes=True) as prof:
    outputs = model(inputs)
    loss = loss_fn(outputs, targets)
    loss.backward()

print(prof.key_averages().table(sort_by="cpu_time_total"))

  • Using torchviz for Graph Visualization:
from torchviz import make_dot

outputs = model(inputs)
make_dot(outputs, params=dict(model.named_parameters())).render("model_graph", format="png")
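
Note that torchviz is a separate package (installed with pip install torchviz) and also requires the Graphviz binaries to be available on your system to render the image.

As a follow-up to the profiler example, the collected profile can also be written out as a Chrome trace and inspected interactively in chrome://tracing. A minimal sketch, assuming prof is the profiler object from the with-block above and trace.json is just an illustrative filename:

# Write the profile to a trace file viewable in chrome://tracing
prof.export_chrome_trace("trace.json")
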
Example: Full Training Loop with Monitoring and Debugging

Here is an example combining logging, TensorBoard, and profiling in a full training loop.

  • Complete Training Loop Example
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
import torch.autograd.profiler as profiler

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleNN()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
writer = SummaryWriter('runs/experiment1')

dataset = TensorDataset(torch.randn(100, 10), torch.randn(100, 1))
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

val_inputs = torch.randn(32, 10)
val_targets = torch.randn(32, 1)

for epoch in range(100):
    model.train()
    for batch_inputs, batch_targets in dataloader:
        optimizer.zero_grad()
        # Profiling every batch adds noticeable overhead; in practice you would profile only a few batches
        with profiler.profile(record_shapes=True) as prof:
            outputs = model(batch_inputs)
            loss = loss_fn(outputs, batch_targets)
            loss.backward()
            optimizer.step()

        writer.add_scalar('Training Loss', loss.item(), epoch)
    
    model.eval()
    with torch.no_grad():
        val_outputs = model(val_inputs)
        val_loss = loss_fn(val_outputs, val_targets)
        writer.add_scalar('Validation Loss', val_loss.item(), epoch)
    
    if epoch % 10 == 0:
        print(f'Epoch [{epoch}/100], Loss: {loss.item():.4f}, Validation Loss: {val_loss.item():.4f}')
        # Print the profiler summary for the last profiled batch of this epoch
        print(prof.key_averages().table(sort_by="cpu_time_total"))

writer.close()

This example demonstrates how to log training progress, use TensorBoard for visualization, and profile the training process to identify bottlenecks.
