# NOTE: the Fashion-MNIST test set is loaded further down, once `datasets` has
# been imported and `transform` has been defined. Loading it at this point
# would raise a NameError because neither name exists yet.

import torch  # Import PyTorch library
from torchvision import datasets, transforms  # Import datasets and image transforms

# Preprocessing applied to every image: convert it to a tensor, then normalize
# its single channel using 0.5 for both the mean and the standard deviation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split (train=True), served in shuffled batches of 64.
trainset = datasets.FashionMNIST(
    '~/.pytorch/F_MNIST_data/',
    download=True,
    train=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
)

# Test split (train=False), loaded with the same transform and batch settings.
testset = datasets.FashionMNIST(
    '~/.pytorch/F_MNIST_data/',
    download=True,
    train=False,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=64,
    shuffle=True,
)

from torch import nn, optim
import torch.nn.functional as F  # Import functional API (F.relu, etc.)

class Classifier(nn.Module):
    """Fully connected network: 784 -> 256 -> 128 -> 64 -> 10 log-probabilities."""

    def __init__(self):
        """Create the four linear layers."""
        super().__init__()
        # Three hidden layers of decreasing width, then the 10-way output layer.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the log-probability of each of the 10 classes for batch ``x``.

        ``x`` is reshaped to ``(batch, -1)`` first, so its trailing dimensions
        must flatten to the 784 features that ``fc1`` expects.
        """
        hidden = x.view(x.size(0), -1)

        # Each hidden layer is followed by a ReLU activation.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        # Log-softmax over the class dimension.
        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)

model = Classifier()  # Untrained network, used here only to inspect shapes and accuracy

# Take a single batch from the test loader
images, labels = next(iter(testloader))
# The model returns log-probabilities; exponentiate to get class probabilities
ps = torch.exp(model(images))
# Make sure the shape is appropriate: one row per example, 10 class probabilities each
print(ps.shape)

# Highest probability and the corresponding class index for every example
top_p, top_class = ps.topk(1, dim=1)
# Look at the most likely classes for the first 10 examples
print(top_class[:10,:])

# top_class has shape (batch, 1) while labels has shape (batch,). Comparing them
# directly would broadcast to a (batch, batch) matrix, so labels is reshaped to
# match top_class before the element-wise comparison.
equals = top_class == labels.view(*top_class.shape)

# Fraction of correct predictions in this batch (booleans cast to floats, then averaged)
accuracy = torch.mean(equals.type(torch.FloatTensor))
print(f'Accuracy: {accuracy.item()*100}%')

# Template for a validation pass: walk over every test batch with autograd
# switched off. The loop body is intentionally left empty.
with torch.no_grad():
    for images, labels in testloader:
        pass  # validation pass goes here

model = Classifier()
criterion = nn.NLLLoss()  # Negative log-likelihood; pairs with the model's log-softmax output
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 15  # Number of passes over the training set

# Mean loss per epoch, kept so the curves can be plotted after training
train_losses, test_losses = [], []
for e in range(epochs):
    # ---- Training pass ----
    tot_train_loss = 0
    for images, labels in trainloader:
        optimizer.zero_grad()  # Gradients accumulate by default, so clear them first

        log_ps = model(images)
        loss = criterion(log_ps, labels)
        tot_train_loss += loss.item()

        loss.backward()
        optimizer.step()

    # ---- Validation pass (runs once after every epoch) ----
    tot_test_loss = 0
    test_correct = 0  # Number of correct predictions on the test set

    # Evaluation mode disables train-only behaviour such as dropout; it is a
    # no-op for a model that has no such layers.
    model.eval()
    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        for images, labels in testloader:
            log_ps = model(images)
            loss = criterion(log_ps, labels)
            tot_test_loss += loss.item()

            ps = torch.exp(log_ps)  # Log-probabilities -> probabilities
            top_p, top_class = ps.topk(1, dim=1)
            # Reshape labels to (batch, 1) so the comparison is element-wise
            equals = top_class == labels.view(*top_class.shape)
            test_correct += equals.sum().item()
    model.train()  # Back to training mode for the next epoch

    # The criterion already averages within a batch, so each total is a sum of
    # per-batch means. Dividing by the number of batches gives a mean loss that
    # is comparable between the train and test sets.
    train_loss = tot_train_loss / len(trainloader)
    test_loss = tot_test_loss / len(testloader)

    # At completion of epoch
    train_losses.append(train_loss)
    test_losses.append(test_loss)

    # test_correct counts individual examples, so it is divided by the dataset size
    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_loss),
          "Test Loss: {:.3f}.. ".format(test_loss),
          "Test Accuracy: {:.3f}".format(test_correct / len(testloader.dataset)))

%matplotlib inline  # Enable inline plotting in notebook
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt  # Import matplotlib for visualization

plt.plot(train_losses, label='Training loss')  # Plot training loss curve
plt.plot(test_losses, label='Validation loss')  # Plot test/validation loss curve
plt.legend(frameon=False)  # Add legend to plot

class Classifier(nn.Module):
    """Fully connected network (784 -> 256 -> 128 -> 64 -> 10) with dropout on the hidden layers."""

    def __init__(self):
        """Create the four linear layers and the shared dropout module."""
        super().__init__()
        # Three hidden layers of decreasing width, then the 10-way output layer.
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

        # A single dropout module (drop probability 0.2) reused after every hidden layer.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the log-probability of each of the 10 classes for batch ``x``.

        ``x`` is reshaped to ``(batch, -1)`` first, so its trailing dimensions
        must flatten to the 784 features that ``fc1`` expects.
        """
        hidden = x.view(x.size(0), -1)

        # Hidden layers: linear -> ReLU -> dropout.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(F.relu(layer(hidden)))

        # No dropout on the output layer; log-softmax over the class dimension.
        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)

## TODO: Define your model with dropout added

model = Classifier()
criterion = nn.NLLLoss()  # Negative log-likelihood; pairs with the model's log-softmax output
optimizer = optim.Adam(model.parameters(), lr=0.003)

epochs = 15  # Number of passes over the training set
steps = 0  # Not used by the loop below; kept in case other cells read it

# Mean loss per epoch (plain Python floats), kept for plotting after training
train_losses, test_losses = [], []
for e in range(epochs):
    # ---- Training pass ----
    running_loss = 0
    for images, labels in trainloader:

        optimizer.zero_grad()  # Gradients accumulate by default, so clear them first

        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # ---- Validation pass (runs once after every epoch) ----
    test_loss = 0
    accuracy = 0

    # Evaluation mode turns dropout off so predictions use the full network
    model.eval()
    # Turn off gradients for validation, saves memory and computations
    with torch.no_grad():
        for images, labels in testloader:
            log_ps = model(images)
            # .item() stores a Python float instead of accumulating tensors
            test_loss += criterion(log_ps, labels).item()

            ps = torch.exp(log_ps)  # Log-probabilities -> probabilities
            top_p, top_class = ps.topk(1, dim=1)
            # Reshape labels to (batch, 1) so the comparison is element-wise
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

    model.train()  # Re-enable dropout for the next training epoch

    # Both totals are sums of per-batch values, so divide by the number of batches
    train_losses.append(running_loss/len(trainloader))
    test_losses.append(test_loss/len(testloader))

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Test Loss: {:.3f}.. ".format(test_losses[-1]),
          "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))

%matplotlib inline  # Enable inline plotting in notebook
%config InlineBackend.figure_format = 'retina'

import matplotlib.pyplot as plt  # Import matplotlib for visualization

plt.plot(train_losses, label='Training loss')  # Plot training loss curve
plt.plot(test_losses, label='Validation loss')  # Plot test/validation loss curve
plt.legend(frameon=False)  # Add legend to plot

# Import helper module (should be in the repo)
import helper  # Import helper visualization functions

# Test out your network!

model.eval()  # Evaluation mode, so dropout is disabled during inference

# Take one batch from the test loader and use its first image
dataiter = iter(testloader)
images, labels = next(dataiter)
img = images[0]
# Flatten the image into a single row of 784 values
img = img.view(1, 784)

# Calculate the class log-probabilities for img without tracking gradients.
# The module is called directly rather than through .forward() so that any
# registered hooks run as well.
with torch.no_grad():
    output = model(img)

ps = torch.exp(output)  # Log-probabilities -> probabilities

# Plot the image and probabilities
helper.view_classify(img.view(1, 28, 28), ps, version='Fashion')

# Inference and Validation

# Overfitting

# Inference

# Next Up!