# Import necessary packages

%matplotlib inline  # Enable inline plotting in notebook
%config InlineBackend.figure_format = 'retina'

import numpy as np  # Import NumPy
import torch  # Import PyTorch library

import helper  # Import helper visualization functions

import matplotlib.pyplot as plt  # Import matplotlib for visualization

# The MNIST datasets are hosted on yann.lecun.com, which has moved behind
# Cloudflare protection. Run this to enable the dataset download: it installs
# a global urllib opener that sends a browser-like User-agent header.
# Reference: https://github.com/pytorch/vision/issues/1938

# The standard-library urllib is used directly; the `six` compatibility shim
# is a separate third-party package that may not be installed.
import urllib.request

opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)

### Run this cell

from torchvision import datasets, transforms

# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Download and load the training data
trainset = datasets.MNIST(
    '~/.pytorch/MNIST_data/',
    download=True,
    train=True,
    transform=transform,
)

# Serve the training set in shuffled batches of 64
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
)

# Template for looping over the training set one batch at a time.
for image, label in trainloader:
    ## do things with images and labels
    pass  # placeholder so the loop has a body and is valid Python

# Pull a single batch out of the loader and look at what it contains
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(type(images))
print(images.shape)  # (batch_size, channels, height, width)
print(labels.shape)  # (batch_size,)

# Show the second image of the batch in grayscale; the trailing semicolon
# keeps the notebook from echoing the object returned by imshow.
plt.imshow(images[1].squeeze().numpy(), cmap='Greys_r');

def activation(x):
    """Apply the sigmoid function element-wise.

    Args:
        x: A torch tensor.

    Returns:
        A tensor holding ``1 / (1 + exp(-x))`` for every element of ``x``.
    """
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

# Flatten the input images so each one becomes a vector
inputs = images.view(images.shape[0], -1)

# Create random parameters for weights and biases
w1 = torch.randn(784, 256)  # input layer is 28x28 = 784; the hidden layer is chosen to have 256 units
b1 = torch.randn(256)       # one bias per node in the hidden layer

w2 = torch.randn(256, 10)   # hidden layer is 256; the output layer is 10 (digits 0 to 9)
b2 = torch.randn(10)        # one bias per output unit

# Hidden layer: linear transform of the inputs followed by the sigmoid activation
h = activation(inputs @ w1 + b1)

# Output layer: linear transform of the hidden activations (softmax is applied separately)
out = h @ w2 + b2

def softmax(x):
    """Compute the softmax along dim=1 of a batch of scores.

    Args:
        x: Tensor of shape (batch_size, n_classes).

    Returns:
        Tensor of the same shape as ``x`` whose rows are non-negative and
        sum to 1.
    """
    # Subtracting the row-wise maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing to inf, which would turn
    # the division below into NaN.
    shifted = x - x.max(dim=1, keepdim=True)[0]
    # Exponentiate once and reuse for both numerator and denominator
    exps = torch.exp(shifted)
    return exps / exps.sum(dim=1, keepdim=True)

# `out` is the output of the network from the previous exercise, with shape (64, 10)
probabilities = softmax(out)

# Shape check: should be (64, 10)
print(probabilities.size())
# Each row should sum to 1
print(torch.sum(probabilities, dim=1))

from torch import nn  # Import neural network module

class Network(nn.Module):
    """Fully connected classifier: 784 inputs -> 256 hidden units -> 10 outputs.

    The hidden layer is followed by a sigmoid and the output layer by a
    softmax over dim=1.
    """

    def __init__(self):
        super().__init__()

        # Linear maps: inputs -> hidden layer, then hidden layer -> one unit per digit
        self.hidden = nn.Linear(784, 256)
        self.output = nn.Linear(256, 10)

        # Activation modules applied after each linear layer
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run ``x`` through hidden -> sigmoid -> output -> softmax and return the result."""
        hidden_activations = self.sigmoid(self.hidden(x))
        return self.softmax(self.output(hidden_activations))

# Create the network and look at its text representation.
# Network is the class defined above.
model = Network()
model  # bare expression: shows the module's repr when run in a notebook

import torch.nn.functional as F  # Import functional API (F.relu, etc.)

class Network(nn.Module):
    """Same 784 -> 256 -> 10 classifier, written with functional activations.

    Only the linear layers are stored as submodules; the sigmoid and softmax
    are applied as plain function calls inside ``forward``.
    """

    def __init__(self):
        super().__init__()
        # Inputs to hidden layer linear transformation
        self.hidden = nn.Linear(784, 256)
        # Output layer, 10 units - one for each digit
        self.output = nn.Linear(256, 10)

    def forward(self, x):
        """Return the softmax (over dim=1) of the output layer for input ``x``."""
        # Hidden layer with sigmoid activation. torch.sigmoid is used because
        # PyTorch releases have flagged F.sigmoid as deprecated in its favour.
        x = torch.sigmoid(self.hidden(x))
        # Output layer with softmax activation
        x = F.softmax(self.output(x), dim=1)

        return x

## Your solution here
# Define a neural network with three fully connected layers
# The first hidden layer (fc1), second hidden layer (fc2), output layer
# Forward pass:
# pass the input through each layer,
# apply ReLU to hidden layers,
# and softmax to the output layer

# Inspect the first layer's parameters. This assumes `model` has an `fc1`
# layer, as in the three-layer network described in the exercise above.
fc1 = model.fc1
print(fc1.weight)
print(fc1.bias)

# Overwrite every bias with zero, in place
fc1.bias.data.fill_(0)

# Re-sample the weights in place from a normal distribution with standard dev = 0.01
fc1.weight.data.normal_(std=0.01)

# Grab some data
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Resize images in place into 1D vectors; new shape is (batch size, color channels, image pixels).
# The batch size is read from the tensor rather than hard-coded to 64.
images.resize_(images.shape[0], 1, 784)

# Forward pass through the network for a single image
img_idx = 0
ps = model.forward(images[img_idx,:])

# Reshape the flattened image back to 1x28x28 and display it with the network output
img = images[img_idx]
helper.view_classify(img.view(1, 28, 28), ps)

# Hyperparameters for our network
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Build a feed-forward network: two ReLU hidden layers followed by a
# softmax over dim=1 on the output layer.
layers = [
    nn.Linear(input_size, hidden_sizes[0]),
    nn.ReLU(),
    nn.Linear(hidden_sizes[0], hidden_sizes[1]),
    nn.ReLU(),
    nn.Linear(hidden_sizes[1], output_size),
    nn.Softmax(dim=1),
]
model = nn.Sequential(*layers)
print(model)

# Fetch one batch, flatten every image in place, and classify the first one
batch_iter = iter(trainloader)
images, labels = next(batch_iter)
batch_size = images.shape[0]
images.resize_(batch_size, 1, 784)

first_image = images[0]
ps = model.forward(first_image)
# Reshape back to 1x28x28 and display the image with the network output
helper.view_classify(first_image.view(1, 28, 28), ps)

print(model[0])  # Layers of the sequential model can be accessed by integer index
model[0].weight  # Weight parameter of that first layer

from collections import OrderedDict

# Same stack of layers as the previous sequential model, but each layer is
# registered under an explicit name instead of an integer position.
named_layers = OrderedDict()
named_layers['fc1'] = nn.Linear(input_size, hidden_sizes[0])
named_layers['relu1'] = nn.ReLU()
named_layers['fc2'] = nn.Linear(hidden_sizes[0], hidden_sizes[1])
named_layers['relu2'] = nn.ReLU()
named_layers['output'] = nn.Linear(hidden_sizes[1], output_size)
named_layers['softmax'] = nn.Softmax(dim=1)

model = nn.Sequential(named_layers)
model

print(model[0])  # Access the first layer by integer index
print(model.fc1)  # Access the same layer by the name it was given in the OrderedDict

# Neural networks with PyTorch

# Building networks with PyTorch

# Activation functions

# Your Turn to Build a Network

# Initializing weights and biases

# Forward pass

# Using `nn.Sequential`

# Neural networks with PyTorch

# Building networks with PyTorch

# Activation functions

# Your Turn to Build a Network

# Initializing weights and biases

# Forward pass

# Using nn.Sequential

# Using `nn.Sequential`