import torch
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
import torchvision.transforms as transforms
import glob
import PIL.Image
import os
import numpy as np

!unzip -q road_following.zip

unzip:  cannot find or open road_following.zip, road_following.zip.zip or road_following.zip.ZIP.

def get_x(path, width):
    """Return the x target encoded in an image filename, centred and scaled.

    The second underscore-separated field of ``path`` is parsed as an
    integer pixel column, then mapped so that ``0`` becomes ``-1.0``,
    ``width / 2`` becomes ``0.0`` and ``width`` becomes ``1.0``.

    Args:
        path: Filename whose second ``_``-separated field is an integer.
        width: Image width in pixels used for the normalisation.

    Returns:
        The normalised x value as a float.
    """
    half_width = width / 2
    pixel_x = float(int(path.split("_")[1]))
    return (pixel_x - half_width) / half_width

def get_y(path, height):
    """Return the y target encoded in an image filename, centred and scaled.

    The third underscore-separated field of ``path`` is parsed as an
    integer pixel row, then mapped so that ``0`` becomes ``-1.0``,
    ``height / 2`` becomes ``0.0`` and ``height`` becomes ``1.0``.

    Args:
        path: Filename whose third ``_``-separated field is an integer.
        height: Image height in pixels used for the normalisation.

    Returns:
        The normalised y value as a float.
    """
    half_height = height / 2
    pixel_y = float(int(path.split("_")[2]))
    return (pixel_y - half_height) / half_height

class XYDataset(torch.utils.data.Dataset):
    """Dataset of images whose (x, y) regression target is encoded in the filename.

    Every ``*.jpg`` directly inside ``directory`` is one sample. The target is
    parsed from the file's basename by ``get_x`` / ``get_y`` and normalised
    against that image's own width and height.

    Args:
        directory: Folder searched (non-recursively) for ``*.jpg`` files.
        random_hflips: When true, each sample is mirrored horizontally with
            probability 0.5 and its x target is negated to match. When false,
            no flipping is performed.
    """

    def __init__(self, directory, random_hflips=False):
        self.directory = directory
        self.random_hflips = random_hflips
        # glob yields files in arbitrary order; sorting keeps sample indices
        # stable from one run to the next.
        self.image_paths = sorted(glob.glob(os.path.join(self.directory, '*.jpg')))
        # Positional arguments are brightness, contrast, saturation and hue.
        self.color_jitter = transforms.ColorJitter(0.3, 0.3, 0.3, 0.3)

    def __len__(self):
        """Return the number of image files found in the directory."""
        return len(self.image_paths)

    def _should_flip(self):
        """Return True when the current sample should be mirrored horizontally."""
        # np.random.rand() with no arguments returns a plain scalar, avoiding
        # the deprecated conversion of a 1-element array to float.
        return bool(self.random_hflips and np.random.rand() > 0.5)

    def __getitem__(self, idx):
        """Load, augment and normalise one sample.

        Returns:
            A tuple ``(image, target)`` where ``image`` is a float tensor of
            shape ``(3, 224, 224)`` and ``target`` is a float tensor
            ``[x, y]``.
        """
        image_path = self.image_paths[idx]
        file_name = os.path.basename(image_path)

        # The context manager releases the file handle; convert() returns an
        # independent in-memory copy and guarantees three channels.
        with PIL.Image.open(image_path) as source:
            image = source.convert('RGB')

        width, height = image.size
        x = float(get_x(file_name, width))
        y = float(get_y(file_name, height))

        if self._should_flip():
            image = transforms.functional.hflip(image)
            # Mirroring the picture mirrors the horizontal target as well.
            x = -x

        image = self.color_jitter(image)
        image = transforms.functional.resize(image, (224, 224))
        image = transforms.functional.to_tensor(image)
        # Reverse the channel axis (RGB -> BGR).
        # NOTE(review): presumably matches a BGR camera feed at inference
        # time -- confirm against the deployment code.
        image = image.numpy()[::-1].copy()
        image = torch.from_numpy(image)
        image = transforms.functional.normalize(image, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

        return image, torch.tensor([x, y]).float()
    
dataset = XYDataset('dataset_xy', random_hflips=False)

# Fail early with an actionable message: an empty dataset would otherwise
# surface later as an opaque sampler error when the loaders are built.
if len(dataset) == 0:
    raise RuntimeError(
        "No .jpg images found in 'dataset_xy'; extract the training data before running."
    )

# Hold out a fixed fraction of the samples for evaluation.
test_percent = 0.1
num_test = int(test_percent * len(dataset))
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [len(dataset) - num_test, num_test])

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0
)

# Evaluation gains nothing from shuffling, so the held-out samples are read
# in a fixed order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=0
)

# Start from a ResNet-18 initialised with torchvision's default pretrained weights.
model = models.resnet18(weights='DEFAULT')

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /data/models/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 70.4MB/s]

# Replace the classification head with a 2-output regression head (x, y).
# Reading in_features from the existing layer keeps this correct if the
# backbone is swapped for one with a different feature width.
model.fc = torch.nn.Linear(model.fc.in_features, 2)
# Prefer the GPU, but fall back to the CPU so the script still runs on
# machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

NUM_EPOCHS = 70
BEST_MODEL_PATH = 'best_steering_model_xy.pth'
# Sentinel larger than any expected loss so the first epoch always saves.
best_loss = 1e9

optimizer = optim.Adam(model.parameters())

for epoch in range(NUM_EPOCHS):

    # Training pass: one optimizer step per batch, accumulating the batch
    # losses so the epoch mean can be reported.
    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = F.mse_loss(outputs, labels)
        train_loss += float(loss)
        loss.backward()
        optimizer.step()
    train_loss /= len(train_loader)

    # Evaluation pass: gradients are not needed, so disable autograd to avoid
    # building graphs that would only consume memory.
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = F.mse_loss(outputs, labels)
            test_loss += float(loss)
    test_loss /= len(test_loader)

    print('%f, %f' % (train_loss, test_loss))
    # Keep only the weights with the lowest held-out loss seen so far.
    if test_loss < best_loss:
        torch.save(model.state_dict(), BEST_MODEL_PATH)
        best_loss = test_loss

/tmp/ipykernel_2462/3355174262.py:28: DeprecationWarning: Conversion of an array with ndim > 0 to a scalar is deprecated, and will error in future. Ensure you extract a single element from your array before performing this operation. (Deprecated NumPy 1.25.)
  if float(np.random.rand(1)) > 0.5:

1.461253, 68.896118
0.379657, 15.887159
0.180744, 0.432669
0.171789, 0.336726
0.160095, 0.188593
0.174291, 0.131379
0.150816, 0.126502
0.162418, 0.101258
0.186127, 0.090007
0.159636, 0.046060
0.165355, 0.106158
0.181139, 0.066160
0.166299, 0.042958
0.177655, 0.073267
0.171839, 0.032873
0.141438, 0.117093
0.159239, 0.049124
0.180595, 0.138526
0.193431, 0.096698
0.155408, 0.091011
0.146099, 0.057740
0.137881, 0.118455
0.170031, 0.069965
0.165322, 0.078236
0.150844, 0.078809
0.143889, 0.100402
0.137227, 0.077092
0.151465, 0.067214
0.149363, 0.071974
0.160229, 0.029512
0.130077, 0.166332
0.118133, 0.090713
0.143178, 0.064750
0.139555, 0.079991
0.131163, 0.093569
0.130798, 0.099361
0.126938, 0.073072
0.146277, 0.061280
0.146091, 0.084018
0.151560, 0.169702
0.124117, 0.066804
0.124103, 0.061388
0.117967, 0.070555
0.127095, 0.060437
0.143876, 0.161006
0.111974, 0.101690
0.136170, 0.121649
0.119486, 0.125418
0.111126, 0.160980
0.122715, 0.101768
0.115224, 0.051788
0.120947, 0.109113
0.116468, 0.054965
0.131237, 0.066182
0.102397, 0.072121
0.113371, 0.108749
0.139890, 0.062482
0.135935, 0.104374
0.115301, 0.054823
0.094297, 0.162255
0.119488, 0.071927
0.145756, 0.187926
0.119822, 0.093428
0.124774, 0.236734
0.103701, 0.062766
0.103254, 0.082612
0.106045, 0.029226
0.093366, 0.034279

Road Follower - Train Model

Download and extract data

Create Dataset Instance

Split dataset into train and test sets

Create data loaders to load data in batches

Define Neural Network Model

Train Regression: