CSE447/517: Assignment 0, PyTorch/ML review (Winter '24)¶
Author: Yegor Kuznetsov
Thanks to Skyler Hallinan and Melanie Sclar for feedback!
Some PyTorch content adapted from Stanford's CS224N PyTorch Tutorial; specifically, part 1 and the autograd introduction in part 3.
In this notebook we will have a basic introduction to PyTorch and review some machine learning content. The following resources have been used in preparation of this notebook:
- Stanford's CS224N PyTorch Tutorial
- Official PyTorch Documentation on Deep Learning with PyTorch: A 60 Minute Blitz
If this is your first time using PyTorch we recommend going through the full Stanford CS224N PyTorch Tutorial, as the PyTorch content in this notebook is significantly condensed.
Introduction¶
PyTorch is a deep learning framework, one of the two main frameworks alongside TensorFlow. Let's start by importing PyTorch:
import torch
import torch.nn as nn
from torch.nn.functional import mse_loss
We are all set to start our tutorial. Let's dive in!
Part 1: Tensors¶
This part is adapted from Stanford's CS224N PyTorch Tutorial
Tensors are PyTorch's most basic building block. Each tensor is a multi-dimensional matrix; for example, a 256x256 square image might be represented by a 3x256x256 tensor, where the first dimension represents color. Here's how to create a tensor from a python list:
list_of_lists = [
[1, 2, 3],
[4, 5, 6],
]
# This is just a normal python list
print(list_of_lists)
# And now we make a PyTorch tensor with the same data
data = torch.tensor(list_of_lists)
print(data)
# We can, of course, create the tensor directly from a list (of lists)
data = torch.tensor([
[0, 1],
[2, 3],
[4, 5],
])
print(data)
# ValueError: tensors must be rectangular -- rows cannot have different lengths
torch.tensor([
[0, 1],
[2, 3, 4],
])
Each tensor has a data type: the major data types you'll need to worry about are floats (torch.float32) and integers (torch.int). You can specify the data type explicitly when you create the tensor, or it can be determined implicitly based on the provided data.
# Initializing a tensor with an explicit data type
# Notice the dots after the numbers in the output, which specify that they're floats
data = torch.tensor([
[0, 1],
[2, 3],
[4, 5],
], dtype=torch.float32)
print(data)
# Initializing a tensor with an implicit data type
# Notice the dots after the numbers, which specify that they're floats
data = torch.tensor([
[0.11111111, 1],
[2, 3],
[4, 5],
])
print(data, data.dtype)
# Contrast that to this version, which contains ints
data = torch.tensor([
[0, 1],
[2, 3],
[4, 5],
])
print(data, data.dtype)
From here on out, for brevity, we frequently rely on a Jupyter notebook's behavior of displaying the value of the last line of a cell.
Note that tensors are more flexible than just matrices, being able to have any number of dimensions.
Utility functions also exist to create tensors with given shapes and contents:
torch.zeros(2, 5) # a tensor of all zeros
torch.ones(3, 4) # a tensor of all ones
torch.rand(3, 3) # a tensor of uniform random numbers in [0, 1)
rr = torch.arange(1, 10) # integers from the half-open range [1, 10)
print(rr)
Mathematical operations can be used with tensors fairly flexibly. Operations involving a tensor and a number are easy to reason with, while operations involving tensors require them to have compatible shapes.
print(rr + 2)
print(rr * 2)
print(rr ** 2)
print(rr % 3)
a = torch.tensor([[1, 2], [2, 3], [4, 5]]) # (3, 2)
b = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]]) # (2, 4); a @ b will have shape (3, 4)
print("A is", a)
print("B is", b)
# a.matmul(b) and a@b do the same thing -- matrix multiply
print("a.matmul(b) is", a.matmul(b))
print("a @ b is", a @ b)
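Shape compatibility goes beyond exact matches: PyTorch broadcasts dimensions of size 1. A quick sketch (the variable names here are just for illustration):

```python
import torch

# a (3, 1) column combined with a (1, 4) row broadcasts to a (3, 4) result
col = torch.tensor([[1], [2], [3]])      # shape (3, 1)
row = torch.tensor([[10, 20, 30, 40]])   # shape (1, 4)
print((col + row).shape)  # torch.Size([3, 4])
print(col + row)
```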
The shape of a tensor (which can be accessed by .shape) is defined as the dimensions of the tensor. Here's some examples:
matr_2d = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(matr_2d.shape)
print(matr_2d)
matr_3d = torch.tensor([[[1, 2, 3, 4], [-2, 5, 6, 9]], [[5, 6, 7, 2], [8, 9, 10, 4]], [[-3, 2, 2, 1], [4, 6, 5, 9]]])
print(matr_3d)
print(matr_3d.shape)
Reshaping tensors can be used to make batch operations easier (more on that later), but be careful that the data is reshaped in the order you expect:
rr = torch.arange(1, 16)
print(rr)
print(rr.shape)
rr = rr.view(5, 3)
print(rr)
print(rr.shape)
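To see the ordering caveat concretely, here is a small sketch showing that `view` fills values row by row; it does not transpose:

```python
import torch

rr = torch.arange(1, 7)      # tensor([1, 2, 3, 4, 5, 6])
a = rr.view(2, 3)            # rows: [1, 2, 3] and [4, 5, 6]
b = rr.view(3, 2).t()        # rows: [1, 3, 5] and [2, 4, 6] -- same shape as (2, 3)? No: (2, 3) vs a different layout
print(a)
print(b)
```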
Finally, you can also inter-convert tensors with NumPy arrays:
import numpy as np
# numpy.ndarray --> torch.Tensor:
arr = np.array([[1, 0, 5]])
data = torch.tensor(arr)
print("This is a", type(data), data)
# torch.Tensor --> numpy.ndarray:
new_arr = data.numpy()
print("This is a", type(new_arr), new_arr)
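One detail worth knowing when inter-converting (a quick sketch, not needed for this assignment): `torch.from_numpy` shares memory with the source array, while `torch.tensor` makes an independent copy.

```python
import numpy as np
import torch

arr = np.array([1.0, 2.0, 3.0])
shared = torch.from_numpy(arr)   # shares the underlying buffer with arr
copied = torch.tensor(arr)       # independent copy of the data

arr[0] = 99.0
print(shared[0].item())  # 99.0 -- reflects the change to arr
print(copied[0].item())  # 1.0  -- unaffected
```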
One of the reasons why we use tensors is vectorized operations: operations that can be conducted in parallel over a particular dimension of a tensor.
data = torch.arange(1, 36, dtype=torch.float32).reshape(5, 7)
print("Data is:", data)
# Summing along dim=0 collapses the rows, giving one value per column...
print("Sum along dim=0 (one value per column):")
print(data.sum(dim=0))
# ...while summing along dim=1 collapses the columns, giving one value per row.
print("Sum along dim=1 (one value per row):")
print(data.sum(dim=1))
# Other reductions are available:
print("Standard deviation along dim=1:")
print(data.std(dim=1))
data.sum()
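A quick way to remember the `dim` argument: `dim=k` is the dimension that gets collapsed by the reduction. A small sketch:

```python
import torch

t = torch.ones(5, 7)
print(t.sum(dim=0).shape)  # torch.Size([7]) -- dim 0 collapsed
print(t.sum(dim=1).shape)  # torch.Size([5]) -- dim 1 collapsed
```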
Indexing
You can access arbitrary elements of a tensor using the [] operator.
x = torch.Tensor([
[[1, 2], [3, 4]],
[[5, 6], [7, 8]],
[[9, 10], [11, 12]]
])
print(x)
print(x.shape)
# Access the 0th element along the first dimension, i.e. the first 2x2 matrix
x[0] # Equivalent to x[0, :, :]
x[:, 0]
x[1, :, 0]
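A few more indexing patterns often come in handy (a quick sketch; the tensor here is just for illustration):

```python
import torch

m = torch.arange(12).view(3, 4)
print(m[-1])        # last row: tensor([ 8,  9, 10, 11])
print(m[:, 1:3])    # columns 1 and 2 of every row
print(m[m > 5])     # boolean mask: all elements greater than 5, flattened
```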
Built-in functions
PyTorch contains many pre-defined functions for tensor operations beyond simple numerical ones. Most functions that operate on a single tensor also have an analogous method version callable directly on the tensor.
See PyTorch's tensor class reference for a more complete list; the examples below are the ones relevant to this assignment.
# construct an example tensor
x = torch.tensor([-1.5330, 0.4530, -0.7361, -0.3403, 2.4078])
x
The sigmoid activation function, typically represented by the symbol $\sigma$, is defined as:
$$ \sigma(x) = \frac{1}{1 + e^{-x}} $$
When applied to a tensor, the function is applied per-element to produce a new tensor with the same shape.
# sigmoid is predefined for you within PyTorch and can be applied to any tensor
torch.sigmoid(x)
# sigmoid can also be called using this alternate notation
x.sigmoid()
The relu activation function is defined as:
$$ \text{relu}(x) = \max(0, x) $$
Like sigmoid, it is applied to tensors element-wise.
# positive values remain, while negative values become zero
torch.relu(x)
# relu can also be called using the alternate notation
x.relu()
Part 2: Implementing a Neural Network By Hand¶
Now that you are an expert in PyTorch tensors, we can start creating and using a neural network. Consider this (tiny) neural network:
Recall that an artificial neural network is composed of layers of connections between nodes. Each node performs a weighted aggregation of outputs from the previous layer and applies a nonlinearity to produce its output. See here for a refresher of neural networks terminology.
Our example network has 3 input nodes, a hidden layer with 4 nodes, and 2 output nodes. The network has two layers, one being the transformation from input to the hidden layer, and the other the transformation from the hidden layer to output. We will use ReLU activation as the nonlinearity for the first layer and sigmoid as the nonlinearity for the second layer (see formalization in exercise 1).
We will work on a synthetic dataset (X, Y), drawn from the distribution defined in the function generate_data. Understanding the exact nature of this distribution is not necessary: you only need to know that it defines some deterministic function mapping 3-dimensional vectors to 2-dimensional vectors. Notably, the output is binary (either 0 or 1), which is why we use sigmoid activation for the last layer (output). Note that this is not a classification task; there are simply two output features, each of which can be either 0 or 1.
def generate_data(N):
    X = torch.randn(N, 3)
    Y = (torch.stack([
        (X[:, 0] + X[:, 1]) * (X[:, 2] > 0) + (X[:, 0] - X[:, 1]) * (X[:, 2] < 0),
        X.norm(dim=1)
    ]).t().abs() > 1.33).float()
    return X, Y
# fixed seed for reproducible results
torch.manual_seed(0)
X, Y = generate_data(20)
X
Y
In order to implement the weighted aggregation for each node, we can represent the weights of a layer as a matrix, and add a vector as the bias. Initially, these matrices are randomly initialized and subsequently optimized such that the network's performance improves.
Parameters for the above toy network are defined below. In this case, we have:
- a linear layer from 3 to 4 nodes (3x4 matrix)
- bias for the 4 hidden nodes (1x4 matrix)
- a linear layer from 4 to 2 nodes (4x2 matrix)
- bias for the 2 output nodes (1x2 matrix)
Take a moment to make sure it's clear why the parameters are defined as such.
# initialize random values for NN weights and biases
W1 = torch.randn(3, 4) * 0.2
B1 = torch.randn(1, 4) * 0.2
W2 = torch.randn(4, 2) * 0.2
B2 = torch.randn(1, 2) * 0.2
Exercise 1¶
Manually perform a forward pass on the toy neural network
First, we calculate the pre-activation values $z_1$ for the hidden layer, using the input $x$ directly. The hidden node values $h$ are computed by applying the relu function, which is defined as an element-wise $\text{relu}(x) := \max(0,x)$. We use this to compute the pre-activation values $z_2$ of the output layer. The output node values are computed with the sigmoid activation function.
The following equations are the steps needed to calculate the forward pass for the neural network given some input $x$, with $\times$ representing matrix multiplication.
\begin{align*} z_1 &= x \times W_1 + B_1 \\ h &= \text{relu}(z_1) \\ z_2 &= h \times W_2 + B_2 \\ \widehat{y} &= \sigma(z_2) \end{align*}
Look near the end of the tensors tutorial for examples of how to use PyTorch's implementations of relu and sigmoid, as well as how to do matrix multiplication using @.
def forward(x):
    # TODO:
# the network's output should have the same shape as Y, but totally wrong values
y_hat = forward(X)
y_hat
Finally, we will use mean squared error (MSE) as the numerical representation of how poorly the neural network performed on the task--i.e., the loss function. Mean squared error is defined as the mean of the squared differences between the true labels $Y$ and the predicted labels $\widehat{y}$, i.e. $\frac{1}{n}\sum_{i=1}^{n}(\widehat{y}_i - Y_i)^2$, or equivalently $\text{mean}(||\widehat{y} - Y||_2^2)$. Since the size $n$ is fixed, the mean is always proportional to the sum, so we use the sum instead:
$$ \text{L} = ||\widehat{y} - Y||_2^2 $$
We will use this definition of $\text{L}$ for easier computation. In PyTorch, this corresponds to mean squared error with reduction='sum'.
# loss is a numerical representation of how "poorly" the NN did the task
loss = ((y_hat - Y)**2).sum()
loss
# there's also a built-in function to do this
mse_loss(y_hat, Y, reduction='sum')
Exercise 2¶
Convert the provided math into code that manually performs the backward pass on the toy neural network. Modify the next code cell for this.
Reference exercise 1 for forward pass and loss calculation.
Note the derivatives of the activation functions:
\begin{align*} \sigma'(x) &= \sigma(x) (1 - \sigma(x)) \\ \text{relu}'(x) &= H(x) = \begin{cases} 1, & x \ge 0 \\ 0, & x < 0 \end{cases} \end{align*}
relu isn't differentiable at $x = 0$, but we can hardcode a value for the $x=0$ derivative for practical purposes.
If we break it apart like shown below, we can easily compute gradients by repeatedly using the chain rule.
\begin{align*} \frac{\partial\text{L}}{\partial\widehat{y}} &= 2 (\widehat{y} - Y) \\ \frac{\partial\text{L}}{\partial z_2} &= \frac{\partial\text{L}}{\partial\widehat{y}} \cdot \frac{\partial\widehat{y}}{\partial z_2} = \frac{\partial\text{L}}{\partial\widehat{y}} \cdot \frac{\partial\sigma(z_2)}{\partial z_2} \\ &= \frac{\partial\text{L}}{\partial\widehat{y}} \cdot \sigma(z_2) (1 - \sigma(z_2)) \\ &= \frac{\partial\text{L}}{\partial\widehat{y}} \cdot \widehat{y} (1 - \widehat{y}) \\ \frac{\partial\text{L}}{\partial h} &= \frac{\partial\text{L}}{\partial z_2}\frac{\partial z_2}{\partial h} = \frac{\partial\text{L}}{\partial z_2} W_2 \\ \frac{\partial\text{L}}{\partial z_1} &= \frac{\partial\text{L}}{\partial h} \frac{\partial h}{\partial z_1} = \frac{\partial\text{L}}{\partial h} H(z_1) \\ \end{align*}
$H(z_1)$ is easiest computed with $\verb|(z1 >= 0).float()|$ in PyTorch, or something similar.
Computations for weight and bias gradients are provided. Bias gradients are just aggregated pre-activation node value gradients; weight gradients are an outer product of the node outputs with pre-activation node value gradients, aggregated over the batch.
def backprop(x):
    # TODO: forward pass
    z1 =
    h =
    z2 =
    y_hat =

    # TODO: backward pass
    y_hat_grad =
    z2_grad =
    h_grad =
    z1_grad =

    # calculate parameter gradients
    B2_grad = z2_grad.sum(0)
    W2_grad = (h[:,:,None] * z2_grad[:,None,:]).sum(0)
    B1_grad = z1_grad.sum(0)
    W1_grad = (x[:,:,None] * z1_grad[:,None,:]).sum(0)

    # output tuple of gradients for all parameters
    return W1_grad, B1_grad, W2_grad, B2_grad
# take note of the loss we have before optimization
y_hat = forward(X)
loss = ((y_hat - Y)**2).sum()
loss
After implementing backprop, gradient descent is easy: just take small steps in the opposite direction. This corresponds to multiplying some small constant by the gradient and subtracting that from the parameters. In math form that looks like:
$$ w_{t+1} = w_t - \alpha \nabla f(w_t) $$
This small constant is called the learning rate, for which the symbol $\alpha$ is usually used.
The next cell demonstrates a single step of gradient descent. Try running the cell repeatedly, and watch as the loss reduces with each step.
# perform one step of gradient descent
lr = 0.01
W1_grad, B1_grad, W2_grad, B2_grad = backprop(X)
W1 -= W1_grad * lr
B1 -= B1_grad * lr
W2 -= W2_grad * lr
B2 -= B2_grad * lr
# forward pass again, notice the change in loss
y_hat = forward(X)
loss = ((y_hat - Y)**2).sum()
loss
Exercise 3¶
Could loss ever increase when you run the above cell?
Try running the above cell multiple times and notice how the loss keeps decreasing. However, is the loss guaranteed to not increase? Edit this cell to answer below and explain why.
Hint: What if the learning rate were really large? Would that be a meaningful difference compared to this case?
Answer:
Part 3: PyTorch is a Deep Learning Framework¶
Autograd introduction adapted from Stanford's CS224N PyTorch Tutorial.
While that was very fun and intellectually stimulating, we can actually make PyTorch do all of that math for us instead. PyTorch is well-known for its automatic differentiation feature. We can call the backward() method to ask PyTorch to calculate the gradients, which are then stored in the grad attribute for tensors that are marked as requiring gradients.
# x is just an example tensor
# `requires_grad_` tells PyTorch to store gradients for the tensor
x = torch.tensor([2., 3., 4.]).requires_grad_(True)
# `x.grad` is currently empty, because we haven't called `backward()` on anything
print(x.grad)
y = (x**2 + x).sum()
print(y)
y.backward()
print(x.grad)
Let's run the backward pass again to see what happens. Feel free to run the next cell repeatedly and watch as the stored gradients keep increasing.
# Notice that this is the same calculation, but the gradients increase!
# This shows that `.backward()` adds to the gradients without resetting them
y = (x**2 + x).sum()
print(y)
y.backward()
print(x.grad)
We can see that x.grad is updated to be the sum of the gradients calculated so far. When we run backprop in a neural network, we sum up all the gradients for a particular parameter before making an update. This is exactly what is happening here! This is also why we need to run zero_grad() in every training iteration (more on this later). Otherwise our gradients would keep building up from one training iteration to the next, causing our updates to be wrong.
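The accumulation and resetting can be sketched directly on an example tensor; here, `grad.zero_()` plays the role that `optimizer.zero_grad()` plays for a model's parameters (the variable names are just for illustration):

```python
import torch

v = torch.tensor([2., 3., 4.]).requires_grad_(True)
out = (v**2 + v).sum()
out.backward()
first = v.grad.clone()   # gradients from the first backward pass: 2v + 1

v.grad.zero_()           # reset instead of letting gradients accumulate
out = (v**2 + v).sum()
out.backward()
print(torch.equal(first, v.grad))  # True -- same gradients, not doubled
```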
Using Autograd on our Toy Neural Network¶
Applying the Autograd feature to our prior code, we can skip doing all the math for backprop. Just by doing the forward pass, we get the backward pass completely for free :)
# `requires_grad_` tells pytorch to save gradients for those tensors
W1.requires_grad_(True)
B1.requires_grad_(True)
W2.requires_grad_(True)
B2.requires_grad_(True)
# remove the gradients so running this cell repeatedly doesn't break anything
W1.grad = None
B1.grad = None
W2.grad = None
B2.grad = None
y_hat = forward(X)
# this time we use `mse_loss` built into PyTorch, as it is, in fact, a deep learning framework
loss = mse_loss(y_hat, Y, reduction='sum')
loss.backward() # this is the line which actually calculates gradients!
loss
# W1.grad now contains the gradients of W1 without having to manually calculate them!
print('PyTorch Autograd:')
print(W1.grad)
# If we *do* manually calculate it, we _should_ find them to be equal
with torch.no_grad():
W1_grad, B1_grad, W2_grad, B2_grad = backprop(X)
print('Manually calculated gradient:')
print(W1_grad)
If your backprop code matches the provided math, you should see your calculated gradients being the same as the automatic gradients. Run the next cell and make sure the differences are close to zero.
Under the hood, PyTorch uses faster/better versions of what we implemented and tends to get slightly different answers. If you're not satisfied with this level of similarity, feel free to switch everything to float64, and you should find that it matches to around 15 decimal places.
print(f'W1 grad distance: {(W1_grad - W1.grad).norm().item():.10f}')
print(f'B1 grad distance: {(B1_grad - B1.grad).norm().item():.10f}')
print(f'W2 grad distance: {(W2_grad - W2.grad).norm().item():.10f}')
print(f'B2 grad distance: {(B2_grad - B2.grad).norm().item():.10f}')
Neural Network Modules¶
However, listing out all parameters without structure and defining the operations manually would be a mess with a more complicated architecture, especially when it comes to handling updates. PyTorch, being a deep learning framework, provides structure and utilities to help us handle every part of that intelligently.
We use the nn.Module base class to organize our code. This allows PyTorch to keep track of all parameters and their gradients, and to apply updates once we use an optimizer. PyTorch allows nn.Modules to be nested within other nn.Modules, which enables effective code reuse and organization. Notably, many common layers are already defined as nn.Modules within nn.
Here's an example PyTorch neural network, annotated with comments explaining parts:
class Model(nn.Module):  # replace "Model" with what you want to call the network class
    def __init__(self):  # constructor defined as `__init__`. You can also provide args.
        super().__init__()  # call the super class's constructor so PyTorch properly registers this as an `nn.Module`
        # store the model's parameters in fields
        self.conv1 = nn.Conv2d(1, 20, 5)  # nn.Conv2d is one of PyTorch's many built-in layers
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):  # the `forward` method is how you run the model, and `__call__` gets aliased to it
        # this is where you should implement the forward pass
        x = self.conv1(x).relu()
        return self.conv2(x).relu()
For more information/examples, see the following:
# Here's another example network defined using `nn.Module`, and
# using `nn.Sequential` internally to organize the structure better
class ExampleNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.main = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.main(x)
# We can instantiate an instance of this example network with the constructor
model = ExampleNetwork()
# `nn.Module` exposes a simple display function just by `print`ing the model
print(model)
# Run the model on data just by calling the model
model(torch.randn(1, 28, 28))
Exercise 4¶
Implement the same toy neural network we hand-coded backpropagation for, but this time using PyTorch's neural network modules.
Note that from here on we use mse_loss with its default reduction='mean'.
- nn.Sequential -- A sequential container of modules.
- nn.Linear -- Applies a linear transformation to the incoming data: $y = xA^T + b$
- nn.ReLU -- Applies the rectified linear unit function element-wise: $\text{ReLU}(x) = \max(0,x)$
- nn.Sigmoid -- Applies the element-wise function: $\text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)}$
class ToyModel(nn.Module):
    def __init__(self):
        super().__init__()
        # TODO

    def forward(self, x):
        # TODO
model = ToyModel()
model
y_hat = model(X)
loss = mse_loss(y_hat, Y)
loss
Exercise 5¶
Implement an optimization loop using the following PyTorch components. To finish A0, optimize the model to get mse loss below 0.01 on the toy data.
Once you are done, make sure to download your modified notebook and upload to gradescope.
Note that in a real task, we loop through the data, running the model on a small part of it each iteration. A real task should also always involve a separation between train and test data. However, this is a toy minimal example to play with the syntax and tools available in PyTorch; although this exercise is similar to a typical training loop, it's more apt to call it just optimizing to overfit some data. This network does not have the expressive power necessary to model the true distribution of the data.
- Select a suitable optimizer.
  - torch.optim.SGD is what you manually implemented above, but you will likely find it necessary to either configure parameters such as momentum, or use an alternate optimizer such as torch.optim.Adam. We suggest experimenting with this to find a configuration which effectively reduces loss.
  - Figure out what parameters are required when defining it. Make sure you define the optimizer outside of the loop, as the optimizer should not be redefined each time.
- Use a loop to repeatedly run the optimization. You may have to loop for many steps for it to properly converge, depending on the settings you select for your optimizer.
  - Our solution runs for 10,000 steps for good measure, which takes around 10 seconds on CPU. It is possible to have it converge with far fewer steps.
  - optimizer.zero_grad() will zero all the gradients on parameters given to the optimizer.
  - Run the model on the data X to produce predicted values for Y.
  - Calculate the loss using mse_loss.
  - Call .backward() on the loss to populate the parameters with gradients.
  - optimizer.step() performs a single optimization step (parameter update based on gradients).
- See "taking an optimization step" in the torch.optim documentation for an example.
- You may also find the "Optimizing the Model Parameters" section of the PyTorch Quickstart Tutorial helpful, though it implements "real" training code, so we omit the data iteration.
This network just barely has the expressive ability necessary to fit this amount of data, and you will have to play with the optimizer configuration a fair bit before you get a low enough loss.
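As a syntax reference only (so as not to give away the exercise), here is the loop structure described above, sketched on an unrelated scalar problem: minimizing $(w - 3)^2$ for a single parameter $w$.

```python
import torch

w = torch.tensor([0.0], requires_grad=True)
optimizer = torch.optim.SGD([w], lr=0.1)   # defined once, outside the loop

for step in range(100):
    optimizer.zero_grad()            # clear accumulated gradients
    loss = ((w - 3.0) ** 2).sum()    # compute the loss
    loss.backward()                  # populate w.grad
    optimizer.step()                 # update w using the gradient

print(w.item())  # close to 3.0
```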
model = ToyModel()
# TODO: optimize the model
# Exercise 5: Don't modify this cell, but make sure to run it.
y_hat = model(X)
loss = mse_loss(y_hat, Y)
loss
The following is not graded.
Notice that if we try different data from the same distribution, it does terribly. Why is this? What should we do differently to have it generalize better to the true distribution of the data? Feel free to add another cell below this with a modified optimization loop which trains on the distribution by calling generate_data each iteration instead of just overfitting on a single batch.
X2, Y2 = generate_data(100)
y_hat = model(X2)
loss = mse_loss(y_hat, Y2)
loss