텐서

import torch
import numpy as np

# Build the same 2x2 integer matrix as a tensor in two ways: directly from a
# nested Python list, and by way of a NumPy array.
data = [[1, 2], [3, 4]]
np_array = np.array(data)

x_data = torch.tensor(data)
x_np = torch.from_numpy(np_array)

# Print the list-based tensor first, then the NumPy-based one.
for created in (x_data, x_np):
    print(created)

tensor([[1, 2],
        [3, 4]])
tensor([[1, 2],
        [3, 4]])

일반 리스트 데이터, 넘파이 데이터를 텐서로 만들 수 있습니다.

# A random 3x4 tensor used to inspect the basic tensor attributes.
tensor = torch.rand(3, 4)

# Report shape, dtype and device; one print call with a newline separator
# produces the same three output lines as three separate prints.
print(
    f"Shape of tensor: {tensor.shape}",
    f"Datatype of tensor: {tensor.dtype}",
    f"Device tensor is stored on: {tensor.device}",
    sep="\n",
)

# Move the tensor to the GPU only when CUDA reports one as available, and
# show the device again afterwards.
if torch.cuda.is_available():
    tensor = tensor.to('cuda')
    print(f"Device tensor is stored on: {tensor.device}")

Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu
Device tensor is stored on: cuda:0

텐서의 속성에는 모양, 자료형, 어느 장치에 저장되는지가 있습니다.

사용할 수 있는 GPU가 있다면 텐서가 GPU로 옮겨져 사용되는 모습입니다.

# Start from an all-ones 4x4 matrix and overwrite its second column with 0.
tensor = torch.ones(4, 4)
tensor[:, 1] = 0
print(tensor)

# Matrix product of the tensor with its own transpose, via the method form.
product_by_method = tensor.matmul(tensor.T)
print(f"tensor.matmul(tensor.T) \n {product_by_method} \n")

# Alternative syntax: the @ operator computes the same product.
product_by_operator = tensor @ tensor.T
print(f"tensor @ tensor.T \n {product_by_operator}")

tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])
tensor.matmul(tensor.T) 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]]) 

tensor @ tensor.T 
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])

텐서는 넘파이와 같이 값을 변경해줄 수 있습니다. 또한 텐서간 @ 연산자를 사용하면 행렬 곱 연산이 가능합니다.

분류기 학습하기

데이터 불러오고 정규화하기

import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every image as it is loaded: convert it to a
# tensor, then normalize each of the three channels with mean 0.5 and
# std 0.5 (the -1..1 range that imshow later maps back to 0..1).
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = transforms.Compose([to_tensor, normalize])

데이터를 불러올 때 바로 전처리를 적용할 수 있도록, transforms 모듈을 이용해 변환 파이프라인을 구성했습니다.

# Both CIFAR-10 splits share the same storage directory, download flag and
# preprocessing; only the `train` switch differs between them.
cifar_options = {'root': './data', 'download': True, 'transform': transform}

trainset = torchvision.datasets.CIFAR10(train=True, **cifar_options)

testset = torchvision.datasets.CIFAR10(train=False, **cifar_options)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz
Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified

CIFAR10 데이터를 불러옵니다. 이때 앞서 구현한 transform을 이용해 -1 ~ 1 범위로 정규화한 텐서로 변환합니다.

# Number of images per mini-batch; also reused when printing sample labels.
batch_size = 4

# Training data is shuffled so that mini-batches differ between epochs.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)

# Evaluation gains nothing from shuffling; a fixed order keeps the sample
# batch that is displayed later reproducible from run to run.
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

# Human-readable class names, looked up by integer label.
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

데이터를 배치단위로 묶어서 로더로 만듭니다.

import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    """Display a normalized image tensor with matplotlib.

    The tensor is rescaled with ``img / 2 + 0.5`` (values in -1..1 become
    0..1), converted to a NumPy array, and its axes are reordered from
    (0, 1, 2) to (1, 2, 0) before being handed to ``plt.imshow``.
    """
    unnormalized = img / 2 + 0.5
    pixels = np.transpose(unnormalized.numpy(), (1, 2, 0))
    plt.imshow(pixels)
    plt.show()

# Pull one mini-batch from the training loader. The built-in next() is used
# because DataLoader iterators in recent PyTorch releases no longer expose a
# .next() method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# Show the batch as one image grid, then print the class name of each image.
# Iterating the labels directly avoids assuming the batch holds exactly
# batch_size items.
imshow(torchvision.utils.make_grid(images))
print(' '.join(f'{classes[label]:5s}' for label in labels))

horse truck truck horse

학습용 이미지에는 무엇이 있는지 실제로 시각적으로 관찰하는 코드입니다.

우선 먼저 봐야 할 것은 iter와 next 함수입니다. iter는 반복 가능한(iterable) 객체에 적용하는 함수로, 해당 객체로부터 iterator 객체를 만들어 반환합니다.

iterator 객체는 객체 내 요소를 한 번에 하나씩 순서대로 액세스할 수 있습니다. 전체 자료를 한꺼번에 들고 있지 않고, 가져온 자료는 다시 보관하지 않기 때문에 메모리 절약이 가능합니다.

그 뒤 next 함수를 호출하면 iterator가 현재 가리키는 값을 반환하고, iterator는 다음 값으로 넘어가게 됩니다.

다음으로 torchvision 내 utils.make_grid 함수는 이미지 배치 데이터(4차원 형식)를 입력받아 여러 이미지를 격자 형태로 이어 붙인 하나의 이미지 텐서를 만들어 주고, 이를 앞서 정의한 imshow 함수로 화면에 출력합니다.

합성곱 신경망 정의하기

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then three linear layers.

    The shape notes in ``forward`` assume 3 x 32 x 32 inputs, which is what
    the ``16 * 5 * 5`` input size of the first linear layer corresponds to.
    """

    def __init__(self):
        super().__init__()
        # Conv2d arguments: input channels, output channels, kernel size.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # 2x2 max pooling with stride 2 halves the feature-map height and width.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return a (batch, 10) tensor of raw class scores for the batch ``x``."""
        # 3 x 32 x 32 -> conv1 -> 6 x 28 x 28 -> pool -> 6 x 14 x 14
        features = self.pool(F.relu(self.conv1(x)))
        # 6 x 14 x 14 -> conv2 -> 16 x 10 x 10 -> pool -> 16 x 5 x 5
        features = self.pool(F.relu(self.conv2(features)))
        # Collapse every dimension except the batch dimension into one.
        flat = torch.flatten(features, start_dim=1)
        hidden = F.relu(self.fc1(flat))    # 16 * 5 * 5 -> 120
        hidden = F.relu(self.fc2(hidden))  # 120 -> 84
        # 84 -> 10: one unnormalized score per class (not a one-hot vector).
        return self.fc3(hidden)


net = Net()

nn.Module 클래스를 상속해 합성곱 신경망을 정의했습니다.

init 함수 부분에는 신경망 함수를 선언하고, forward 함수 부분에서 선언한 신경망 함수를 실행했습니다.

torch.nn.functional에는 relu 등 여러 가지 활성화 함수들이 있습니다.

import torch.optim as optim

# Loss: cross-entropy between the network's class scores and the labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all of the network's parameters.
sgd_settings = {'lr': 0.001, 'momentum': 0.9}
optimizer = optim.SGD(net.parameters(), **sgd_settings)

손실 함수로 크로스 엔트로피 함수를, 옵티마이저로 SGD를 사용했습니다.

손실 함수는 torch.nn 모듈 내에 존재하고, 옵티마이저는 torch.optim 모듈 내에 존재합니다.

신경망 학습하기

# Train for two full passes over the training loader.
for epoch in range(2):
    running_loss = 0.0

    for i, data in enumerate(trainloader):
        inputs, labels = data

        # Reset the gradients left over from the previous backward pass
        # (this clears gradients, not the parameters themselves).
        optimizer.zero_grad()

        # Forward pass, loss against the true labels, backward pass, then
        # one optimizer update of the parameters.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report the mean loss once every 2000 mini-batches, then restart
        # the running total.
        if (i + 1) % 2000 != 0:
            continue
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
        running_loss = 0.0

[1,  2000] loss: 2.222
[1,  4000] loss: 1.880
[1,  6000] loss: 1.696
[1,  8000] loss: 1.602
[1, 10000] loss: 1.537
[1, 12000] loss: 1.480
[2,  2000] loss: 1.409
[2,  4000] loss: 1.379
[2,  6000] loss: 1.358
[2,  8000] loss: 1.358
[2, 10000] loss: 1.308
[2, 12000] loss: 1.298

2 에포크로 트레인 데이터 로더를 사용하고, 앞서 정의한 모델, 옵티마이저, 손실함수를 사용합니다.

신경망 학습 과정은 옵티마이저로 변화도(gradient) 초기화하기 => 순전파 진행으로 output 값 배출 => 손실함수 사용해서 loss값 배출 => 손실함수 역전파 수행 => 옵티마이저 사용 매개변수 최적화 과정으로 진행됩니다.

테스트 데이터로 모델 검정하기

# Take the first mini-batch of the test loader. The built-in next() is used
# because DataLoader iterators in recent PyTorch releases no longer expose a
# .next() method.
dataiter = iter(testloader)
images, labels = next(dataiter)

# Show the images and print their true class names. Iterating the labels
# directly removes the hard-coded batch size of 4.
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join(f'{classes[label]:5s}' for label in labels))

GroundTruth:  ship  plane deer  ship

테스트 데이터 로더 내 첫번째 배치 데이터를 사용해 시각적으로 검정하겠습니다. 실제 값을 출력한 모습이죠.

# Forward pass on the sample batch; the network returns one raw score per
# class for every image.
outputs = net(images)

# torch.max along dim 1 yields (maximum values, their indices). Only the
# indices, i.e. the predicted labels, are of interest, so the values are
# bound to the conventional throwaway name '_'.
_, predicted = torch.max(outputs, dim=1)

predicted_names = [f'{classes[predicted[j]]:5s}' for j in range(4)]
print('Predicted: ', ' '.join(predicted_names))

Predicted:  ship  ship  deer  ship

테스트 첫번째 배치 데이터를 모델에 넣어서 레이블을 예측했습니다. 2번째 plane 빼고 맞췄습니다!

# Running counts over the whole test loader.
correct = 0
total = 0

# Evaluation only: no weights are updated here, so gradient tracking is
# switched off for the whole loop.
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)

        # The class with the highest score is the prediction. The legacy
        # `.data` access is not needed inside no_grad(), and dropping it
        # matches the per-class evaluation loop.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Integer percentage (floor division) of correctly classified test images.
print(f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

Accuracy of the network on the 10000 test images: 53 %

전체 테스트 데이터 중 53% 정도를 맞췄습니다. 분류 레이블 개수가 10개인걸 감안하면 엄청 나쁜 수치는 아닙니다.

# Per-class tallies keyed by class name, all starting at zero.
correct_pred = dict.fromkeys(classes, 0)
total_pred = dict.fromkeys(classes, 0)

# Gradients are still not needed for evaluation.
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predictions = torch.max(outputs, 1)
        # Count every sample under its true class, and additionally as a
        # hit when the prediction matches the true label.
        for label, prediction in zip(labels.tolist(), predictions.tolist()):
            true_name = classes[label]
            total_pred[true_name] += 1
            if label == prediction:
                correct_pred[true_name] += 1


# Print the accuracy of each class, in the order the classes are listed.
for classname in classes:
    correct_count = correct_pred[classname]
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: plane is 67.6 %
Accuracy for class: car   is 56.8 %
Accuracy for class: bird  is 47.7 %
Accuracy for class: cat   is 33.9 %
Accuracy for class: deer  is 57.0 %
Accuracy for class: dog   is 33.1 %
Accuracy for class: frog  is 71.5 %
Accuracy for class: horse is 50.5 %
Accuracy for class: ship  is 74.3 %
Accuracy for class: truck is 42.5 %

어느 클래스를 더 잘 분류하고, 어느 클래스는 잘 분류하지 못했는지 찾아봤습니다.

다만 정확도의 한계로 단순히 한 클래스를 많이 예측한 경우도 있기 때문에 전적으로 신뢰할 결과는 아닙니다.

GPU에서 학습하기

# Use the first CUDA device when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print(device)

cuda:0

GPU를 사용하고 있군요.

# Move the model to the selected device.
net.to(device)

# Move the two halves of a batch (inputs and labels) to that same device.
batch_inputs = data[0].to(device)
batch_labels = data[1].to(device)
inputs, labels = batch_inputs, batch_labels

모델과 입력, 레이블 값들을 GPU로 보내면 정상적인 GPU 연산이 가능해집니다.

느낀점

파이토치라는 딥러닝 도구 사용법을 익히기 위해 쉬운 예제부터 시작했습니다.

예제 자체는 무슨말인지 알고 있으나, 파이토치 내 어느 클래스에서 어떤 함수를 가져오는지를 중점적으로 학습했습니다.

생각보다 어렵네요. 낯선 부분이 다소 있습니다. 하지만 하나하나 알아가는 기분이 좋네요.

간단한 예제를 학습했는데, 다음엔 상대적으로 더 복잡한 다른 코드를 리뷰해보도록 하겠습니다.

참고 : https://tutorials.pytorch.kr/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py