신경망의 구성요소: 연산

def assert_same_shape(array, array_grad):
    """Check that an array and its gradient have identical shapes.

    Args:
        array: Object exposing a ``shape`` attribute (an ndarray in this file).
        array_grad: Gradient associated with ``array``; also exposes ``shape``.

    Returns:
        None.

    Raises:
        AssertionError: If the two shapes differ. The message reports both
            shapes so that a mismatch can be diagnosed from the traceback.
    """
    assert array.shape == array_grad.shape, (
        f"shape mismatch: array has shape {array.shape}, "
        f"gradient has shape {array_grad.shape}"
    )
    return None

class Operation(object):
    """Base class for a single operation in the network.

    The public ``forward`` / ``backward`` methods cache the values they see
    and delegate the actual math to ``_output`` and ``_input_grad``, which
    subclasses must override.
    """

    def __init__(self):
        pass

    def forward(self, input_):
        """Cache ``input_``, compute the output via ``_output`` and return it.

        The comments in this file describe ``input_`` as an ndarray.
        """
        self.input_ = input_
        result = self._output()
        self.output = result
        return result

    def backward(self, output_grad):
        """Return the gradient with respect to the cached input.

        ``output_grad`` must have the same shape as the cached output, and the
        gradient produced by ``_input_grad`` must have the same shape as the
        cached input; both conditions are checked with ``assert_same_shape``.
        """
        assert_same_shape(self.output, output_grad)

        grad = self._input_grad(output_grad)
        self.input_grad = grad

        assert_same_shape(self.input_, grad)
        return grad

    # The two hooks below exist only to be overridden by subclasses;
    # this base class deliberately leaves them unimplemented.
    def _output(self):
        """Compute the forward result from ``self.input_`` (subclass hook)."""
        raise NotImplementedError()

    def _input_grad(self, output_grad):
        """Compute the gradient w.r.t. the input (subclass hook)."""
        raise NotImplementedError()

상속을 해주기 위한 간단한 연산 클래스를 만들었습니다.

forward 부분은 input_을 받고 아웃풋을 내는 함수를 호출합니다.

backward 부분은 output_grad를 받고 _input_grad 함수를 호출합니다.

여기서 assert_same_shape은 배열과 그 기울기의 모양(shape)이 서로 같은지 확인하는 역할을 합니다.

class ParamOperation(Operation):
    """An ``Operation`` that also owns a parameter.

    In addition to the hooks required by ``Operation``, subclasses must
    implement ``_param_grad``.
    """

    def __init__(self, param):
        # The parent constructor has to run before the parameter is attached.
        super().__init__()
        self.param = param

    def backward(self, output_grad):
        """Compute and cache both the input gradient and the parameter gradient.

        Returns only the input gradient; the parameter gradient is left on
        ``self.param_grad``. Each gradient is shape-checked against the value
        it belongs to.
        """
        assert_same_shape(self.output, output_grad)

        input_grad = self._input_grad(output_grad)
        self.input_grad = input_grad
        param_grad = self._param_grad(output_grad)
        self.param_grad = param_grad

        checks = ((self.input_, input_grad), (self.param, param_grad))
        for value, grad in checks:
            assert_same_shape(value, grad)

        return input_grad

    def _param_grad(self, output_grad):
        """Compute the gradient w.r.t. the parameter (subclass hook)."""
        raise NotImplementedError()

파라미터가 있는 연산을 정의하기 위해 가장 기본적인 연산 클래스(Operation)를 상속받아 만들었습니다.

backward 함수는 _input_grad, _param_grad(출력물에 대한 입력값/파라미터 기울기) 함수를 호출해 값을 구합니다.

이전 backward와 달라진 점은 _param_grad 부분이 추가되었다는 것입니다.

이 클래스를 상속하는 클래스는 _output, _input_grad, _param_grad 함수를 정의해야 합니다.

import numpy as np

# 신경망의 가중치 행렬 곱 연산, 순방향/역방향 모두 제공.
class WeightMultiply(ParamOperation):
    """Weight matrix multiplication, providing forward and backward passes.

    The weight matrix ``W`` is stored as ``self.param``. The explicit
    ``(1, 0)`` axis permutation in the backward hooks means both the input
    and the weights must be 2-D arrays.
    """

    def __init__(self, W):
        super().__init__(W)

    def _output(self):
        """Forward hook called by ``Operation.forward``: ``input_ . W``."""
        inputs, weights = self.input_, self.param
        return np.dot(inputs, weights)

    # The backward pass is split into two hooks: one for the gradient with
    # respect to the input and one for the gradient with respect to W.
    def _input_grad(self, output_grad):
        """Gradient with respect to the input: ``output_grad . W^T``."""
        weights_t = np.transpose(self.param, axes=(1, 0))
        return np.dot(output_grad, weights_t)

    def _param_grad(self, output_grad):
        """Gradient with respect to the weights: ``input_^T . output_grad``."""
        inputs_t = np.transpose(self.input_, axes=(1, 0))
        return np.dot(inputs_t, output_grad)

ParamOperation을 상속해 신경망의 가중치 행렬 곱 연산을 하는 WeightMultiply를 만들었습니다.

class BiasAdd(ParamOperation):
    """Bias addition; the bias ``B`` must have exactly one row."""

    def __init__(self, B):
        assert B.shape[0] == 1
        # B becomes ``self.param`` through the ParamOperation constructor.
        super().__init__(B)

    def _output(self):
        """Forward hook: add the bias to the input."""
        return self.input_ + self.param

    def _input_grad(self, output_grad):
        """Gradient with respect to the input."""
        ones = np.ones_like(self.input_)
        return ones * output_grad

    def _param_grad(self, output_grad):
        """Gradient with respect to the bias.

        The per-row gradients are summed over axis 0 and the result is
        reshaped to a single row with as many columns as the product has.
        """
        per_row = np.ones_like(self.param) * output_grad
        n_cols = per_row.shape[1]
        summed = np.sum(per_row, axis=0)
        return summed.reshape(1, n_cols)

편향을 더하는 연산도 마찬가지로 ParamOperation을 상속하여 만들었습니다.

class Sigmoid(Operation):
    """Sigmoid activation; it has no parameter, so it extends ``Operation``."""

    def __init__(self):
        super().__init__()

    def _output(self):
        """Forward hook: ``1 / (1 + exp(-input_))``."""
        exp_neg = np.exp(-1.0 * self.input_)
        return 1.0 / (1.0 + exp_neg)

    def _input_grad(self, output_grad):
        """Gradient with respect to the input.

        Uses the cached output: ``output * (1 - output) * output_grad``.
        """
        local_slope = self.output * (1.0 - self.output)
        return local_slope * output_grad

시그모이드 연산은 파라미터가 없기 때문에 Operation 클래스를 상속했습니다.

class Linear(Operation):
    """Identity activation: values pass through unchanged in both directions."""

    def __init__(self):
        super().__init__()

    def _output(self):
        """Forward hook: return the cached input as-is."""
        passthrough = self.input_
        return passthrough

    def _input_grad(self, output_grad):
        """Backward hook: return the incoming gradient as-is."""
        passthrough_grad = output_grad
        return passthrough_grad

입력을 받은 대로 출력해주는 Linear 클래스 입니다.

신경망의 구성요소: 층

class Layer(object):
    """A layer: an ordered list of operations applied one after another."""

    def __init__(self, neurons):
        # ``neurons`` is the width of the layer.
        self.neurons = neurons
        self.first = True
        self.params = []
        self.param_grads = []
        self.operations = []

    def _setup_layer(self, num_in):
        """Build the layer's operations (subclass hook).

        ``forward`` calls this with the whole input array, not a count.
        """
        raise NotImplementedError()

    def forward(self, input_):
        """Run ``input_`` through every operation in order.

        While ``self.first`` is true, ``_setup_layer`` is invoked before the
        operations run, and the flag is then cleared.
        """
        if self.first:
            self._setup_layer(input_)
            self.first = False

        self.input_ = input_

        signal = input_
        for op in self.operations:
            signal = op.forward(signal)

        self.output = signal
        return signal

    def backward(self, output_grad):
        """Run ``output_grad`` through the operations in reverse order.

        Returns the gradient with respect to the layer input and refreshes
        ``self.param_grads`` as a side effect.
        """
        assert_same_shape(self.output, output_grad)

        grad = output_grad
        for op in reversed(self.operations):
            grad = op.backward(grad)

        self._param_grads()
        return grad

    def _param_grads(self):
        """Collect ``param_grad`` from every ParamOperation in the layer."""
        self.param_grads = [
            op.param_grad
            for op in self.operations
            if isinstance(op, ParamOperation)
        ]

    def _params(self):
        """Collect ``param`` from every ParamOperation in the layer."""
        self.params = [
            op.param
            for op in self.operations
            if isinstance(op, ParamOperation)
        ]

층을 정의합니다. 이때 Operation 객체의 리스트를 operations 속성에 담고 있습니다.

class Dence(Layer):
    """Fully connected layer: weight multiply, bias add, then an activation.

    The class name is a misspelling of "Dense"; it is kept unchanged because
    callers in this file construct the layer under this name.

    Args:
        neurons: Width of the layer (number of output columns).
        activation: Operation instance applied after the bias addition.
    """

    def __init__(self, neurons, activation):
        super().__init__(neurons)
        self.activation = activation

    def _setup_layer(self, input_):
        """Create randomly initialised parameters and the operation list.

        Args:
            input_: 2-D input array; its second dimension determines the
                number of rows of the weight matrix.

        Returns:
            None.
        """
        # ``seed`` is attached from outside (NeuralNetwork uses setattr, and
        # only when its own seed is truthy). Read it defensively so a layer
        # without that attribute does not fail with AttributeError.
        seed = getattr(self, "seed", None)
        if seed:
            np.random.seed(seed)

        # Draw the weights first, then the bias, so a given seed always
        # produces the same parameters.
        weights = np.random.randn(input_.shape[1], self.neurons)
        bias = np.random.randn(1, self.neurons)
        self.params = [weights, bias]

        # The operations hold the very same array objects as ``self.params``.
        self.operations = [
            WeightMultiply(weights),
            BiasAdd(bias),
            self.activation,
        ]

        return None

Layer 클래스를 상속하고 _setup_layer 함수를 구현한 Dence(밀집층) 클래스를 만들었습니다.

랜덤 시드를 받아서 파라미터 초기값에 랜덤값을 넣어줍니다.

여기서 operations를 정의하는데, 층을 하나 만들기 위해서 여러 연산 클래스를 사용합니다.

신경망의 구성요소: 손실함수

class Loss(object):
    """Base class for loss functions.

    Subclasses implement ``_output`` (the loss value) and ``_input_grad``
    (the gradient of the loss with respect to the prediction).
    """

    def __init__(self):
        pass

    def forward(self, prediction, target):
        """Compute the loss value.

        Args:
            prediction: Predicted values.
            target: Target values; must have the same shape as ``prediction``.

        Returns:
            Whatever the subclass's ``_output`` returns.

        Raises:
            AssertionError: If the two shapes differ.
        """
        assert_same_shape(prediction, target)

        self.prediction = prediction
        self.target = target

        loss_value = self._output()
        return loss_value

    def backward(self):
        """Compute the gradient of the loss with respect to the prediction.

        Relies on ``self.prediction`` cached by a preceding ``forward`` call.

        Returns:
            The gradient, which must have the same shape as the prediction.
        """
        self.input_grad = self._input_grad()

        assert_same_shape(self.prediction, self.input_grad)

        return self.input_grad

    # The receiver of both hooks was previously misspelled ``slef``.
    def _output(self):
        """Compute the loss from the cached prediction/target (subclass hook)."""
        raise NotImplementedError()

    def _input_grad(self):
        """Compute the gradient w.r.t. the prediction (subclass hook)."""
        raise NotImplementedError()

손실함수를 구성하는 Loss 클래스 입니다. 타겟값과 예측값을 가지고 _output 함수를 돌려 loss를 구합니다.

backward 함수는 입력 값에 따른 손실의 기울기를 계산해줍니다.

class MeanSquaredError(Loss):
    """Squared-error loss averaged over the first dimension of the prediction."""

    def __init__(self):
        super().__init__()

    def _output(self):
        """Sum of squared errors divided by the number of rows."""
        residual = self.prediction - self.target
        n_rows = self.prediction.shape[0]
        return np.sum(np.power(residual, 2)) / n_rows

    def _input_grad(self):
        """Gradient of the loss with respect to the prediction."""
        residual = self.prediction - self.target
        n_rows = self.prediction.shape[0]
        return 2.0 * residual / n_rows

Loss 클래스를 상속받아 MeanSquaredError 클래스를 만들었는데요. _output, _input_grad 두 함수의 수식을 구체적으로 구현했습니다.

딥러닝의 구성요소: 신경망(NeuralNetwork)

class NeuralNetwork(object):
    """A stack of layers combined with a loss function.

    Args:
        layers: List of layer objects, applied in order.
        loss: Loss object providing ``forward(prediction, target)`` and
            ``backward()``.
        seed: Seed handed to every layer. A truthy value is assigned to each
            layer's ``seed`` attribute. A falsy value leaves an existing
            ``seed`` untouched and sets ``seed = None`` on layers that have
            none, so layers can always read the attribute.
    """

    def __init__(self, layers, loss, seed = 1):
        self.layers = layers
        self.loss = loss
        self.seed = seed
        for layer in self.layers:
            if seed:
                layer.seed = self.seed
            elif not hasattr(layer, 'seed'):
                # Previously nothing was attached here, and a layer reading
                # ``self.seed`` during setup failed with AttributeError.
                layer.seed = None

    def forward(self, x_batch):
        """Pass ``x_batch`` through every layer in order and return the result."""
        x_out = x_batch
        for layer in self.layers:
            x_out = layer.forward(x_out)

        return x_out

    def backward(self, loss_grad):
        """Pass ``loss_grad`` through the layers in reverse order.

        Returns None; the useful result is whatever each layer stores during
        its own ``backward`` call.
        """
        grad = loss_grad
        for layer in reversed(self.layers):
            grad = layer.backward(grad)
        return None

    def train_batch(self, x_batch, y_batch):
        """Run one forward pass, compute the loss, and run the backward pass.

        Returns:
            The loss value for this batch. Parameters are not updated here.
        """
        predictions = self.forward(x_batch)
        loss = self.loss.forward(predictions, y_batch)
        self.backward(self.loss.backward())
        return loss

    def params(self):
        """Yield every parameter of every layer, one at a time."""
        for layer in self.layers:
            yield from layer.params

    def param_grads(self):
        """Yield every parameter gradient of every layer, one at a time."""
        for layer in self.layers:
            yield from layer.param_grads

# Two-layer network: a 13-unit sigmoid layer followed by a single linear
# output unit, with mean squared error as the loss.
neural_network = NeuralNetwork(
    layers=[
        Dence(neurons=13, activation=Sigmoid()),
        Dence(neurons=1, activation=Linear()),
    ],
    loss=MeanSquaredError(),
)

딥러닝 구현을 위해 NeuralNetwork 클래스를 구현했습니다. 앞서 구현한 층, 손실함수를 이용했습니다.

우선 layers로 층 클래스를 리스트로 받습니다. 층 클래스는 또 뉴런 개수와 활성화함수인 연산 클래스를 입력해야합니다.

또 loss에는 손실함수 클래스를 넣어주면 됩니다.

forward 부분에서는 입력된 x값을 여러 층에 차례대로 넣습니다. 층에서는 또 차례대로 연산을 해서 prediction을 출력합니다.

그 후 loss 클래스를 이용해서 손실값을 계산합니다. 다음으로 손실의 기울기를 backward 함수에 넣습니다.

backward 함수에서는 손실의 기울기를 여러 층에 앞선 차례와 반대 순서로 넣습니다.

NeuralNetwork의 backward 함수 자체는 값을 반환하지 않습니다(None). 대신 역전파 과정에서 각 층에 파라미터 기울기(param_grads)가 저장되고, 이 값을 사용해 loss를 줄여 가는 것이 학습의 방향이겠죠.

딥러닝의 구성요소: 옵티마이저

class Optimizer(object):
    """Minimal optimizer base class.

    Args:
        lr: Learning rate.
    """

    def __init__(self, lr = 0.01):
        self.lr = lr

    def step(self):
        """Apply one parameter update; the base class does nothing."""
        return None

옵티마이저의 간단한 추상클래스입니다. lr은 학습률을 의미합니다.

class SGD(Optimizer):
    """Gradient-descent update rule: ``param -= lr * param_grad``.

    ``self.net`` is not set here; it has to be attached from outside before
    ``step`` is called (``Trainer.__init__`` does this).
    """

    def __init__(self, lr = 0.01):
        super().__init__(lr)

    def step(self):
        """Update every parameter of the attached network in place."""
        pairs = zip(self.net.params(), self.net.param_grads())
        for weights, grad in pairs:
            # ``-=`` on an ndarray modifies the array object itself rather
            # than rebinding the local name, so the change is visible to the
            # layers and operations that hold the same array object.
            weights -= self.lr * grad

옵티마이저에서 step 부분을 SGD(확률적 경사 하강법)을 이용해서 구성한 모습입니다.

구체적으로 학습률 * (loss값에 영향을 주는 param_grad값)으로 param 값을 업데이트 해나가는 방식입니다.

딥러닝의 구성요소: Trainer

def permute_data(X, y):
    """Shuffle ``X`` and ``y`` with one shared random permutation.

    The permutation covers the first dimension of ``X``, so rows of ``X`` and
    ``y`` stay paired. Returns the shuffled ``(X, y)`` tuple.
    """
    n_rows = X.shape[0]
    order = np.random.permutation(n_rows)
    X_shuffled = X[order]
    y_shuffled = y[order]
    return X_shuffled, y_shuffled

class Trainer(object):
    """Trains a network with an optimizer.

    Args:
        net: The network (a NeuralNetwork in this file).
        optim: The optimizer; the network is attached to it as ``optim.net``.
    """

    def __init__(self, net, optim):
        self.net = net
        self.optim = optim
        self.optim.net = self.net

    def generate_batches(self, X, y, size = 32):
        """Yield consecutive ``(X_batch, y_batch)`` slices of at most ``size`` rows."""
        assert X.shape[0] == y.shape[0]
        n_rows = X.shape[0]
        for start in range(0, n_rows, size):
            stop = start + size
            yield X[start:stop], y[start:stop]

    def fit(self, X_train, y_train, X_test, y_test, epochs = 100,
            eval_every = 10, batch_size = 32, seed = 1, restart = True):
        """Train for ``epochs`` epochs, printing the test loss periodically.

        Every ``eval_every`` epochs the network is evaluated on the test data
        and the loss is printed. With ``restart`` true, every layer's
        ``first`` flag is set again so that it runs its setup on the next
        forward pass.
        """
        np.random.seed(seed)

        if restart:
            for layer in self.net.layers:
                layer.first = True

        for e in range(epochs):
            # Reshuffle the training data at the start of each epoch.
            X_train, y_train = permute_data(X_train, y_train)

            for X_batch, y_batch in self.generate_batches(X_train, y_train,
                                                          batch_size):
                self.net.train_batch(X_batch, y_batch)
                # Update the parameters with the gradients just computed.
                self.optim.step()

            if (e + 1) % eval_every != 0:
                continue

            test_preds = self.net.forward(X_test)
            loss = self.net.loss.forward(test_preds, y_test)

            print(f'{e+1}에폭에서 검증 데이터에 대한 손실값: {loss:.3f}')

신경망(NeuralNetwork)과 옵티마이저 클래스를 사용하는 트레이너 클래스입니다.

fit 함수로 데이터를 입력받아 데이터를 배치 단위로 쪼갠뒤 배치 데이터를 적용시켜 loss와 파라미터 기울기를 구합니다.

그 후 옵티마이저 내 step 함수를 사용해 파라미터를 파라미터 기울기를 사용해서 업데이트 해줍니다.

예제 자료 업로드

# ``load_boston`` was deprecated in scikit-learn 1.0 and removed in 1.2.
# Use it while it is still importable; otherwise rebuild the same arrays from
# the original source, following the recipe in scikit-learn's deprecation
# notice. The fallback needs network access and pandas.
try:
    from sklearn.datasets import load_boston
except ImportError:
    import numpy as np
    import pandas as pd
    from sklearn.utils import Bunch

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # Each record spans two physical lines in the raw file; stitch them
    # together into 13 feature columns plus the target column.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
        ]),
    )
else:
    boston = load_boston()

data = boston.data
target = boston.target
features = boston.feature_names

/usr/local/lib/python3.7/dist-packages/sklearn/utils/deprecation.py:87: FutureWarning: Function load_boston is deprecated; `load_boston` is deprecated in 1.0 and will be removed in 1.2.

    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_housing
        housing = fetch_california_housing()

    for the California housing dataset and::

        from sklearn.datasets import fetch_openml
        housing = fetch_openml(name="house_prices", as_frame=True)

    for the Ames housing dataset.
    
  warnings.warn(msg, category=FutureWarning)

from sklearn.preprocessing import StandardScaler
# Standardize the features with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split below, so test-set statistics influence the scaling.
# Consider fitting on the training split only - TODO confirm intent.
s = StandardScaler()
data = s.fit_transform(data)

def to_2d_np(a, type='col'):
    """Convert a 1-D array into a 2-D column or row array.

    Args:
        a: 1-D array.
        type: ``"col"`` for shape ``(n, 1)`` or ``"row"`` for shape
            ``(1, n)``. The name shadows the builtin but is kept so existing
            keyword calls continue to work.

    Returns:
        The reshaped 2-D array.

    Raises:
        AssertionError: If ``a`` is not 1-D.
        ValueError: If ``type`` is neither ``"col"`` nor ``"row"``
            (previously this case silently returned None).
    """
    assert a.ndim == 1, \
    "입력된 텐서는 1차원이어야 함"

    if type == "col":
        return a.reshape(-1, 1)
    if type == "row":
        return a.reshape(1, -1)
    raise ValueError(f"type must be 'col' or 'row', got {type!r}")

from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as the test set, with a fixed random state.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=80718,
)

# Convert the targets into 2-D column arrays.
y_train = to_2d_np(y_train)
y_test = to_2d_np(y_test)

예시

# Train the network defined above with SGD for 50 epochs; the remaining
# fit() arguments keep their defaults.
optimizer = SGD(lr=0.01)
trainer = Trainer(net=neural_network, optim=optimizer)

trainer.fit(
    X_train,
    y_train,
    X_test,
    y_test,
    epochs=50,
)

10에폭에서 검증 데이터에 대한 손실값: 32.121
20에폭에서 검증 데이터에 대한 손실값: 26.972
30에폭에서 검증 데이터에 대한 손실값: 20.426
40에폭에서 검증 데이터에 대한 손실값: 18.131
50에폭에서 검증 데이터에 대한 손실값: 16.930

느낀점

딥러닝 관련해서 저번에 신경망을 간단하게 구현을 했었습니다.

오늘은 은닉층이 더 복잡해지기 때문에 일반화에 용이한 클래스로 딥러닝을 구현했습니다.

자바로 객체지향프로그래밍을 조금 안 상태에서 학습을 해도 파이썬 문법하고 다른 측면이 있어서 학습이 다소 힘들긴 했습니다.

처음 연산/층 클래스를 구현할 때는 이게 무슨 코드인지 이해가 안되고 재미도 없었는데 뉴런 부분을 구현할 때 전반적으로 책에서 정리를 해줘서 그 때 전반적인 감을 잡았던 것 같아요.

코드도 일부 누락되어 있어 깃허브를 찾아보면서 메꾸는 등 어려운 과정이 참 많았지만 하길 잘한 것 같습니다.

딥러닝이란 무엇인가 정말 피부로 체감을 할 수 있었습니다. 그만큼 하나하나 천천히 이해하는데 시간이 오래걸리긴 했지만요.

맨날 나오는 신경망, 뉴런, 옵티마이저, 트레이너 등등 단어의 의미를 이전보다 훨씬 직관적으로 이해를 잘 할 수 있었던 시간인것 같습니다.

과정이 다소 복잡하기 때문에 주기적으로 복습을 하며 더 딥러닝과 가까워질 생각입니다.