회귀분석 알고리즘 구현

AI/MachineLearning

회귀분석 알고리즘 구현

향식이 2021. 6. 4. 08:25

회귀 분석의 절차

X라는 값이 입력되면 Y = 베타0 + 베타1*X라는 계산식을 통해 값을 산출하는 예측 함수를 정의한다.
실제 값 y와 예측 함수를 통해 도출된 예측값 pred_y 간의 차이를 계산한다.
계산한 차이에 기반하여 베타0과 베타1을 업데이트하는 규칙을 정의하고, 이를 바탕으로 베타0과 베타1의 값을 조정한다.
위의 과정을 특정 반복 횟수(iteration) 만큼 반복한다.
반복적으로 수정된 베타0과 베타1을 바탕으로 Y = 베타0 + 베타1*X라는 회귀식을 정의한다.

import numpy as np
import matplotlib.pyplot as plt

def load_data():
    """Build and return the fixed toy dataset used by this script.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays, each with shape (10, 1):
        ``X`` holds the input values and ``y`` the matching target values.
    """
    inputs = [
        8.70153760, 3.90825773, 1.89362433, 3.28730045, 7.39333004,
        2.98984649, 2.25757240, 9.84450732, 9.94589513, 5.48321616,
    ]
    targets = [
        5.64413093, 3.75876583, 3.87233310, 4.40990425, 6.43845020,
        4.02827829, 2.26105955, 7.15768995, 6.29097441, 5.19692852,
    ]

    # Store each series as a single column so one row is one sample.
    X = np.array(inputs).reshape(-1, 1)
    y = np.array(targets).reshape(-1, 1)
    return X, y

def prediction(beta_0, beta_1, X):
    """Evaluate the linear model ``beta_0 + beta_1 * X``.

    Args:
        beta_0: Intercept term.
        beta_1: Slope term.
        X: Input value(s) to evaluate the model on.

    Returns:
        The predicted value(s), computed with ordinary ``+`` and ``*``
        (so NumPy arguments are broadcast element-wise).
    """
    return beta_1 * X + beta_0
    
def update_beta(beta_0, beta_1, X, loss, lr):
    """Compute the gradient-descent step terms for the intercept and slope.

    For the model ``beta_0 + beta_1 * X`` with mean squared error, the
    intercept's gradient depends on the plain sum of the residuals and the
    slope's gradient on the residuals weighted by ``X``. The returned values
    are meant to be subtracted from the current coefficients
    (``beta -= delta``).

    Args:
        beta_0: Current intercept (unused; kept for interface compatibility).
        beta_1: Current slope (unused; kept for interface compatibility).
        X: Input column the residuals were computed on.
        loss: Residuals ``y - prediction``, one row per sample.
        lr: Learning rate.

    Returns:
        A tuple ``(delta_0, delta_1)``: the step term for the intercept and
        the step term for the slope, in that order.
    """
    # Shared factor: learning rate times the 2/n of the MSE derivative.
    scale = lr * (2 / len(loss))

    # Intercept term: d(MSE)/d(beta_0) uses the unweighted residual sum.
    delta_0 = -(scale * np.sum(loss))

    # Slope term: d(MSE)/d(beta_1) uses the residuals weighted by X.
    delta_1 = -(scale * np.dot(X.T, loss))

    return delta_0, delta_1
    
def gradient_descent(X, y, iters, lr):
    """Fit the intercept and slope by repeating the update step ``iters`` times.

    Args:
        X: Input column.
        y: Target column.
        iters: Number of update iterations to run.
        lr: Learning rate forwarded to ``update_beta``.

    Returns:
        A tuple ``(beta_0, beta_1)`` of (1, 1) NumPy arrays holding the
        coefficients after the last iteration.
    """
    # Both coefficients start at zero; (1, 1) is the array shape.
    beta_0, beta_1 = np.zeros((1, 1)), np.zeros((1, 1))

    for step in range(iters):
        # Residual between the targets and the current model output.
        residual = y - prediction(beta_0, beta_1, X)

        step_0, step_1 = update_beta(beta_0, beta_1, X, residual, lr)

        # Apply the step terms in place.
        beta_0 -= step_0
        beta_1 -= step_1

        # Report progress and draw the current fit once every 100 iterations.
        if not step % 100:
            print("학습 횟수 :", step)
            plotting_graph(X, y, beta_0, beta_1)

    return beta_0, beta_1


def plotting_graph(X, y, beta_0, beta_1):
    """Plot the data and the current regression line, then save and send it.

    Draws a scatter plot of ``(X, y)`` with the line
    ``beta_0 + beta_1[0, 0] * X`` in red, writes the figure to ``test.png``
    and passes that path to ``elice_utils.send_image``.

    Args:
        X: Input column.
        y: Target column.
        beta_0: Intercept, added to the scaled inputs as-is.
        beta_1: Slope; only element ``[0, 0]`` is used.
    """
    y_pred = beta_0 + beta_1[0, 0] * X

    fig = plt.figure()
    try:
        plt.scatter(X, y)
        plt.plot(X, y_pred, c='r')

        plt.savefig("test.png")
        # NOTE(review): elice_utils is not defined or imported in the visible
        # source; presumably the Elice runtime provides it. Confirm.
        elice_utils.send_image("test.png")
    finally:
        # This function is called repeatedly during training; without an
        # explicit close every call would leave its figure open in pyplot.
        plt.close(fig)


def main():
    """Run the regression demo end to end.

    Loads the dataset, fits the coefficients with gradient descent, prints
    the result and plots the final fit.

    Returns:
        The fitted ``(beta_0, beta_1)`` pair.
    """
    # Training hyperparameters: learning rate and number of iterations.
    learning_rate, n_iters = 1e-4, 1000

    X, y = load_data()
    fitted = gradient_descent(X, y, n_iters, learning_rate)
    beta_0, beta_1 = fitted

    print("{}번의 학습 이후의 회귀 알고리즘 결과".format(n_iters))
    print("beta_0:", beta_0[0], "beta_1:", beta_1[0])

    plotting_graph(X, y, beta_0, beta_1)

    return beta_0, beta_1

# Run the demo only when this file is executed as a script, not on import.
if __name__=="__main__":
    main()

출처: 엘리스 교육

저작자표시