I need to create a Python function for forecasting, based on a linear regression model, with confidence bands on time-series data:
The function needs to take an a
I've made a linear regression using gradient descent in Python; perhaps that can help you. After the data are initialized, the loss function is created, which is ordinary least squares (OLS). Then the gradient function is defined and used in an iterative procedure to find the optimal parameters, which can then be used for forecasting. Here's a working example that predicts the grade obtained, given the number of books read and the number of hours spent in class.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import *
from random import random, seed
from matplotlib import cm
# Training inputs: each row is [number of books, hours in class],
# the features from which we predict the grade.
x = np.array([
    [0, 9],  [1, 15], [0, 10], [2, 16], [4, 10],
    [4, 20], [1, 11], [4, 20], [3, 15], [0, 15],
    [2, 8],  [1, 13], [4, 18], [1, 10], [0, 8],
    [1, 10], [3, 16], [0, 11], [1, 19], [4, 12],
    [4, 11], [0, 19], [2, 15], [3, 15], [1, 20],
    [0, 6],  [3, 15], [3, 19], [2, 14], [2, 13],
    [3, 17], [2, 20], [2, 11], [3, 20], [4, 20],
    [4, 20], [3, 9],  [1, 8],  [2, 16], [0, 10]])
# Training targets: the grade each student obtained.
y = np.array([
    45, 57, 45, 51, 65, 88, 44, 87, 89, 59,
    66, 65, 56, 47, 66, 41, 56, 37, 45, 58,
    47, 64, 97, 55, 51, 61, 69, 79, 71, 62,
    87, 54, 43, 92, 83, 94, 60, 56, 88, 62])
# Starting point for the parameter vector theta = [intercept, w_books, w_hours]
# that the gradient descent below will optimize.
theta = np.array([1, 1, 1])
# Hypothesis h_theta(x): the linear model's prediction for each row of x.
def h(x, theta):
    """Return the linear-model predictions theta[0] + x . theta[1:].

    Parameters
    ----------
    x : ndarray of shape (m, k)
        Feature matrix, one sample per row.
    theta : ndarray of shape (k + 1,)
        theta[0] is the intercept; theta[1:] are the feature weights.

    Returns
    -------
    ndarray of shape (m,) -- one prediction per sample.
    """
    # Generalized from the hard-coded two-feature form
    # theta[0] + theta[1]*x[:,0] + theta[2]*x[:,1] to any feature count;
    # identical results for the (m, 2) data used in this script.
    return theta[0] + np.dot(x, theta[1:])
# Loss function J(theta): one half of the sum of squared residuals (OLS).
def lossf(theta):
    """Return 0.5 * sum((h(x, theta) - y)**2) over the module-level data.

    The 0.5 factor makes gradlossf the exact gradient of this function.
    Previously the loss omitted it (while the commented-out variant kept
    it), so loss and gradient disagreed by a factor of 2, which skews any
    sufficient-decrease check in the line search.
    """
    residual = h(x, theta) - y  # evaluate the model once, not twice
    return 0.5 * np.sum(residual * residual)
# Gradient of the loss function for any parameters theta, using vector
# calculations instead of for loops.
def gradlossf(theta):
    """Return the gradient [dJ/dtheta0, dJ/dtheta1, dJ/dtheta2].

    This is the exact gradient of 0.5 * sum((h(x, theta) - y)**2) with
    respect to the intercept and the two feature weights, computed over
    the module-level data x, y.
    """
    residual = h(x, theta) - y  # hoisted: the original evaluated h three times
    d1 = np.sum(residual)            # dJ/dtheta0 (intercept)
    d2 = np.sum(residual * x[:, 0])  # dJ/dtheta1 (books)
    d3 = np.sum(residual * x[:, 1])  # dJ/dtheta2 (hours)
    return np.array([d1, d2, d3])
# --- Gradient-descent hyperparameters (backtracking line search) ---
epsilon = 1e-2  # convergence threshold: stop when the gradient norm falls below this
max_iter = 1000  # hard cap on the number of descent iterations
norm = np.linalg.norm(gradlossf(theta))  # gradient norm at the starting point
alpha0 = 1  # initial trial step size for each line search
iter = 0  # iteration counter (NOTE(review): shadows the `iter` builtin; renaming would require touching the loop below)
beta = 1e-4  # presumably the Armijo sufficient-decrease constant -- TODO confirm against the (truncated) loop condition
tolerance = 1e-6  # presumably a secondary convergence threshold; not used in the visible code -- verify
tau = 0.5  # backtracking shrink factor: alpha is multiplied by tau when a step is rejected
# Start iterative procedure:
while iter < max_iter and norm > epsilon:
print theta
alpha = alpha0
alpha_found = False
while alpha_found is False:
xpoint = theta - alpha*gradlossf(theta)
val1 = lossf(xpoint)
val2 = lossf(theta)
if val1