https://variationalform.github.io/
https://github.com/variationalform
Simon Shaw https://www.brunel.ac.uk/people/simon-shaw.
This work is licensed under CC BY-SA 4.0 (Attribution-ShareAlike 4.0 International). Visit http://creativecommons.org/licenses/by-sa/4.0/ to see the terms.
This document uses python and also makes use of LaTeX in Markdown.
As usual our emphasis will be on doing rather than proving: just enough: progress at pace
For this worksheet we recommend Chapter 9 of [MML], Chapter 3 of [MLFCES], and Chapter 7 of [IPDS].
These can be accessed legally and without cost.
There are also these useful references for coding:
python: https://docs.python.org/3/tutorial
numpy: https://numpy.org/doc/stable/user/quickstart.html
matplotlib: https://matplotlib.org
So far we have been mainly concerned with Classification. This is a big part of Machine Learning but not the only part.
Classification is where we seek to categorise among a set of possible labels.
Regression is where we seek to determine a value from a continuous set of possible values.
In the limit of lots of labels, or a coarsely discretized set, the distinction between these becomes blurred and some models can then be used for both tasks.
The aim is to build a model that given $x$ (a feature), predicts $y(x)$ (a label). We begin with this example data...
$$ \left(\begin{array}{l} x_i \\ y_i \end{array}\right) = \left(\begin{array}{l} 0\\ 1 \end{array}\right), \left(\begin{array}{l} 1\\ 3 \end{array}\right), \left(\begin{array}{l} 2\\ 1 \end{array}\right), \left(\begin{array}{l} 4\\ 3 \end{array}\right), \left(\begin{array}{l} 6\\ 7 \end{array}\right), \left(\begin{array}{l} 7\\ 5 \end{array}\right) $$
We start the indexing at zero, for python, so, for example, $(x_3,y_3)^T = (4,3)^T$ (again, we only use column vectors).
There are $N_p = 6$ training points.
By now we should be familiar with the words training and testing. In Machine Learning we talk about training when we build a model using a set of training data that has a particular set of features. In supervised learning these data points are also labelled. The label is what we want the model to predict from the features. After the model is built, testing refers to its use on a hold-out set in order to report on its success on unseen data.
Let's bring in our standard imports - with a new one: linear_model from sklearn. We then set these data points up in numpy.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
X_vals = np.array([[0,1,2,4,6,7]]).T
y_vals = np.array([[1,3,1,3,7,5]]).T
# this is the number of data points
Np = X_vals.shape[0]
We know these points represent a function, which will have a graph, but it isn't useful to join the points up as a graph yet because they may well contain noise. So we just plot the scatter...
plt.plot(X_vals,y_vals,',r', marker='o')
plt.xlabel('x'); plt.ylabel('y');
Suppose we believe that $y = mx+c$ - a straight line. How do we find the best straight line?
This means, how do we find the best $m$ and $c$? What do we mean by best?
We define $\boldsymbol{X}$, the Feature Matrix (recall that numpy indices start at zero), and the training input vector $\boldsymbol{y}$ as follows:
Note that $\boldsymbol{X}$ is sometimes called the Design Matrix, or the Matrix of Regressors.
We now want to use this training data to construct a predictor $\hat{y}(x_i)$ of $y_i$ where $\hat{y} = mx+c$ is the best straight line through the data. We write $\boldsymbol{\theta} = (c, m)^T$ and note that
$$ \hat{\boldsymbol{y}} = \left(\begin{array}{l} \hat{y}(x_0) \\ \hat{y}(x_1) \\ \hat{y}(x_2) \\ \hat{y}(x_3) \\ \vdots \\ \hat{y}(x_{N_p-1}) \\ \end{array}\right) = \left(\begin{array}{l} m x_0 + c \\ m x_1 + c\\ m x_2 + c\\ m x_3 + c\\ \vdots \\ m x_{N_p-1} + c \\ \end{array}\right) = \left(\begin{array}{ll} 1 & x_0 \\ 1 & x_1 \\ 1 & x_2 \\ 1 & x_3 \\ \vdots & \vdots \\ 1 & x_{N_p-1} \\ \end{array}\right) \left(\begin{array}{l} c\\ m \end{array}\right) = \boldsymbol{X}\boldsymbol{\theta}. $$Notice that we can write $y = \boldsymbol{\theta}^T\boldsymbol{x}$ for $\boldsymbol{x} = (1, x)^T$ an augmented (with unity) data point for which we want to predict $y$.
To define best straight line we need the notions of cost and loss. The best will then be the cheapest as measured by this cost.
These terms are used in quite specific ways.
We have set up a straight line as $\hat{y}(x) = mx+c$
For each $i$ we then measure the difference between $y_i$ (what we want $x_i$ to give), and $\hat{y}_i = m x_i +c$ (what $x_i$ actually gives from the model).
We actually look at the squared difference because we want to eliminate the sign. This quantity, $\big(y_i - \hat{y}(x_i)\big)^2$ is what we call the loss - in this case, the squared error loss.
The loss depends on the choice of data point. To get the overall picture of how well the model is doing we add all these squared error losses together and divide by their number to get the average. This mean-squared-error (MSE) is called the cost. It is given by,
$$ \mathcal{E} = \frac{1}{N_p}\sum_i\vert y_i - \hat{y}_i\vert^2 = \frac{1}{N_p}\sum_i\vert y_i - (m x_i + c)\vert^2 = \frac{1}{N_p}\Vert\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}\Vert_2^2. $$This is because the vector $2$-norm sums up all the squares of the vector elements.
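To make the cost formula concrete, here is a minimal sketch (reusing the X_vals, y_vals and Np arrays defined above, with a trial $\boldsymbol{\theta}$ chosen purely for illustration) that computes the MSE both as an average of squared losses and via the $2$-norm - the two values agree.
c, m = 1.0, 0.7                                     # illustrative trial parameters, theta = (c, m)^T
y_hat = m*X_vals + c                                # model predictions at each x_i
mse_mean = np.mean((y_vals - y_hat)**2)             # average of the squared losses
mse_norm = np.linalg.norm(y_vals - y_hat)**2 / Np   # the same thing via the 2-norm
print(mse_mean, mse_norm)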
In [MLFCES, Chapter 3] they talk about the cost, as above, being the average of the squared losses $\big(y_i - \hat{y}_i\big)^2$.
In [MML, Chapter 8] the cost is called the empirical risk.
This terminology is not always used consistently though. In [IPDS, Chapter 7] the term loss refers to $\Vert\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}\Vert_2^2$, although this isn't quite our cost because the average isn't taken.
Don't get too hung up on this. The MSE cost is what we are going to work with.
And the process we are going to follow is essentially the same in all sources.
We will choose $m$ and $c$, as captured in $\boldsymbol{\theta} = (c,m)^T$, such that this MSE cost (or empirical risk) is as small as it can be given this training set of data.
This is called Ordinary Least Squares (OLS) regression: we are minimizing the average of the squared error.
We have this cost, $\mathcal{E}(\boldsymbol{\theta}) = N_p^{-1}\Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}\Vert_2^2$, and we want to find its minimum value. The data are fixed - the only variable we can control is $\boldsymbol{\theta}$.
Let's think about this: ideally we would like to determine the regression polynomial, the parameters in $\boldsymbol{\theta} = (c,m)^T$, so that they bring the cost to zero: $\mathcal{E}(\boldsymbol{\theta})=0$.
This may not be possible (why?), but we can at least ask for it to be as small as possible. (It can't be negative - so zero is the greatest lower bound.) Before we think about minimization let's play with this cost expression and see what it reveals.
Consider how the value of $\mathcal{E}$ changes between the point $\boldsymbol{\theta}\in\mathbb{R}^2$ and another point $\boldsymbol{\theta}+\boldsymbol{v}\in\mathbb{R}^2$. We need some algebra...
With $N_p\mathcal{E}(\boldsymbol{\theta}) = \Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}\Vert_2^2$, we express $N_p\mathcal{E}(\boldsymbol{\theta}+\boldsymbol{v})= \Vert \boldsymbol{y} - \boldsymbol{X}(\boldsymbol{\theta}+\boldsymbol{v})\Vert_2^2$ in terms of $N_p\mathcal{E}(\boldsymbol{\theta}) + \text{'extras'}$. Recall that the $2$-norm arises from the scalar product...
\begin{align} N_p\mathcal{E}(\boldsymbol{\theta}+\boldsymbol{v}) & = \Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}-\boldsymbol{X}\boldsymbol{v}\Vert_2^2, \\ & = (\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}-\boldsymbol{X}\boldsymbol{v}, \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}-\boldsymbol{X}\boldsymbol{v}), \\ & = \Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}\Vert_2^2 - 2(\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta},\boldsymbol{X}\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2, \\ & = N_p\mathcal{E}(\boldsymbol{\theta}) + 2(\boldsymbol{X}\boldsymbol{\theta} - \boldsymbol{y},\boldsymbol{X}\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2. \end{align}Next, in general, by taking the transpose through,
$$ (\boldsymbol{v},\boldsymbol{K}\boldsymbol{u}) =\boldsymbol{v}^T\boldsymbol{K}\boldsymbol{u} =(\boldsymbol{K}^T\boldsymbol{v})^T\boldsymbol{u} =(\boldsymbol{K}^T\boldsymbol{v},\boldsymbol{u}) $$$$ \text{We then have:}\qquad 2(\boldsymbol{X}\boldsymbol{\theta} - \boldsymbol{y},\boldsymbol{X}\boldsymbol{v}) = 2(\boldsymbol{X}^T(\boldsymbol{X}\boldsymbol{\theta} - \boldsymbol{y}),\boldsymbol{v}). $$We have therefore shown that
$$ N_p\mathcal{E}(\boldsymbol{\theta}+\boldsymbol{v}) = N_p\mathcal{E}(\boldsymbol{\theta}) + 2(\boldsymbol{X}^T(\boldsymbol{X}\boldsymbol{\theta} - \boldsymbol{y}),\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2. $$Suppose now that we consider the particular case where $\boldsymbol{\theta}$ satisfies $\boldsymbol{X}^T(\boldsymbol{X}\boldsymbol{\theta}-\boldsymbol{y})=0$.
Then, any departure from $\boldsymbol{\theta}$ to a different point $\boldsymbol{\theta}+\boldsymbol{v}$ produces this cost,
\begin{align} \mathcal{E}(\boldsymbol{\theta}+\boldsymbol{v}) & = \mathcal{E}(\boldsymbol{\theta}) + \frac{2}{N_p}(\underbrace{ \boldsymbol{X}^T(\boldsymbol{X}\boldsymbol{\theta} - \boldsymbol{y}) }_{=0},\boldsymbol{v}) + \frac{1}{N_p}\Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 \\ & = \mathcal{E}(\boldsymbol{\theta}) + \frac{1}{N_p}\Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 \ge \mathcal{E}(\boldsymbol{\theta}) \text{ for all non-zero } \boldsymbol{v}. \end{align}HENCE: cost is minimized when $\boldsymbol{\theta}$ satisfies $\boldsymbol{X}^T\boldsymbol{X}\boldsymbol{\theta}=\boldsymbol{X}^T\boldsymbol{y}$.
These are called the Normal Equations.
We refer to the system
$$ \boldsymbol{X}^T\boldsymbol{X}\boldsymbol{\theta}=\boldsymbol{X}^T\boldsymbol{y} $$as the normal equations. The matrix $\boldsymbol{X}^T\boldsymbol{X}$ in our example above is
$$ \boldsymbol{X}^T\boldsymbol{X} = \left(\begin{array}{llllll} 1 & 1 & 1 & 1 & \cdots & 1 \\ x_0 & x_1 & x_2 & x_3 & \cdots & x_{N_p-1} \end{array}\right) \left(\begin{array}{ll} 1 & x_0 \\ 1 & x_1 \\ 1 & x_2 \\ 1 & x_3 \\ \vdots & \vdots \\ 1 & x_{N_p-1} \\ \end{array}\right) = \left(\begin{array}{ll} N_p & \sum_i x_i \\ \sum_i x_i & \boldsymbol{x}^T\boldsymbol{x} \end{array}\right) $$where $\boldsymbol{x} = (x_0, x_1, x_2, \ldots)^T$.
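As a quick numerical sanity check (a sketch reusing X_vals and Np from above), we can build the design matrix for our six points and confirm that $\boldsymbol{X}^T\boldsymbol{X}$ has exactly this structure.
X_chk = np.c_[np.ones([Np,1]), X_vals]        # design matrix rows (1, x_i) for the six points
print(X_chk.T @ X_chk)                        # should be [[Np, sum x_i], [sum x_i, x.x]]
print(Np, X_vals.sum(), (X_vals**2).sum())    # the entries, computed directly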
Given the data set $\boldsymbol{x} = (x_0, x_1, \ldots)^T$ and $\boldsymbol{y} = (y_0, y_1, \ldots)^T$, where for example earlier we had,
$$ \left(\begin{array}{l} x_i \\ y_i \end{array}\right) = \left(\begin{array}{l} 0\\ 1 \end{array}\right), \left(\begin{array}{l} 1\\ 3 \end{array}\right), \left(\begin{array}{l} 2\\ 1 \end{array}\right), \left(\begin{array}{l} 4\\ 3 \end{array}\right), \left(\begin{array}{l} 6\\ 7 \end{array}\right), \left(\begin{array}{l} 7\\ 5 \end{array}\right), $$we form the design matrix $\boldsymbol{X} = (\boldsymbol{1}, \boldsymbol{x})$ (here $\boldsymbol{1}$ denotes a column of $1$'s).
The solution $\boldsymbol{\theta}=(c,m)^T$ to the normal equations $\boldsymbol{X}^T\boldsymbol{X}\boldsymbol{\theta}=\boldsymbol{X}^T\boldsymbol{y}$ then produces a straight line $\hat{y}(x) = mx +c$ through these data points.
This line is the best straight line through these data in the sense of minimum (MSE) cost $\mathcal{E}(\boldsymbol{\theta}) = N_p^{-1}\Vert\boldsymbol{X}\boldsymbol{\theta}-\boldsymbol{y}\Vert_2^2$.
Let's do this in numpy. We already have X_vals and y_vals set up.
print(f'X_vals = {X_vals.T} and y_vals = {y_vals.T}')
X_vals = [[0 1 2 4 6 7]] and y_vals = [[1 3 1 3 7 5]]
# linear polynomial regression: yhat1 = mx+c
# set up the design matrix
X = np.c_[np.ones([Np,1]), X_vals]
# solve the normal equations
theta1 = theta = np.linalg.solve(X.T @ X, X.T @ y_vals)
print(f'c = {theta[0]} and m = {theta[1]}')
plt.plot(X_vals, y_vals,'.r', marker='o')
y_hat1 = theta[0] + theta[1]*X_vals
plt.plot(X_vals, y_hat1,'g', marker='x')
plt.xlabel('x'); plt.ylabel('y');
c = [1.01694915] and m = [0.69491525]
Again, we are given data $\boldsymbol{x} = (x_0, x_1, \ldots)^T$ and $\boldsymbol{y} = (y_0, y_1, \ldots)^T$, but this time we want to fit the best degree-$n$ polynomial through the points.
This means that we want $\boldsymbol{\theta} = (\theta_0, \theta_1, \ldots, \theta_n)^T$ such that
$$ \hat{y}(x) = \theta_0 + \theta_1 x + \theta_2 x^2 + \cdots +\theta_n x^n $$predicts $y$ with minimum MSE cost $\mathcal{E}(\boldsymbol{\theta}) =N_p^{-1}\Vert\boldsymbol{X}\boldsymbol{\theta}-\boldsymbol{y}\Vert_2^2$. This time, the design matrix looks like this,
$$ \boldsymbol{X} = \left(\begin{array}{lllll} 1 & x_0 & x^2_0 & \cdots & x^n_0 \\ 1 & x_1 & x^2_1 & \cdots & x^n_1 \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 1 & x_{N_p-1} & x^2_{N_p-1} & \cdots & x^n_{N_p-1} \\ \end{array}\right). $$Exactly the same reasoning as above tells us that $\boldsymbol{\theta}$ solves the normal equations: $\boldsymbol{X}^T\boldsymbol{X}\boldsymbol{\theta}=\boldsymbol{X}^T\boldsymbol{y}$.
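As an aside, here is a sketch of how the degree-$n$ design matrix can be assembled in one line with numpy's Vandermonde helper (np.vander with increasing=True puts the column of ones first); the cells below build the matrix explicitly by stacking columns instead, which amounts to the same thing.
n = 2                                                      # polynomial degree; n = 2 matches the quadratic example below
X_poly = np.vander(X_vals.ravel(), n+1, increasing=True)   # columns 1, x, x^2, ..., x^n
theta_poly = np.linalg.solve(X_poly.T @ X_poly, X_poly.T @ y_vals)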
# quadratic polynomial regression: stack up the design matrix
X = np.hstack( (np.ones([Np,1]), X_vals, X_vals**2) )
# solve the normal equations
theta = np.linalg.solve(X.T @ X, X.T @ y_vals)
plt.plot(X_vals,y_vals,'.r',marker='o')
#y_hat2 = theta[0] + theta[1]*X_vals + theta[2]*X_vals*X_vals
# the line above is not wrong, but this is more elegant...
y_hat2 = X @ theta
plt.plot(X_vals, y_hat2,'g', marker='x')
plt.xlabel('x'); plt.ylabel('y');
It doesn't take much thought to see that we can go to arbitrary polynomial degree. BUT SHOULD WE?
There are six data points - and a quintic can fit these exactly. (Why?)
# quintic polynomial regression: stack up the design matrix
X = np.ones([Np,1])
for k in range(1,6): X = np.hstack( (X, X_vals**k) )
theta = np.linalg.solve(X.T @ X, X.T @ y_vals)
y_hat5 = X @ theta
plt.plot(X_vals, y_vals,'.r',marker='o')
plt.plot(X_vals, y_hat5,'b',marker='x')
plt.xlabel('x'); plt.ylabel('y');
This is no good - we need many more $x$ values to see the smooth curve.
plt.plot(X_vals,y_vals,'.r',marker='o')
# a fine grid of x-values, and a re-built design matrix with them
X_grid = np.arange(0, 1+X_vals[Np-1,0], 0.1).reshape(-1,1)
X = np.c_[np.ones([X_grid.shape[0],1]), X_grid]
for k in range(2,Np): X = np.c_[X, np.power(X_grid,k)]
y_hat5 = X @ theta
plt.plot(X_grid, y_hat5,'b')
plt.xlabel('x'); plt.ylabel('y');
This seems perfect. Are you happy with this model for predicting $y$?
What about new data arriving? Will this be a good predictor? There is a danger that it is severely over-fitted to the training data.
Suppose in testing the model these new unseen data points arrive
$$ \left(\begin{array}{l} x_i \\ y_i \end{array}\right) = \left(\begin{array}{l} 1\\ 2 \end{array}\right), \left(\begin{array}{l} 3\\ 3 \end{array}\right), \left(\begin{array}{l} 6\\ 5 \end{array}\right). $$Let's plot them with black diamonds. The linear regression model is in green and the quintic one in blue. The original training data is red.
# Overfitting - generalization
X_new = np.array([[1,3,6]]).T; y_new = np.array([[2,3,5]]).T
plt.plot(X_vals, y_vals,'.r', marker='o'); plt.plot(X_grid, y_hat5,'b')
plt.plot(X_vals, y_hat1,'g', marker='x'); plt.plot(X_new, y_new, 'dk')
COMMENTS?
What we are seeing here is an example of overfitting. The training set is modelled perfectly but at the expense of the model being able to generalise to unseen data.
For this reason a validation hold out set is often introduced into a ML workflow in order to capture such undesirable properties in models.
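As an illustrative sketch (not something we rely on below), scikit-learn's train_test_split can be used to carve out such a hold-out set before any fitting takes place.
from sklearn.model_selection import train_test_split
# hold back 30% of the data for validation; random_state just fixes the shuffle
X_train, X_val, y_train, y_val = train_test_split(X_vals, y_vals, test_size=0.3, random_state=42)
print(X_train.shape, X_val.shape)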
Let's see how to implement the standard OLS linear regression model in the sklearn library.
The details can be found here https://scikit-learn.org/stable/modules/linear_model.html#ordinary-least-squares
# standard regression
reg_linear = linear_model.LinearRegression()
reg_linear.fit(X_vals, y_vals)
print('reg_coef_ = ', reg_linear.coef_)
print('reg_intercept_ = ', reg_linear.intercept_)
print(f'Our earlier values were: c = {theta1[0]} and m = {theta1[1]}')
reg_coef_ = [[0.69491525]]
reg_intercept_ = [1.01694915]
Our earlier values were: c = [1.01694915] and m = [0.69491525]
We can plot using the model's predicted values...
# Make predictions from the fitted model
y_pred = reg_linear.predict(X_vals)
plt.plot(X_vals,y_vals,'.r',marker='o')
plt.plot(X_vals,y_pred,'b',marker='d')
#plt.plot(X_vals,y_hat, 'g',marker='x', markersize=20)
We're now going to explore the topic of regularization. This is a useful concept when the problem we are trying to solve is in some sense ill posed, or likely to suggest spurious solutions. This can happen when, for example, we get a lot of data points clustered around a straight line and a few way off. The points way off the line - the outliers - may well result from measurement errors or other erroneous data, and yet they can pull the straight line regressor that we compute towards one that is nearby but different from the one we actually want.
The following discussion is distilled from
Chapter 9, Section 1, Theoretical Numerical Analysis: an introduction to advanced techniques, by Peter Linz, Dover 2001.
You don't need to track that down and read it - the main ideas follow.
To eliminate unwanted spurious, or nearby, solutions, Andrey Nikolayevich Tikhonov - see e.g. https://en.wikipedia.org/wiki/Andrey_Nikolayevich_Tikhonov - put forward an idea that is now known as Tikhonov regularization - see e.g. https://en.wikipedia.org/wiki/Ridge_regression#Tikhonov_regularization.
The idea is to add an extra term to the cost. In regression we typically add a term proportional to the $\ell_2$ or $\ell_1$ norm of the parameters. So, with $p=2$ or $p=1$ we write our cost as
$$ \mathcal{E}_\alpha(\boldsymbol{\theta}) = \Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\theta}\Vert_2^2 + \alpha\Vert\boldsymbol{\theta}\Vert_p^p. $$Here we are working with Total Squared Error (TSE) just to keep the formulae simpler. It's easy to divide by the number of data points to get the MSE cost as above.
In this TSE cost, $\alpha\ge 0$, is the regularization parameter. Note that $\alpha = 0$ gets us back to where we were, and it makes no sense to allow $\alpha < 0$.
There are three cases of interest: $\alpha = 0$ recovers ordinary least squares; $p=2$ gives ridge regression; and $p=1$ gives the LASSO.
It takes a lot of work to properly understand why this regularization can be useful, but let's try and develop some intuition.
The best case scenario is where we can find $\boldsymbol{\theta}$ such that $\boldsymbol{y} = \boldsymbol{X}\boldsymbol{\theta}$. If in this case $\boldsymbol{\psi}$ minimizes $\mathcal{E}_\alpha$, then
$$ \Vert\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\psi}\Vert_2^2 \le \mathcal{E}_\alpha(\boldsymbol{\psi}) \le \mathcal{E}_\alpha(\boldsymbol{\theta}) = \alpha\Vert\boldsymbol{\theta}\Vert_p^p $$which means we have some control over $\boldsymbol{\psi}$. In particular, though it is not necessarily the case that $\boldsymbol{y} = \boldsymbol{X}\boldsymbol{\psi}$, this suggests that by taking $\alpha$ small we can make $\Vert\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\psi}\Vert_2$ as small as we please.
On the other hand though, if we do make $\alpha$ small then we get back to the original least squares regression - and we presumably don't want that because if we did we wouldn't have introduced $\alpha$ in the first place.
If $\boldsymbol{\psi}$ minimizes $\mathcal{E}_\alpha$, then, when $p=2$, we have already seen that $\boldsymbol{X}^T\boldsymbol{X}\boldsymbol{\psi} +\alpha\boldsymbol{\psi} =\boldsymbol{X}^T\boldsymbol{y}$.
Using this, and the same algebra as before,
\begin{align} \mathcal{E}_\alpha(\boldsymbol{\psi}+\boldsymbol{v}) & = \Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\psi}-\boldsymbol{X}\boldsymbol{v}\Vert_2^2 + \alpha\Vert\boldsymbol{\psi}+\boldsymbol{v}\Vert_2^2, \\ & = \Vert \boldsymbol{y} - \boldsymbol{X}\boldsymbol{\psi}\Vert_2^2 - 2(\boldsymbol{y} - \boldsymbol{X}\boldsymbol{\psi},\boldsymbol{X}\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 + \alpha\Vert\boldsymbol{\psi}\Vert_2^2 + 2\alpha(\boldsymbol{\psi},\boldsymbol{v}) + \alpha\Vert\boldsymbol{v}\Vert_2^2, \\ & = \mathcal{E}_\alpha(\boldsymbol{\psi}) + 2(\boldsymbol{X}\boldsymbol{\psi} - \boldsymbol{y},\boldsymbol{X}\boldsymbol{v}) + 2\alpha(\boldsymbol{\psi},\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 + \alpha\Vert\boldsymbol{v}\Vert_2^2, \\ & = \mathcal{E}_\alpha(\boldsymbol{\psi}) + 2(\boldsymbol{X}^T(\boldsymbol{X}\boldsymbol{\psi} - \boldsymbol{y}),\boldsymbol{v}) + 2\alpha(\boldsymbol{\psi},\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 + \alpha\Vert\boldsymbol{v}\Vert_2^2, \\ & = \mathcal{E}_\alpha(\boldsymbol{\psi}) + 2(\underbrace{ \boldsymbol{X}^T(\boldsymbol{X}\boldsymbol{\psi} - \boldsymbol{y})+\alpha\boldsymbol{\psi} }_{=0},\boldsymbol{v}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 + \alpha\Vert\boldsymbol{v}\Vert_2^2, \\ & = \mathcal{E}_\alpha(\boldsymbol{\psi}) + \Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 + \alpha\Vert\boldsymbol{v}\Vert_2^2. \end{align}So: $\mathcal{E}_\alpha(\boldsymbol{\psi}+\boldsymbol{v}) = \mathcal{E}_\alpha(\boldsymbol{\psi}) +\Vert \boldsymbol{X}\boldsymbol{v}\Vert_2^2 +\alpha\Vert\boldsymbol{v}\Vert_2^2 \ge \mathcal{E}_\alpha(\boldsymbol{\psi})$ for all $\boldsymbol{v}$, and so such a $\boldsymbol{\psi}$ is indeed a minimizer.
This is telling us that if we have spurious solutions, and if they are somehow nearby $\boldsymbol{\psi}$ in the sense that we can reach them with $\boldsymbol{v}$ being quite small, then the cost can be made significantly different, even for a nearby solution, by choosing $\alpha$ large.
Here, then, is the dilemma:
$\alpha$ small keeps us near to the original problem, but this problem might be hard to solve because of nearby spurious solutions.
$\alpha$ large makes it easier to find a well defined minimum cost, but this cost may be some distance from the one we actually want minimized.
In the end, we often have to proceed by trial and error.
NOTE: there is no simple manipulation like that above for $p=1$. This is because the $2$ norm arises from an inner product, whereas the $1$ norm has no useful alternative representation.
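For $p=2$ the regularized normal equations, $(\boldsymbol{X}^T\boldsymbol{X}+\alpha\boldsymbol{I})\boldsymbol{\theta}=\boldsymbol{X}^T\boldsymbol{y}$, can be solved directly just as before. Here is a sketch; note, as an aside, that scikit-learn's Ridge (used below) typically leaves the intercept unpenalized, so its numbers can differ slightly from this naive version.
alpha = 0.5
X_lin = np.c_[np.ones([Np,1]), X_vals]                 # the linear design matrix again
# regularized normal equations: (X^T X + alpha I) theta = X^T y
theta_ridge = np.linalg.solve(X_lin.T @ X_lin + alpha*np.eye(2), X_lin.T @ y_vals)
print(theta_ridge.ravel())                             # compare with the sklearn Ridge output below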
Let's see how to implement Ridge and LASSO in sklearn.
print('previous reg_coef_ = ', reg_linear.coef_, end=', ')
print('previous reg_intercept_ = ', reg_linear.intercept_)
# ridge regression with alpha = 0.5
reg_ridge = linear_model.Ridge(alpha=0.5)
reg_ridge.fit(X_vals, y_vals)
# Make predictions from the fitted ridge model
y_hat_ridge = reg_ridge.predict(X_vals)
print('reg_coef_ = ', reg_ridge.coef_, end=', ')
print('reg_intercept_ = ', reg_ridge.intercept_)
previous reg_coef_ = [[0.69491525]], previous reg_intercept_ = [1.01694915]
reg_coef_ = [[0.68619247]], reg_intercept_ = [1.0460251]
The coefficients differ, but only slightly.
Here are the training data in red, original OLS linear predictor in blue, and the ridge predictor in green.
plt.plot(X_vals, y_vals,'.r',marker='o')
plt.plot(X_vals, y_hat1,'.b',marker='d')
plt.plot(X_vals, y_hat_ridge,'g',marker='x')
We can do the comparisons all in one cell.
# standard regression
reg_linear = linear_model.LinearRegression()
reg_linear.fit(X_vals, y_vals)
# Make predictions
y_hatOL = reg_linear.predict(X_vals)
# ridge regression
reg_ridge = linear_model.Ridge(alpha=0.5)
reg_ridge.fit(X_vals, y_vals)
# Make predictions
y_hatR = reg_ridge.predict(X_vals)
print('linear reg_coef_ = ', reg_linear.coef_)
print('linear reg_intercept_ = ', reg_linear.intercept_)
print('ridge reg_coef_ = ', reg_ridge.coef_)
print('ridge reg_intercept_ = ', reg_ridge.intercept_)
linear reg_coef_ = [[0.69491525]]
linear reg_intercept_ = [1.01694915]
ridge reg_coef_ = [[0.68619247]]
ridge reg_intercept_ = [1.0460251]
plt.plot(X_vals,y_vals,'.r',marker='o')
plt.plot(X_vals,y_hatOL,'.b',marker='d')
plt.plot(X_vals,y_hatR,'g',marker='x')
For LASSO we can just alter a few bits of code like this...
print('linear reg_coef_ = ', reg_linear.coef_)
print('linear reg_intercept_ = ', reg_linear.intercept_)
print('ridge reg_coef_ = ', reg_ridge.coef_)
print('ridge reg_intercept_ = ', reg_ridge.intercept_)
# standard regression
reg_linear = linear_model.LinearRegression()
reg_linear.fit(X_vals, y_vals)
# Make predictions
y_hatOL = reg_linear.predict(X_vals)
# LASSO regression
reg_lasso = linear_model.Lasso(alpha=.5)
reg_lasso.fit(X_vals, y_vals)
# Make predictions
y_hatL = reg_lasso.predict(X_vals)
print('LASSO reg_coef_ = ', reg_lasso.coef_)
print('LASSO reg_intercept_ = ', reg_lasso.intercept_)
linear reg_coef_ = [[0.69491525]]
linear reg_intercept_ = [1.01694915]
ridge reg_coef_ = [[0.68619247]]
ridge reg_intercept_ = [1.0460251]
LASSO reg_coef_ = [0.61864407]
LASSO reg_intercept_ = [1.27118644]
This time the coefficient differences are more noticeable.
Here are the training data in red, original OLS linear predictor in blue, the ridge predictor in green, and the LASSO predictor in cyan.
plt.plot(X_vals,y_vals,'.r',marker='o')
plt.plot(X_vals,y_hatOL,'.b',marker='d')
plt.plot(X_vals,y_hatR,'g',marker='x')
plt.plot(X_vals,y_hatL,'c',marker='s')
Standard OLS linear regression is ubiquitous. A regularized version is used when there are particular needs. We'll return to this point below when we have discussed multi-variate linear regression.
It is common that there is more than one independent variable in play and that we want a linear predictor for $y$ as a function of these many independent variables. In this case we could think of,
$$ \hat{y}(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots +\theta_p x_p. $$Note that this is quite different to the expression above for a high-degree polynomial. Don't get these notions confused.
In this case we would still expect to have training data, $y_i$ and $\boldsymbol{x}_i = (x_{1,i}, x_{2,i}, \ldots, x_{p,i})^T$ (for $i=1,\ldots,n$ say), that can be placed in a design matrix, and so we assume that we have,
$$ \boldsymbol{X} = \left(\begin{array}{llll} 1 & x_{1,1} & x_{2,1} & \cdots \\ 1 & x_{1,2} & x_{2,2} & \cdots \\ 1 & x_{1,3} & x_{2,3} & \cdots \\ \vdots & \vdots & \vdots & \ddots \\ \end{array}\right), \quad\boldsymbol{\theta} = \left(\begin{array}{l} \theta_0 \\ \theta_1 \\ \theta_2 \\ \theta_3 \\ \vdots \\ \end{array}\right) \quad\text{ and }\quad \boldsymbol{y} = \left(\begin{array}{l} y_1 \\ y_2 \\ y_3 \\ \vdots \\ \end{array}\right) $$We can still set up the MSE cost, $\mathcal{E}=N_p^{-1}\Vert \boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta}\Vert_2^2$, and show as before that we can minimize it if we can solve the normal equations.
The details can be found in the recommended reading.
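Here is a small sketch with made-up (hypothetical) two-feature data, just to show that the normal-equations recipe is unchanged in the multi-variate case.
rng = np.random.default_rng(0)
n = 20                                                         # hypothetical number of observations
x1 = rng.uniform(0, 10, size=(n,1))
x2 = rng.uniform(0, 10, size=(n,1))
y_mv = 2 + 0.5*x1 - 1.5*x2 + 0.1*rng.standard_normal((n,1))    # hypothetical 'truth' plus a little noise
X_mv = np.c_[np.ones((n,1)), x1, x2]                           # multi-variate design matrix
theta_mv = np.linalg.solve(X_mv.T @ X_mv, X_mv.T @ y_mv)
print(theta_mv.ravel())                                        # should be close to (2, 0.5, -1.5)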
Standard OLS linear regression is ubiquitous. The regularized versions are used when there are particular needs.
Ridge: this is often used to prevent overfitting - the trap that we discussed above where we lose the model's ability to generalise to unseen data.
LASSO: the $\ell_1$ norm encourages sparsity. This means that the vector of regression coefficients, $\boldsymbol{\theta}$, may contain several zeros. This propensity of the LASSO regularizer makes it a useful feature selection technique, particularly for multi-variable regression (as above).
For more discussion see [MLFCES, Chapter 3.3], [MML, Chap 9.2.4] and, in particular, [IPDS, Chapter 7.4].
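To see the sparsity-inducing effect in action, here is a sketch with hypothetical data in which only the first of five features actually influences the label; the LASSO tends to set the irrelevant coefficients exactly to zero.
rng = np.random.default_rng(1)
n, p = 50, 5                                           # hypothetical sizes
X_sp = rng.standard_normal((n,p))
y_sp = 3.0*X_sp[:,0] + 0.1*rng.standard_normal(n)      # only feature 0 matters
reg_sparse = linear_model.Lasso(alpha=0.5)
reg_sparse.fit(X_sp, y_sp)
print(reg_sparse.coef_)                                # most entries should be (near) zero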
Suppose that $\hat{y}(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \cdots +\theta_p x_p$, with $n$ observations. Then...
For $i=1,\ldots,n$ we have training data, $y_i$ and $\boldsymbol{x}_i = (x_{1,i}, x_{2,i}, \ldots, x_{p,i})^T$...
$$ \boldsymbol{X} = \left(\begin{array}{lllll} 1 & x_{1,1} & x_{2,1} & \cdots & x_{p,1} \\ 1 & x_{1,2} & x_{2,2} & \cdots & x_{p,2} \\ 1 & x_{1,3} & x_{2,3} & \cdots & x_{p,3} \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 1 & x_{1,n} & x_{2,n} & \cdots & x_{p,n} \\ \end{array}\right), \quad\boldsymbol{\theta} = \left(\begin{array}{l} \theta_0 \\ \theta_1 \\ \theta_2 \\ \vdots \\ \theta_p \\ \end{array}\right) \quad\text{ and }\quad \boldsymbol{y} = \left(\begin{array}{l} y_1 \\ y_2 \\ \vdots \\ y_n \\ \end{array}\right) $$The normal equations are $\boldsymbol{X}^T\boldsymbol{X}\boldsymbol{\theta}=\boldsymbol{X}^T\boldsymbol{y}$, where $\boldsymbol{X}^T\boldsymbol{X}$ is a $(p+1)$ by $(p+1)$ square symmetric matrix, and $\boldsymbol{X}^T\boldsymbol{y}$ a $(p+1)$ by $1$ column vector.
THEOREM: $\boldsymbol{X}^T\boldsymbol{X}$ is invertible if and only if $\boldsymbol{X}$ has full column rank.
In our earlier terms the column rank is the number of independent equations that the columns can describe.
The row rank is the number of independent equations that the rows can describe.
THEOREM: The row and column ranks of a matrix are identical.
Why? Use the SVD: $\boldsymbol{K} = \boldsymbol{U}\boldsymbol{\Sigma}\boldsymbol{V}^T$ and so $\boldsymbol{K}^T = \boldsymbol{V}\boldsymbol{\Sigma}^T\boldsymbol{U}^T$. In both cases the number of non-zero singular values (the rank) is the same, and this rank counts the number of independent equations whether we look along the rows or down the columns.
WARNING: (1) the column rank of $\boldsymbol{X}$ cannot exceed $n$, the number of rows.
WARNING: (2) so if $p\ge n$ then $\boldsymbol{X}$ cannot have the full column rank of $p+1$.
THEREFORE: if $p\ge n$ then $\boldsymbol{X}^T\boldsymbol{X}$ is not invertible and so the Normal Equations cannot be solved in the sense that we have meant above where we took $\boldsymbol{\theta}=\big(\boldsymbol{X}^T\boldsymbol{X}\big)^{-1}\boldsymbol{X}^T\boldsymbol{y}$.
See [IPDS, Chapter 7.1.4] for a deeper treatment.
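Here is a quick numerical sketch of the warning (with hypothetical sizes): when there are more regression coefficients than observations, $\boldsymbol{X}^T\boldsymbol{X}$ is rank deficient, but a least-squares routine such as np.linalg.lstsq still returns a (minimum-norm) minimizer of the cost.
rng = np.random.default_rng(2)
n, p = 4, 6                                                    # hypothetical: more features than observations
X_wide = np.c_[np.ones((n,1)), rng.standard_normal((n,p))]
y_wide = rng.standard_normal((n,1))
print(np.linalg.matrix_rank(X_wide.T @ X_wide))                # at most n = 4, so this (p+1) x (p+1) matrix is singular
theta_wide, *_ = np.linalg.lstsq(X_wide, y_wide, rcond=None)   # minimum-norm least squares solution
print(theta_wide.ravel())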
Fortunately there are other ways that we can seek to minimize the cost. The sklearn routines that we introduced will be able to do this.
This is another good reason to use reputable software libraries rather than always write your own code.
We can use these regression ideas for binary classification. The idea is to use a linear regression to create a decision boundary between the two classes.
The logistic, or sigmoid, function can then be used to create a switch from off to on as we cross the decision boundary.
Here, for $a\in\mathbb{R}$, is the logistic function, often called the sigmoid:
$$ \sigma(x\mid a) = \frac{1}{1+\exp(-ax)}, \qquad\text{for } a >0. $$In particular, the sigmoid function can model a probability.
def sigma(x, a):
return (1+np.exp(-a*x))**(-1)
x_vals = np.arange(-20, 20.1, 0.1)
y_vals_1 = sigma(x_vals, 1)
y_vals_03 = sigma(x_vals, 0.3)
y_vals_10 = sigma(x_vals, 10)
plt.figure(figsize=(10,4)); plt.gca().set_aspect(10)
plt.plot(x_vals, y_vals_1, color='blue', label='a = 1')
plt.plot(x_vals, y_vals_03, color='red', label='a = 0.3')
plt.plot(x_vals, y_vals_10, color='green', label='a = 10')
plt.xlabel('x'); plt.ylabel('sigma');
plt.legend()
plt.show()
It can be scaled by the choice of $a$, and also translated by $x_0$:
$$ \sigma(x \mid a, x_0) = \frac{1}{1+\exp\big(-a(x-x_0)\big)} $$
def sigma(x, a, x0):
return (1+np.exp(-a*(x-x0)))**(-1)
x_vals = np.arange(-20, 20.1, 0.1)
y_vals_0 = sigma(x_vals, 1, 0)
y_vals_5 = sigma(x_vals, 0.3, 5)
y_vals_m10 = sigma(x_vals, 10, -10)
plt.figure(figsize=(10,4)); plt.gca().set_aspect(10)
plt.plot(x_vals, y_vals_0, color='blue', label='a = 1, x0 = 0')
plt.plot(x_vals, y_vals_5, color='red', label='a = 0.3, x0 = 5')
plt.plot(x_vals, y_vals_m10, color='green', label='a = 10, x0 = -10')
plt.xlabel('x'); plt.ylabel('sigma');
plt.legend()
plt.show()
In 2D we can write this kind of thing
$$ \sigma(x_1, x_2 \mid a, b, c) = \frac{1}{1+\exp\big(-(a x_1+b x_2+c)\big)} $$For example, with $a=-1$, $b=1$ and $c=0$ we have,
$$ \sigma(x_1, x_2 \mid -1, 1, 0) = \frac{1}{1+\exp\big(-(x_2-x_1)\big)} $$Now, along the line $x_1 = x_2$ we have $\sigma(x_1, x_2 \mid -1, 1, 0)=0.5$.
To one side of the line $\sigma \to 0$ and to the other side of the line $\sigma \to 1$. This means we have a switch, or a signal, that we can think of as being off on one side of the line and switched on if we are on the other side.
This is binary classification.
from matplotlib import cm
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(projection='3d')
# Define dimensions
Nx, Ny, Nz = 10, 10, 1
X, Y, Z = np.meshgrid(np.arange(-2,Nx,0.1), np.arange(-2,Ny,0.1), np.arange(Nz))
# Create sigmoid data
sigmoid = (1+np.exp(-(Y-X)))**(-1)
# plot surface
ax.plot_surface(X[:, :, 0], Y[:, :, 0], sigmoid[:, :, 0], cmap=cm.Wistia, alpha=0.75)
ax.set_xlabel('x_1')
ax.set_ylabel('x_2')
ax.set_zlabel('logistic values')
ax.view_init(30, 30)
plt.show()
We are going to illustrate the idea behind logistic regression using the Iris Data Set. See, for example, https://en.wikipedia.org/wiki/Iris_flower_data_set for the details.
This is very well known. Something that any aspiring Data Scientist ought to be aware of.
It is not without controversy though. It was used by Fisher, and Fisher was associated with eugenics. You can read more about that here:
https://www.nature.com/articles/s41437-020-00394-6
If this offends you then you can replace the use of the Iris data below with the penguins data we have been using.
# load it in and take a look
sns.get_dataset_names()
dfi = sns.load_dataset('iris')
dfi.head()
| sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
sns.pairplot(dfi, hue='species', height = 1.5)
# we want a binary classifier so we drop the virginica data
dfid = dfi[ dfi['species'] != 'virginica' ]
sns.pairplot(dfid, hue='species', height = 1.5)
dfid.head()
| sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# let's use sepal width and petal length as our features
xall = dfid.iloc[:,[1,2]].values
# and species as our label
yall = dfid.iloc[:, 4].values
print(xall[0:5,:], yall[0:5])
[[3.5 1.4] [3. 1.4] [3.2 1.3] [3.1 1.5] [3.6 1.4]] ['setosa' 'setosa' 'setosa' 'setosa' 'setosa']
# select the setosa and versicolor feature rows
x_set = xall[yall == 'setosa',:]
x_ver = xall[yall == 'versicolor',:]
# set the vertical coordinate for the 3D surface plot
z_set = (1+np.exp(-(x_set[:,1]-x_set[:,0]) ))**(-1)
z_ver = (1+np.exp(-(x_ver[:,1]-x_ver[:,0]) ))**(-1)
fig = plt.figure(figsize=(6,6)) # some of this set up was done above
ax = fig.add_subplot(projection='3d')
ax.plot_surface(X[:, :, 0], Y[:, :, 0], sigmoid[:, :, 0], cmap=cm.Wistia, alpha=0.75)
ax.scatter(x_set[:,0], x_set[:,1], z_set, c='black', marker='o')
ax.scatter(x_ver[:,0], x_ver[:,1], z_ver, c='red', marker='o')
ax.set_xlabel('sepal width'); ax.set_ylabel('petal length')
ax.set_zlabel('logistic values'); ax.view_init(10, 10); plt.show()
What you see here is that the data are being separated by the sigmoid, or logistic, function's ramp from $0$ to $1$.
The idea behind logistic regression is to use training data to determine a line $ax_1+bx_2+c=0$ so that
$$ \sigma(x_1, x_2 \mid a, b, c) = \frac{1}{1+\exp\big(-(a x_1+b x_2+c)\big)} $$can be used as a classifier. The line is then a decision boundary.
For example, for input feature values $x_1$ and $x_2$, and for $a$, $b$ and $c$ determined by the regression, we would classify as:
$$ \left\{\begin{array}{ll} \sigma(x_1, x_2 \mid a, b, c) \ge 0.5 & (x_1, x_2)\text{ indicate Class 1 (e.g. versicolor)}; \\ \sigma(x_1, x_2 \mid a, b, c) < 0.5 & (x_1, x_2)\text{ indicate Class 2 (e.g. setosa)}; \end{array}\right. $$Suppose we use $\boldsymbol{\theta} = (\theta_0, \theta_1, \theta_2)^T$ with $\boldsymbol{x} = (1, x_1, x_2)^T$ to represent the decision boundary with $\boldsymbol{\theta}^T\boldsymbol{x}=0$. Then, with
$$ p = \frac{1}{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})} $$we get
$$ 1-p = 1 - \frac{1}{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})} = \frac{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})}{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})} - \frac{1}{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})} = \frac{\exp(-\boldsymbol{\theta}^T\boldsymbol{x})}{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})}. $$Hence,
$$ \frac{p}{1-p} = \frac{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})}{\exp(-\boldsymbol{\theta}^T\boldsymbol{x})} \frac{1}{1+\exp(-\boldsymbol{\theta}^T\boldsymbol{x})} =\exp(\boldsymbol{\theta}^T\boldsymbol{x}) $$and $0< p < 1$ - so $p$ is a probability and $p/(1-p)$ the odds.
Taking the log of both sides
$$ \ln\left(\frac{p}{1-p}\right) =\ln\exp(\boldsymbol{\theta}^T\boldsymbol{x}) $$Therefore, simplifying,
$$ \ln\left(\frac{p}{1-p}\right) =\boldsymbol{\theta}^T\boldsymbol{x} $$and there is the connection to linear regression. The decision boundary models the log-odds.
Suppose we have a training set of features $\boldsymbol{x}_1$, $\boldsymbol{x}_2$, $\boldsymbol{x}_3$, $\ldots$, with corresponding labels $y_1$, $y_2$, $y_3$, $\ldots$. We suppose that the labels are either $y=0$ or $y=1$ to represent the binary classification.
Then, for a given pair $(\boldsymbol{x}_k, y_k)$ with $\boldsymbol{x}_k = (x_{1,k}, x_{2,k})^T$, we can set up the squared loss,
$$ \big\vert y_k - \sigma(x_{1,k}, x_{2,k} \mid a, b, c)\big\vert^2 $$and use it to form a TSE or MSE cost over the training set. We then choose $a$, $b$ and $c$ to minimise this cost.
There is no closed-form solution to this (no equivalent of the Normal Equations), but we can use numerical techniques.
It can (of course) be done in sklearn. The details are here:
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
along with an actual example on the Iris data set.
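As a minimal sketch, reusing the xall and yall arrays prepared above (and noting that scikit-learn fits logistic regression by minimizing a regularized log-loss rather than the squared loss we wrote down), the classifier can be fitted and inspected like this.
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(xall, yall)                     # two features: sepal width, petal length
print(clf.intercept_, clf.coef_)        # c and (a, b) for the decision boundary
print(clf.predict([[3.0, 2.5]]))        # classify a hypothetical new flower
print(clf.predict_proba([[3.0, 2.5]]))  # the sigmoid probabilities for each class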
We just covered a great deal. We finish with a couple of remarks.
We discussed the fact that $\boldsymbol{X}^T\boldsymbol{X}$ is invertible if and only if $\boldsymbol{X}$ has full column rank.
For a discussion and proof see Page 149 of Linear Algebra for Everyone, by Gilbert Strang, Wellesley Cambridge Press, 2020.
This doesn't mean that we can't minimize our cost $\mathcal{E}(\boldsymbol{\theta})=\Vert\boldsymbol{y}-\boldsymbol{X}\boldsymbol{\theta}\Vert_2^2$ though.
But it may mean that the minimizer is not unique.
The situation is very technical - and beyond our scope.
[IPDS, Chapter 7.1.4] has an excellent discussion of this and you are referred there if you want to see the mathematical details.
Consider a model that produces a random variable $z$ as an estimate of an unknown $z_0$. We would like $\bar{z}$ - the mean of $z$ - to be $z_0$.
Further, the random variable will have a variance about that mean.
We have been using the squared error as a cost function. What is its expected value?
\begin{align*} \mathbb{E}\Big((z-z_0)^2\Big) & = \mathbb{E}\Big(\big((z-\bar{z})+(\bar{z}-z_0)\big)^2\Big), \\ & = \mathbb{E}\Big((z-\bar{z})^2+2(z-\bar{z})(\bar{z}-z_0)+(\bar{z}-z_0)^2\Big). \end{align*}Using linearity of expectation
\begin{align*} \mathbb{E}\Big((z-z_0)^2\Big) & = \mathbb{E}\Big((z-\bar{z})^2\Big) +2(\bar{z}-z_0)\mathbb{E}(z-\bar{z}) +\mathbb{E}\Big((\bar{z}-z_0)^2\Big), \\ & = \underbrace{\mathbb{E}\Big((z-\bar{z})^2\Big)}_{\mathrm{variance}} +2(\bar{z}-z_0)\underbrace{\mathbb{E}(z-\bar{z})}_{\mathrm{zero}} +\underbrace{(\bar{z}-z_0)^2}_{\mathrm{bias}^2}. \end{align*}Hence $\mathbb{E}\Big((z-z_0)^2\Big) = (\bar{z}-z_0)^2 + \mathbb{E}\Big((z-\bar{z})^2\Big) = \mathrm{bias}^2 + \mathrm{variance}$.
What this means is that for small MSE cost we need to control both the bias and the variance.
Typically, driving down the bias will mean increasing the variance and vice versa: there is a trade off.
See [IPDS, Theorem 7.5] and, in particular, [MLFCES, Chapter 4.4] for much higher quality discussions.
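Here is a small Monte Carlo sketch, with hypothetical numbers, illustrating the decomposition: we estimate $\mathbb{E}\big((z-z_0)^2\big)$ directly and compare it with bias$^2$ plus variance.
rng = np.random.default_rng(3)
z0 = 2.0                                        # hypothetical unknown true value
z = 2.3 + 0.5*rng.standard_normal(100000)       # hypothetical estimator: mean 2.3, standard deviation 0.5
mse = np.mean((z - z0)**2)
bias_sq = (np.mean(z) - z0)**2
variance = np.var(z)
print(mse, bias_sq + variance)                  # the two values agree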
We covered just enough to make progress at pace. We looked at ordinary least squares regression and the normal equations, polynomial regression and the danger of overfitting, Ridge and LASSO regularization, multi-variate linear regression, logistic regression for binary classification, and the bias-variance decomposition.
Now we can start putting all of this material to work.
In the next session we will briefly discuss Support Vector Machines (SVMs) and quickly move on to discuss the perceptron.
Our SVM treatment will be very shallow - to prepare yourself for this, your homework is to read https://www.syncfusion.com/succinctly-free-ebooks/support-vector-machines-succinctly up to page 43 prior to the next session. This isn't as much as it seems: pages 1-20 are a revision of the vector material we have already covered.