%%javascript
Jupyter.keyboard_manager.command_shortcuts.remove_shortcut('up');
Jupyter.keyboard_manager.command_shortcuts.remove_shortcut('down');
from IPython import display
#### Imports
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import axes3d
import pandas as pd
from rotate import rotanimate
import matplotlib.animation as animation
import subprocess
from IPython.display import Image
from matplotlib import cm
from IPython.display import Video
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import scale
plt.style.use('seaborn-whitegrid')  # on matplotlib >= 3.6 this style is named 'seaborn-v0_8-whitegrid'
plt.rcParams["figure.figsize"] = [10,6]
from sklearn import decomposition
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from itertools import chain
import seaborn as sns; sns.set() # styling
from functools import reduce
import functools
import operator
from numpy.polynomial import polynomial as P
#### Defaults
sns.set_style("darkgrid", {"axes.facecolor": ".9"})
plt.rcParams.update({'font.size': 12})
#### Functions
def foldl(func, acc, xs):
    return functools.reduce(func, xs, acc)
# tests
#print(foldl(operator.sub, 0, [1,2,3])) # -6
#print(foldl(operator.add, 'L', ['1','2','3'])) # 'L123'
def scanl_plus(data):
    '''
    Returns the list of successive partial sums of the list, starting at 0
    (cf. Haskell's scanl (+) 0).
    '''
    return [0] + [sum(data[:k + 1]) for k in range(len(data))]
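A quick behavioural check of scanl_plus (example values of my own, not from the original run); it returns the running sums with a leading zero, matching np.cumsum with a prepended 0:
print(scanl_plus([1, 2, 3]))                        # [0, 1, 3, 6]
print(np.concatenate(([0], np.cumsum([1, 2, 3]))))  # [0 1 3 6]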
def make1D(data):
    # turn a flat sequence into an (n, 1) column vector
    return np.array(list(map(lambda x: [x], data)))
def celsius_to_fahr(temp):
    return 9 / 5 * temp + 32
def gen_answers_from_alphas(inputs, new_alphas):
    return np.matmul(inputs, new_alphas)
def polyfitx(x, y, degree):
    results = {}
    coeffs = np.polyfit(x, y, degree)
    # Polynomial Coefficients
    results['polynomial'] = coeffs.tolist()
    # r-squared
    p = np.poly1d(coeffs)
    # fit values, and mean
    yhat = p(x)                        # or [p(z) for z in x]
    ybar = np.sum(y) / len(y)          # or sum(y)/len(y)
    ssreg = np.sum((yhat - ybar)**2)   # or sum([(yihat - ybar)**2 for yihat in yhat])
    sstot = np.sum((y - ybar)**2)      # or sum([(yi - ybar)**2 for yi in y])
    results['determination'] = ssreg / sstot
    return results
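A minimal check of polyfitx on points that lie exactly on y = 2x + 1 (my own example values):
print(polyfitx([0, 1, 2, 3], [1, 3, 5, 7], 1))
# ~ {'polynomial': [2.0, 1.0], 'determination': 1.0} (up to float rounding)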
def showECResults(title, ec_alphas, actual_alphas, principle_vals, ans, ans_scaler):
    AxH2 = principle_vals.dot(ec_alphas)
    new_nnAnsH2 = ans_scaler.inverse_transform(AxH2)
    rH2 = np.corrcoef(ans, new_nnAnsH2.reshape(-1,))
    rSq = rH2[1, 0] ** 2
    print(rSq)
    fig, ax = plt.subplots(1, 2)
    ax[0].plot(ans, new_nnAnsH2, 'o', color='green', marker=".", markersize=1)
    ax[1].plot(ec_alphas, color='green', marker=".", markersize=10)
    ax[1].plot(actual_alphas, ':o', color='orange', marker=".", markersize=12)
    fig.suptitle(title)
    plt.show()
def left_inverse(m):
    # (m.T m)^-1 m.T, computed via solve rather than an explicit inverse
    return np.linalg.solve(m.T.dot(m), m.T)
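Sanity check (toy matrix of my own): for a full-column-rank m, the left inverse composed with m gives the identity:
m_demo = np.array([[1., 0.],
                   [0., 1.],
                   [1., 1.]])
print(np.allclose(left_inverse(m_demo) @ m_demo, np.eye(2)))  # True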
# Two ways to compute R^2 from the lstsq residual ..
# rsq = 1 - residual / sum((y - y.mean())**2)
# or
# rsq = 1 - residual / (n * y.var())
# https://stackoverflow.com/questions/3054191/converting-numpy-lstsq-residual-value-to-r2
def lstsq_rsq(output_from_lstsq, inputs, answers):
    # output_from_lstsq[1] is the sum of squared residuals from np.linalg.lstsq
    rsq_ = 1 - output_from_lstsq[1] / sum((answers - answers.mean()) ** 2)
    return rsq_[0]
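The two formulas above agree because numpy's var() is the population variance, so n * y.var() equals sum((y - y.mean())**2); a quick check with made-up values:
x_demo = np.array([[1.], [2.], [3.], [4.]])
y_demo = np.array([[2.1], [3.9], [6.2], [7.8]])
out = np.linalg.lstsq(x_demo, y_demo, rcond=None)
print(1 - out[1] / sum((y_demo - y_demo.mean()) ** 2))
print(1 - out[1] / (len(y_demo) * y_demo.var()))  # same value both times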
def drawVector(origins, vectors):
    V = np.array(vectors)
    fig, ax = plt.subplots()
    fig.set_size_inches(8, 8)
    # transpose [[x0,y0],[x1,y1],...] into ([x0,x1,...], [y0,y1,...]) for quiver
    origins_l = np.array(list(map(list, zip(*origins))))
    ax.quiver(*origins_l, list(V[:, 0]), list(V[:, 1]),
              color=['r', 'b', 'g', 'r'], scale=1, units='xy')
    ax.set_aspect('equal')
    lim = 7
    plt.xlim(-lim, lim)
    plt.ylim(-lim, lim)
    plt.title('Vector Tutorial', fontsize=10)
    #plt.savefig('savedFig.png', bbox_inches='tight')
    plt.show()
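Example call (vectors of my own choosing): each row of the second argument is an (x, y) vector drawn from the matching origin in the first argument:
drawVector([[0, 0], [0, 0]], [[2, 3], [4, 1]])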
# Reduce Example
#reduce(lambda a,b: a+b, [1,2,3,4,5], 0)
#### Problem
toy_ins = np.array([[1, 4, 3],
                    [4, 6, 2],
                    [3, 5, 4],
                    [2, 2, 1],
                    [6, 6, 1]])
good_ans = np.array([18, 22, 25, 9, 21])
The true 'alphas' are [1, 2, 3]: good_ans is exactly toy_ins times that vector, as checked below.
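A quick check that those alphas really generate the answers:
print(toy_ins @ np.array([1, 2, 3]))   # [18 22 25  9 21] == good_ans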
toy_pca = decomposition.PCA(n_components=3) # Creates PCA object
toy_princ_vals = toy_pca.fit_transform(toy_ins) # Returns values in PCA space
toy_vr = scanl_plus ( toy_pca.explained_variance_ratio_ ) # make it cumulative (TPP)
toy_pcas = toy_pca.components_
print(toy_princ_vals.shape)
print(toy_pcas.shape)
print(toy_vr)
toy_princ_vals
(5, 3)
(3, 3)
[0, 0.6990178795111912, 0.971890627399632, 0.9999999999999999]
array([[-2.20094549, -0.88500297, -0.46109513],
       [ 1.50438887, -0.3658824 , -0.4928937 ],
       [-0.18388224, -1.73030859,  0.64204323],
       [-2.33458152,  2.03621564,  0.20869832],
       [ 3.21502037,  0.94497831,  0.10324729]])
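As a check on what fit_transform returned (using the mean_ and components_ attributes that sklearn's PCA exposes), the scores are just the centered inputs projected onto the components:
manual_scores = (toy_ins - toy_pca.mean_) @ toy_pca.components_.T
print(np.allclose(manual_scores, toy_princ_vals))   # True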
good_ans_scaler = StandardScaler()
good_ans_scaled = good_ans_scaler.fit_transform(make1D(good_ans)) # Returns scaled data
good_ans_scaled
array([[-0.18257419],
       [ 0.54772256],
       [ 1.09544512],
       [-1.82574186],
       [ 0.36514837]])
The column means are needed later to map the fitted alphas back into real (unscaled) input space.
col_means = toy_ins.mean(axis=0)   # per-column means; PCA centers on these
print("Column means -> ", col_means)
Column means -> [3.2 4.6 2.2]
print (toy_princ_vals.shape)
print (good_ans_scaled.shape)
pca_l_sq_scaled = np.linalg.lstsq(toy_princ_vals,good_ans_scaled,rcond=None)
print ("PCA Alphas (outs scaled) = ",pca_l_sq_scaled[0],"\n")
pca_l_sq_scaled
(5, 3)
(5, 1)
PCA Alphas (outs scaled) =  [[ 0.28178443]
 [-0.59292673]
 [ 0.18894999]]
(array([[ 0.28178443],
        [-0.59292673],
        [ 0.18894999]]),
 array([8.64509327e-32]),
 3,
 array([4.78829682, 2.99169285, 0.96020176]))
lstsq_rsq (pca_l_sq_scaled,toy_princ_vals,good_ans_scaled )
1.0
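The same fit can be cross-checked with LinearRegression (imported above but otherwise unused here); fit_intercept=False is appropriate because both the PCA scores and the standardized answers are centered:
lr = LinearRegression(fit_intercept=False).fit(toy_princ_vals, good_ans_scaled)
print(lr.coef_)                                   # same alphas as lstsq, transposed
print(lr.score(toy_princ_vals, good_ans_scaled))  # 1.0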
# Flatten the PCA space Alphas (lstsq puts them in arrays)
print(pca_l_sq_scaled[0])
theAlphas = list(chain(*pca_l_sq_scaled[0]))
print(theAlphas)
real_alphas = toy_pca.inverse_transform(theAlphas)
# unscale ..
(real_alphas - col_means) * 5.4772258   # 5.4772258 ~ good_ans_scaler.scale_ (see below)
[[ 0.28178443]
 [-0.59292673]
 [ 0.18894999]]
[0.28178443328826225, -0.5929267256335395, 0.1889499877019555]
array([1.00000004, 2.00000008, 3.00000012])
The factor 5.4772... is good_ans_scaler.scale_, the standard deviation of the answers (sqrt(30) = 5.47722558): the alphas were fitted against standardized answers, so they come out in scaled-answer units and must be multiplied by the scale to return to real units. Likewise, subtracting col_means removes the mean that inverse_transform adds back.
print(good_ans_scaler.scale_)
print(good_ans_scaler.mean_)
print(good_ans_scaler.var_)
[5.47722558]
[19.]
[30.]
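Putting the pieces together (a sketch using the names defined above): inverse_transform adds back toy_pca.mean_ (the column means), so real_alphas - col_means is just components_.T applied to the PCA-space alphas, and multiplying by good_ans_scaler.scale_ undoes the output standardization in one step:
real_coefs = toy_pca.components_.T @ np.array(theAlphas) * good_ans_scaler.scale_
print(real_coefs)   # ~ [1. 2. 3.]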
toy_pca = decomposition.PCA(n_components=2) # Creates PCA object
toy_princ_vals = toy_pca.fit_transform(toy_ins) # Returns values in PCA space
toy_vr = scanl_plus ( toy_pca.explained_variance_ratio_ ) # make it cumulative (TPP)
toy_pcas = toy_pca.components_
print(toy_princ_vals.shape)
print(toy_pcas.shape)
print(toy_vr)
toy_princ_vals
(5, 2)
(2, 3)
[0, 0.6990178795111912, 0.971890627399632]
array([[-2.20094549, -0.88500297],
       [ 1.50438887, -0.3658824 ],
       [-0.18388224, -1.73030859],
       [-2.33458152,  2.03621564],
       [ 3.21502037,  0.94497831]])
good_ans_scaler = StandardScaler()
good_ans_scaled = good_ans_scaler.fit_transform(make1D(good_ans)) # Returns scaled data
good_ans_scaled
array([[-0.18257419],
       [ 0.54772256],
       [ 1.09544512],
       [-1.82574186],
       [ 0.36514837]])
As before, the column means are needed to map the alphas back into real space.
col_means = toy_ins.mean(axis=0)   # per-column means; PCA centers on these
print("Column means -> ", col_means)
Column means -> [3.2 4.6 2.2]
print (toy_princ_vals.shape)
print (good_ans_scaled.shape)
pca_l_sq_scaled = np.linalg.lstsq(toy_princ_vals,good_ans_scaled,rcond=None)
print ("PCA Alphas (outs scaled) = ",pca_l_sq_scaled[0],"\n")
pca_l_sq_scaled
(5, 2)
(5, 1)
PCA Alphas (outs scaled) =  [[ 0.28178443]
 [-0.59292673]]
(array([[ 0.28178443],
        [-0.59292673]]),
 array([0.03291689]),
 2,
 array([4.78829682, 2.99169285]))
lstsq_rsq (pca_l_sq_scaled,toy_princ_vals,good_ans_scaled )
0.9934166229732385
# Flatten the PCA space Alphas (lstsq puts them in arrays)
print(pca_l_sq_scaled[0])
theAlphas = list(chain(*pca_l_sq_scaled[0]))
print(theAlphas)
real_alphas = toy_pca.inverse_transform(theAlphas)
# unscale ..
(real_alphas - col_means) * 5.4772258   # 5.4772258 ~ good_ans_scaler.scale_
[[ 0.28178443]
 [-0.59292673]]
[0.28178443328826225, -0.5929267256335394]
array([0.40469853, 2.62258499, 2.42635636])
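With only two components the recovered alphas can only approximate [1, 2, 3], since the dropped third component carried the remaining variance. A closing check (names from the cells above) confirms the real-space fit reproduces the ~0.9934 R² found earlier:
pred_scaled = toy_princ_vals @ np.array(theAlphas).reshape(-1, 1)
pred = good_ans_scaler.inverse_transform(pred_scaled).ravel()
print(pred)                                    # approximate answers
print(np.corrcoef(good_ans, pred)[0, 1] ** 2)  # ~ 0.9934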