%%javascript
Jupyter.keyboard_manager.command_shortcuts.remove_shortcut('up');
Jupyter.keyboard_manager.command_shortcuts.remove_shortcut('down');
from IPython import display
#### Imports
import matplotlib.pyplot as plt
import numpy as np
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import axes3d
import pandas as pd
from rotate import rotanimate
import matplotlib.animation as animation
import subprocess
from IPython.display import Image
from matplotlib import cm
from IPython.display import Video
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import scale
plt.style.use('seaborn-whitegrid')  # on matplotlib >= 3.6 this style is named 'seaborn-v0_8-whitegrid'
plt.rcParams["figure.figsize"] = [10,6]
from sklearn import decomposition
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from itertools import chain
import seaborn as sns; sns.set() # styling
from functools import reduce
import functools
import operator
from numpy.polynomial import polynomial as P
#### Defaults
sns.set_style("darkgrid", {"axes.facecolor": ".9"})
plt.rcParams.update({'font.size': 12})
#### Functions
def foldl(func, acc, xs):
    return functools.reduce(func, xs, acc)
# tests
#print(foldl(operator.sub, 0, [1,2,3])) # -6
#print(foldl(operator.add, 'L', ['1','2','3'])) # 'L123'
def scanl_plus(data):
    '''
    Returns the list of successive partial sums of the list, starting at 0
    (cf. Haskell's scanl (+) 0).
    '''
    return [0] + [sum(data[:k + 1]) for k in range(len(data))]
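A quick behavioural check of scanl_plus (example values of my own, not from the original run); it returns the running sums with a leading zero, matching np.cumsum with a prepended 0:
print(scanl_plus([1, 2, 3]))                        # [0, 1, 3, 6]
print(np.concatenate(([0], np.cumsum([1, 2, 3]))))  # [0 1 3 6]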
def make1D(data):
    # turn a flat sequence into an (n, 1) column vector
    return np.array(list(map(lambda x: [x], data)))
def celsius_to_fahr(temp):
    return 9 / 5 * temp + 32
def gen_answers_from_alphas(inputs, new_alphas):
    return np.matmul(inputs, new_alphas)
def polyfitx(x, y, degree):
    results = {}
    coeffs = np.polyfit(x, y, degree)
    # Polynomial Coefficients
    results['polynomial'] = coeffs.tolist()
    # r-squared
    p = np.poly1d(coeffs)
    # fit values, and mean
    yhat = p(x)                        # or [p(z) for z in x]
    ybar = np.sum(y) / len(y)          # or sum(y)/len(y)
    ssreg = np.sum((yhat - ybar)**2)   # or sum([(yihat - ybar)**2 for yihat in yhat])
    sstot = np.sum((y - ybar)**2)      # or sum([(yi - ybar)**2 for yi in y])
    results['determination'] = ssreg / sstot
    return results
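A minimal check of polyfitx on points that lie exactly on y = 2x + 1 (my own example values):
print(polyfitx([0, 1, 2, 3], [1, 3, 5, 7], 1))
# ~ {'polynomial': [2.0, 1.0], 'determination': 1.0} (up to float rounding)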
def showECResults(title, ec_alphas, actual_alphas, principle_vals, ans, ans_scaler):
    AxH2 = principle_vals.dot(ec_alphas)
    new_nnAnsH2 = ans_scaler.inverse_transform(AxH2)
    rH2 = np.corrcoef(ans, new_nnAnsH2.reshape(-1,))
    rSq = rH2[1, 0] ** 2
    print(rSq)
    fig, ax = plt.subplots(1, 2)
    ax[0].plot(ans, new_nnAnsH2, 'o', color='green', marker=".", markersize=1)
    ax[1].plot(ec_alphas, color='green', marker=".", markersize=10)
    ax[1].plot(actual_alphas, ':o', color='orange', marker=".", markersize=12)
    fig.suptitle(title)
    plt.show()
def left_inverse(m):
    # (m.T m)^-1 m.T, computed via solve rather than an explicit inverse
    return np.linalg.solve(m.T.dot(m), m.T)
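Sanity check (toy matrix of my own): for a full-column-rank m, the left inverse composed with m gives the identity:
m_demo = np.array([[1., 0.],
                   [0., 1.],
                   [1., 1.]])
print(np.allclose(left_inverse(m_demo) @ m_demo, np.eye(2)))  # True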
# Two ways to compute R^2 from the lstsq residual ..
# rsq = 1 - residual / sum((y - y.mean())**2)
# or
# rsq = 1 - residual / (n * y.var())
# https://stackoverflow.com/questions/3054191/converting-numpy-lstsq-residual-value-to-r2
def lstsq_rsq(output_from_lstsq, inputs, answers):
    # output_from_lstsq[1] is the sum of squared residuals from np.linalg.lstsq
    rsq_ = 1 - output_from_lstsq[1] / sum((answers - answers.mean()) ** 2)
    return rsq_[0]
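The two formulas above agree because numpy's var() is the population variance, so n * y.var() equals sum((y - y.mean())**2); a quick check with made-up values:
x_demo = np.array([[1.], [2.], [3.], [4.]])
y_demo = np.array([[2.1], [3.9], [6.2], [7.8]])
out = np.linalg.lstsq(x_demo, y_demo, rcond=None)
print(1 - out[1] / sum((y_demo - y_demo.mean()) ** 2))
print(1 - out[1] / (len(y_demo) * y_demo.var()))  # same value both times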
def drawVector(origins, vectors):
    V = np.array(vectors)
    fig, ax = plt.subplots()
    fig.set_size_inches(8, 8)
    # transpose [[x0,y0],[x1,y1],...] into ([x0,x1,...], [y0,y1,...]) for quiver
    origins_l = np.array(list(map(list, zip(*origins))))
    ax.quiver(*origins_l, list(V[:, 0]), list(V[:, 1]),
              color=['r', 'b', 'g', 'r'], scale=1, units='xy')
    ax.set_aspect('equal')
    lim = 7
    plt.xlim(-lim, lim)
    plt.ylim(-lim, lim)
    plt.title('Vector Tutorial', fontsize=10)
    #plt.savefig('savedFig.png', bbox_inches='tight')
    plt.show()
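Example call (vectors of my own choosing): each row of the second argument is an (x, y) vector drawn from the matching origin in the first argument:
drawVector([[0, 0], [0, 0]], [[2, 3], [4, 1]])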
# Reduce Example
#reduce(lambda a,b: a+b, [1,2,3,4,5], 0)
#### Problem
toy_ins = np.array([[1, 4, 3],
                    [4, 6, 2],
                    [3, 5, 4],
                    [2, 2, 1],
                    [6, 6, 1]])
good_ans = np.array([18, 22, 25, 9, 21])
The true 'alphas' are [1, 2, 3]: good_ans is exactly toy_ins times that vector, as checked below.
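A quick check that those alphas really generate the answers:
print(toy_ins @ np.array([1, 2, 3]))   # [18 22 25  9 21] == good_ans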
toy_pca = decomposition.PCA(n_components=3) # Creates PCA object
toy_princ_vals = toy_pca.fit_transform(toy_ins) # Returns values in PCA space
toy_vr = scanl_plus ( toy_pca.explained_variance_ratio_ ) # make it cumulative (TPP)
toy_pcas = toy_pca.components_
print(toy_princ_vals.shape)
print(toy_pcas.shape)
print(toy_vr)
toy_princ_vals
(5, 3)
(3, 3)
[0, 0.6990178795111912, 0.971890627399632, 0.9999999999999999]
array([[-2.20094549, -0.88500297, -0.46109513],
       [ 1.50438887, -0.3658824 , -0.4928937 ],
       [-0.18388224, -1.73030859,  0.64204323],
       [-2.33458152,  2.03621564,  0.20869832],
       [ 3.21502037,  0.94497831,  0.10324729]])
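As a check on what fit_transform returned (using the mean_ and components_ attributes that sklearn's PCA exposes), the scores are just the centered inputs projected onto the components:
manual_scores = (toy_ins - toy_pca.mean_) @ toy_pca.components_.T
print(np.allclose(manual_scores, toy_princ_vals))   # True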
good_ans_scaler = StandardScaler()
good_ans_scaled = good_ans_scaler.fit_transform(make1D(good_ans)) # Returns scaled data
good_ans_scaled
array([[-0.18257419],
       [ 0.54772256],
       [ 1.09544512],
       [-1.82574186],
       [ 0.36514837]])
The column means are needed later to map the fitted alphas back into real (unscaled) input space.
col_means = toy_ins.mean(axis=0)   # per-column means; PCA centers on these
print("Column means -> ", col_means)
Column means -> [3.2 4.6 2.2]
print (toy_princ_vals.shape)
print (good_ans_scaled.shape)
pca_l_sq_scaled = np.linalg.lstsq(toy_princ_vals,good_ans_scaled,rcond=None)
print ("PCA Alphas (outs scaled) = ",pca_l_sq_scaled[0],"\n")
pca_l_sq_scaled
(5, 3)
(5, 1)
PCA Alphas (outs scaled) =  [[ 0.28178443]
 [-0.59292673]
 [ 0.18894999]]
(array([[ 0.28178443],
        [-0.59292673],
        [ 0.18894999]]),
 array([8.64509327e-32]),
 3,
 array([4.78829682, 2.99169285, 0.96020176]))
lstsq_rsq (pca_l_sq_scaled,toy_princ_vals,good_ans_scaled )
1.0
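The same fit can be cross-checked with LinearRegression (imported above but otherwise unused here); fit_intercept=False is appropriate because both the PCA scores and the standardized answers are centered:
lr = LinearRegression(fit_intercept=False).fit(toy_princ_vals, good_ans_scaled)
print(lr.coef_)                                   # same alphas as lstsq, transposed
print(lr.score(toy_princ_vals, good_ans_scaled))  # 1.0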
# Flatten the PCA space Alphas (lstsq puts them in arrays)
print(pca_l_sq_scaled[0])
theAlphas = list(chain(*pca_l_sq_scaled[0]))
print(theAlphas)
real_alphas = toy_pca.inverse_transform(theAlphas)
# unscale ..
(real_alphas - col_means) * 5.4772258   # 5.4772258 ~ good_ans_scaler.scale_ (see below)
[[ 0.28178443]
 [-0.59292673]
 [ 0.18894999]]
[0.28178443328826225, -0.5929267256335395, 0.1889499877019555]
array([1.00000004, 2.00000008, 3.00000012])
The factor 5.4772... is good_ans_scaler.scale_, the standard deviation of the answers (sqrt(30) = 5.47722558): the alphas were fitted against standardized answers, so they come out in scaled-answer units and must be multiplied by the scale to return to real units. Likewise, subtracting col_means removes the mean that inverse_transform adds back.
print(good_ans_scaler.scale_)
print(good_ans_scaler.mean_)
print(good_ans_scaler.var_)
[5.47722558]
[19.]
[30.]
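Putting the pieces together (a sketch using the names defined above): inverse_transform adds back toy_pca.mean_ (the column means), so real_alphas - col_means is just components_.T applied to the PCA-space alphas, and multiplying by good_ans_scaler.scale_ undoes the output standardization in one step:
real_coefs = toy_pca.components_.T @ np.array(theAlphas) * good_ans_scaler.scale_
print(real_coefs)   # ~ [1. 2. 3.]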
toy_pca = decomposition.PCA(n_components=2) # Creates PCA object
toy_princ_vals = toy_pca.fit_transform(toy_ins) # Returns values in PCA space
toy_vr = scanl_plus ( toy_pca.explained_variance_ratio_ ) # make it cumulative (TPP)
toy_pcas = toy_pca.components_
print(toy_princ_vals.shape)
print(toy_pcas.shape)
print(toy_vr)
toy_princ_vals
(5, 2)
(2, 3)
[0, 0.6990178795111912, 0.971890627399632]
array([[-2.20094549, -0.88500297],
       [ 1.50438887, -0.3658824 ],
       [-0.18388224, -1.73030859],
       [-2.33458152,  2.03621564],
       [ 3.21502037,  0.94497831]])
good_ans_scaler = StandardScaler()
good_ans_scaled = good_ans_scaler.fit_transform(make1D(good_ans)) # Returns scaled data
good_ans_scaled
array([[-0.18257419],
       [ 0.54772256],
       [ 1.09544512],
       [-1.82574186],
       [ 0.36514837]])
As before, the column means are needed to map the alphas back into real space.
col_means = toy_ins.mean(axis=0)   # per-column means; PCA centers on these
print("Column means -> ", col_means)
Column means -> [3.2 4.6 2.2]
print (toy_princ_vals.shape)
print (good_ans_scaled.shape)
pca_l_sq_scaled = np.linalg.lstsq(toy_princ_vals,good_ans_scaled,rcond=None)
print ("PCA Alphas (outs scaled) = ",pca_l_sq_scaled[0],"\n")
pca_l_sq_scaled
(5, 2)
(5, 1)
PCA Alphas (outs scaled) =  [[ 0.28178443]
 [-0.59292673]]
(array([[ 0.28178443],
        [-0.59292673]]),
 array([0.03291689]),
 2,
 array([4.78829682, 2.99169285]))
lstsq_rsq (pca_l_sq_scaled,toy_princ_vals,good_ans_scaled )
0.9934166229732385
# Flatten the PCA space Alphas (lstsq puts them in arrays)
print(pca_l_sq_scaled[0])
theAlphas = list(chain(*pca_l_sq_scaled[0]))
print(theAlphas)
real_alphas = toy_pca.inverse_transform(theAlphas)
# unscale ..
(real_alphas - col_means) * 5.4772258   # 5.4772258 ~ good_ans_scaler.scale_
[[ 0.28178443]
 [-0.59292673]]
[0.28178443328826225, -0.5929267256335394]
array([0.40469853, 2.62258499, 2.42635636])
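With only two components the recovered alphas can only approximate [1, 2, 3], since the dropped third component carried the remaining variance. A closing check (names from the cells above) confirms the real-space fit reproduces the ~0.9934 R² found earlier:
pred_scaled = toy_princ_vals @ np.array(theAlphas).reshape(-1, 1)
pred = good_ans_scaler.inverse_transform(pred_scaled).ravel()
print(pred)                                    # approximate answers
print(np.corrcoef(good_ans, pred)[0, 1] ** 2)  # ~ 0.9934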