Principal components analysis using pandas dataframe
Most sklearn objects work with pandas
DataFrames just fine. Would something like this work for you?
# Minimal example: fit a 5-component PCA directly on a pandas DataFrame.
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

# Toy data: 20 samples (rows) x 10 features (columns) drawn from N(0, 1).
df = pd.DataFrame(data=np.random.normal(0, 1, (20, 10)))

# The DataFrame is passed to the estimator as-is; no manual conversion
# to a NumPy array is needed.
pca = PCA(n_components=5)
pca.fit(df)
After fitting, you can access the components themselves with:
pca.components_  # principal axes: array of shape (n_components, n_features)
# Full example: standardize a DataFrame, fit PCA on every component, then
# inspect the loadings table and the explained-variance profile.
import matplotlib.pyplot as plot
import numpy
import pandas
from sklearn.decomposition import PCA

# Toy data: 20 samples (rows) x 10 features (columns) drawn from N(0, 1).
df = pandas.DataFrame(data=numpy.random.normal(0, 1, (20, 10)))

# You must normalize the data before applying the fit method: PCA centers
# the input but does not rescale it, so features with larger variance would
# otherwise dominate the leading components.
df_normalized = (df - df.mean()) / df.std()

# Keep as many components as there are features.
pca = PCA(n_components=df.shape[1])
pca.fit(df_normalized)

# Reformat and view results: transpose components_ so that rows are the
# original features and columns are the principal components.
loadings = pandas.DataFrame(
    pca.components_.T,
    # Label by the number of components actually fitted rather than the
    # number of input columns, so the table stays valid if n_components
    # above is reduced.
    columns=['PC%s' % i for i in range(pca.n_components_)],
    index=df.columns,
)
print(loadings)

# Fraction of total variance captured by each component, in component order.
plot.plot(pca.explained_variance_ratio_)
plot.ylabel('Explained Variance')
plot.xlabel('Components')
plot.show()