# Libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
# Data
from sklearn.datasets import load_breast_cancer

# Load the breast-cancer dataset that ships with scikit-learn and list the
# fields the returned container exposes.
cancer = load_breast_cancer()
cancer.keys()

# Wrap the feature matrix in a DataFrame with one named column per feature,
# then preview the first rows.
df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
df.head()
# PCA Visualisatie
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardise every feature column before PCA so that features measured on
# larger scales do not dominate the components.
scaler = StandardScaler().fit(df)
scaled_data = scaler.transform(df)
scaled_data

# Fit a two-component PCA on the standardised data and project the samples
# onto those components.
pca = PCA(n_components=2).fit(scaled_data)
pca_data = pca.transform(scaled_data)

# Compare the shapes before and after the projection.
scaled_data.shape
pca_data.shape

# Scatter the samples in the plane of the two components, coloured by the
# dataset's target label.
plt.figure(figsize=(8, 6))
first_pc = pca_data[:, 0]
second_pc = pca_data[:, 1]
plt.scatter(first_pc, second_pc, c=cancer.target, cmap='plasma')
plt.xlabel('Eerste PC')
plt.ylabel('Tweede PC')

# Fraction of the total variance captured by each retained component.
pca.explained_variance_ratio_
# Interpretatie
# Show the raw component loadings of the fitted PCA.
pca.components_

# Label the loadings with the original feature names: one row per principal
# component, one column per feature.
df_comp = pd.DataFrame(data=pca.components_, columns=cancer.feature_names)
df_comp

# Heatmap of the loadings, to see which features weigh most in each component.
plt.figure(figsize=(12, 6))
sns.heatmap(data=df_comp, cmap='plasma')