Commit 2f174d1c authored by Jens Ehlers
Browse files

fixed typo

parent 94540f77
......@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Hauptkomponentenanalyse mit dem Bostong Housing-Datensatz"
"# Hauptkomponentenanalyse mit dem Boston Housing-Datensatz"
]
},
{
......
%% Cell type:markdown id: tags:
# Hauptkomponentenanalyse mit dem Bostong Housing-Datensatz
# Hauptkomponentenanalyse mit dem Boston Housing-Datensatz
%% Cell type:markdown id: tags:
### Daten einlesen
%% Cell type:code id: tags:
``` python
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell requires an older scikit-learn release to run.
boston = load_boston()

# Limit the rendered preview of data frames to six rows.
pd.set_option('display.max_rows', 6)

# One table holding every feature column followed by the target column.
df = pd.DataFrame(
    data=np.column_stack((boston['data'], boston['target'])),
    columns=boston['feature_names'].tolist() + ['TARGET'],
)
df
```
%%%% Output: execute_result
CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX \
0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0
1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0
2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0
.. ... ... ... ... ... ... ... ... ... ...
503 0.06076 0.0 11.93 0.0 0.573 6.976 91.0 2.1675 1.0 273.0
504 0.10959 0.0 11.93 0.0 0.573 6.794 89.3 2.3889 1.0 273.0
505 0.04741 0.0 11.93 0.0 0.573 6.030 80.8 2.5050 1.0 273.0
PTRATIO B LSTAT TARGET
0 15.3 396.90 4.98 24.0
1 17.8 396.90 9.14 21.6
2 17.8 392.83 4.03 34.7
.. ... ... ... ...
503 21.0 396.90 5.64 23.9
504 21.0 393.45 6.48 22.0
505 21.0 396.90 7.88 11.9
[506 rows x 14 columns]
%% Cell type:markdown id: tags:
### Matrix der Korrelationskoeffizienten
%% Cell type:code id: tags:
``` python
corr = df.corr()
# generate a mask for the upper triangle
mask = np.zeros_like(corr, dtype=np.bool); mask[np.triu_indices_from(mask)] = True
# generate a custom colormap
import seaborn as sns
cmap = sns.diverging_palette(220, 10, as_cmap=True)
%matplotlib inline
sns.heatmap(corr, mask=mask, cmap=cmap, vmin=-1, vmax=1)
```
%%%% Output: execute_result
<AxesSubplot:>
%%%% Output: display_data
%% Cell type:markdown id: tags:
### Standardisierung und Hauptkomponentenanalyse
%% Cell type:code id: tags:
``` python
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardise the feature matrix before the decomposition.
x = StandardScaler().fit_transform(boston['data'])

# Fit a PCA with three components and project the standardised data onto them.
pca = PCA(n_components=3)
xd = pca.fit_transform(x)

# Shape of the component matrix: (number of components, number of features).
pca.components_.shape
```
%%%% Output: execute_result
(3, 13)
%% Cell type:code id: tags:
``` python
# Share of the variance explained by each fitted principal component.
print(
    "Erklärter Anteil der Varianz je Hauptkomponente = ",
    pca.explained_variance_ratio_,
)

# Total share of the variance explained by all fitted components together.
sum(pca.explained_variance_ratio_)
```
%%%% Output: stream
Erklärter Anteil der Varianz je Hauptkomponente = [0.47129606 0.11025193 0.0955859 ]
%%%% Output: execute_result
0.6771338939748568
%% Cell type:markdown id: tags:
### Beitrag je Merkmal zu den Hauptkomponenten
%% Cell type:code id: tags:
``` python
import matplotlib.pyplot as plt

# Draw the component matrix as an image: one row per principal component,
# one column per feature, coloured with the colormap defined for the heatmap.
plt.matshow(pca.components_, cmap=cmap)

# Label columns with the feature names and rows with the component names.
plt.xticks(range(len(boston.feature_names)), boston.feature_names)
plt.yticks([0, 1, 2], ["PC1", "PC2", "PC3"])

plt.colorbar()
```
%%%% Output: execute_result
<matplotlib.colorbar.Colorbar at 0x268ff204c70>
%%%% Output: display_data
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment