Imports#

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Data#

from sklearn.datasets import load_iris
iris = load_iris(as_frame=True)
df = iris.data
df["target"] = iris.target
df.head()
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0
4 5.0 3.6 1.4 0.2 0
df.tail(10)
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
140 6.7 3.1 5.6 2.4 2
141 6.9 3.1 5.1 2.3 2
142 5.8 2.7 5.1 1.9 2
143 6.8 3.2 5.9 2.3 2
144 6.7 3.3 5.7 2.5 2
145 6.7 3.0 5.2 2.3 2
146 6.3 2.5 5.0 1.9 2
147 6.5 3.0 5.2 2.0 2
148 6.2 3.4 5.4 2.3 2
149 5.9 3.0 5.1 1.8 2
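
The integer codes in the target column map to the species names stored on the returned Bunch. As an optional check (not part of the original cells), the mapping and the class balance can be printed like this:

# Species names behind the integer target codes 0, 1 and 2
print(dict(enumerate(iris.target_names)))

# The dataset is balanced: 50 samples per class
print(df["target"].value_counts())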

Train-Test Split#

from sklearn.model_selection import train_test_split
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10)
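
Note that no random_state is passed, so every run produces a different 15-sample test set and slightly different scores below. A reproducible variant would look like this (random_state=42 is an arbitrary choice; stratify=y keeps the class balance equal in both splits):

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=42, stratify=y)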

Decision Tree#

from sklearn.tree import DecisionTreeClassifier
dtree = DecisionTreeClassifier(max_depth=2)
dtree.fit(X_train,y_train)
DecisionTreeClassifier(max_depth=2)
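As a quick sanity check before looking at the full report, the accuracy on the training and test data can be compared directly (a small extra step, not in the original cells):

# A large gap between these two numbers would point at overfitting
print(dtree.score(X_train, y_train))
print(dtree.score(X_test, y_test))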

Prediction and score#

y_pred = dtree.predict(X_test)
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, y_pred))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         5
           1       1.00      0.80      0.89         5
           2       0.83      1.00      0.91         5

    accuracy                           0.93        15
   macro avg       0.94      0.93      0.93        15
weighted avg       0.94      0.93      0.93        15
print(confusion_matrix(y_test, y_pred))
[[5 0 0]
 [0 4 1]
 [0 0 5]]
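
The same confusion matrix can also be plotted with the species names on the axes. A short sketch, assuming scikit-learn 1.0 or newer for ConfusionMatrixDisplay.from_predictions:

from sklearn.metrics import ConfusionMatrixDisplay

# Heatmap of the confusion matrix with readable class labels
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, display_labels=iris.target_names)
plt.show()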

Visualization#

from io import StringIO  # StringIO lives in the standard library; the six compatibility package is not needed on Python 3
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydot

features = list(df.columns[0:4])
features
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']
dot_data = StringIO()  
export_graphviz(dtree, out_file=dot_data, feature_names=features, filled=True, rounded=True)
graph = pydot.graph_from_dot_data(dot_data.getvalue())  # returns a list of graphs
Image(graph[0].create_png())
[PNG output: the fitted decision tree (max_depth=2) rendered by graphviz]
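
The graphviz/pydot route requires the Graphviz binaries to be installed. A pure-matplotlib alternative that draws the same fitted tree is sklearn.tree.plot_tree (a sketch, using the imports from the top of the notebook):

from sklearn.tree import plot_tree

# Same tree, rendered with matplotlib only (no graphviz/pydot needed)
plt.figure(figsize=(10, 6))
plot_tree(dtree, feature_names=features, class_names=list(iris.target_names),
          filled=True, rounded=True)
plt.show()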

Random Forest#

from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rfc.fit(X_train, y_train)
RandomForestClassifier(n_estimators=500, n_jobs=-1)
rfc_pred = rfc.predict(X_test)
print(confusion_matrix(y_test,rfc_pred))
[[19  0  0]
 [ 0 14  0]
 [ 0  2 10]]
print(classification_report(y_test,rfc_pred))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       0.88      1.00      0.93        14
           2       1.00      0.83      0.91        12

    accuracy                           0.96        45
   macro avg       0.96      0.94      0.95        45
weighted avg       0.96      0.96      0.96        45
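A single train-test split on a dataset this small gives a noisy estimate. As an optional extra, 5-fold cross-validation on the full data gives a more stable picture of the random forest's accuracy (a sketch, not part of the original notebook):

from sklearn.model_selection import cross_val_score

# Mean and spread of the accuracy over 5 folds
scores = cross_val_score(rfc, X, y, cv=5)
print(round(scores.mean(), 2), round(scores.std(), 2))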
for score, name in zip(rfc.feature_importances_, features):
    print(round(score, 2), name)
0.1 sepal length (cm)
0.03 sepal width (cm)
0.43 petal length (cm)
0.44 petal width (cm)
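
The importances are easier to compare in a sorted bar chart. A small sketch, reusing the pandas and matplotlib imports from the top of the notebook:

# Horizontal bar chart of the feature importances, least to most important
importances = pd.Series(rfc.feature_importances_, index=features).sort_values()
importances.plot.barh()
plt.xlabel("importance")
plt.show()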