Data maken m.b.v. moons

from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

Data maken m.b.v. moons

# Generate a noisy "moons" toy dataset; the fixed seed keeps it reproducible.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)

# One frame holding both feature columns (named 0 and 1) plus the class label,
# so seaborn can colour each point by its class.
df = pd.DataFrame(X).assign(label=y)

sns.scatterplot(data=df, x=0, y=1, hue="label")
plt.xlabel("Feature x1")
plt.ylabel("Feature x2")
plt.show()

../../../_images/228ed6132b006281b787b1804eba5cb828116be1831629352f4553d458bd97da.png

# Hold out 25% of the samples for testing (scikit-learn's default, made
# explicit here); the seed fixes which samples land in each split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

Ensemble aanmaken

# Ensemble of three different model families, each seeded for reproducibility.
# voting="hard" (majority vote over predicted labels) is the default and is
# spelled out only for readability.
voting_clf = VotingClassifier(
    estimators=[
        ("lr", LogisticRegression(random_state=42)),
        ("rf", RandomForestClassifier(random_state=42)),
        ("svc", SVC(random_state=42)),
    ],
    voting="hard",
)
# Fits a clone of every base estimator on the training split.
voting_clf.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(random_state=42))])

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Losse scores

# Report the test-set accuracy of each fitted base model on its own.
for model_name, fitted_model in voting_clf.named_estimators_.items():
    accuracy = fitted_model.score(X_test, y_test)
    print(model_name, "=", accuracy)

lr = 0.864
rf = 0.896
svc = 0.896

# Slice (rather than index) the first test sample so it stays 2-D, which is
# the input shape predict() expects.
X_test[:1]

array([[0.50169252, 0.21717211]])

# Ensemble prediction for the first test sample.
voting_clf.predict(X_test[:1])

array([1], dtype=int64)

# Predictions of each fitted base model for the first test sample, in the
# order the estimators were registered (lr, rf, svc).
[clf.predict(X_test[:1]) for clf in voting_clf.estimators_]

[array([1], dtype=int64), array([1], dtype=int64), array([0], dtype=int64)]

Hard voting

# Test-set accuracy of the ensemble as a whole.
voting_clf.score(X_test, y_test)

0.912

Soft voting

Hiervoor moet de SVC eerst zo worden ingesteld dat hij kansen (probabilities) teruggeeft in plaats van alleen een klasselabel (0 of 1).

# Soft voting averages the predicted class probabilities of the base models,
# so the SVC must be configured to produce probabilities as well. Both
# settings are applied in one call: `voting` on the ensemble itself and
# `probability` on the (unfitted) estimator registered under the name "svc".
voting_clf.set_params(voting="soft", svc__probability=True)

# Refit so the changed settings take effect, then evaluate on the test set.
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)

0.92