kNN, Naive Bayes, SVM, (Random Forest)をScikit-learnでやってみた。データはiPython NotebookでReSTで出力したものをpandocでmarkdown_strictに変換しなおしてblogに貼り付けた。
描画用のヘルパー関数とデータセットの読み込み
from matplotlib.colors import ListedColormap
import Image
from numpy import *
from pylab import *
import pickle
def myplot_2D_boundary(X, y, classifier=None, step=0.02):
    """Plot a classifier's predictions over a 2-D grid with the samples on top.

    A grid spanning the samples (padded by 1 on every side) is classified
    point by point.  The predictions are drawn as a background mesh using
    the module-level ``cmap_light`` and the samples are scattered over it
    using the module-level ``cmap_bold``.  The figure is shown immediately.

    Parameters
    ----------
    X : 2-D array
        Sample coordinates; column 0 goes on the x axis, column 1 on the
        y axis.
    y : sequence
        Per-sample values handed to ``plt.scatter`` as colours.
    classifier : object with a ``predict`` method, optional
        Model to visualise.  When omitted, the module-level ``clf`` is
        used, which is how the rest of this script calls the function.
    step : float, optional
        Spacing between grid points.  Defaults to 0.02.
    """
    # Fall back to the global ``clf`` so existing two-argument calls keep working.
    model = clf if classifier is None else classifier

    # Pad the data range by 1 on each side before building the grid.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict on the flattened grid, then fold the result back into grid shape.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.show()
def _load_points(path):
    """Read the three objects pickled back to back in the file at *path*.

    Returns them in file order as ``(class_1, class_2, labels)``.
    """
    # Pickle streams are binary data: text mode can corrupt them on Windows
    # and is rejected by Python 3, so the file is opened with 'rb'.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as f:
        class_1 = pickle.load(f)
        class_2 = pickle.load(f)
        labels = pickle.load(f)
    return class_1, class_2, labels


# For each file, the two per-class arrays are stacked into one sample
# array (X_*), and the stored labels become the matching target (y_*).
class_1, class_2, labels = _load_points('points_normal.pkl')
X_normal = np.r_[class_1, class_2]
y_normal = labels

class_1, class_2, labels = _load_points('points_ring.pkl')
X_ring = np.r_[class_1, class_2]
y_ring = labels

class_1, class_2, labels = _load_points('points_normal_test.pkl')
X_normal_test = np.r_[class_1, class_2]
y_normal_test = labels

class_1, class_2, labels = _load_points('points_ring_test.pkl')
X_ring_test = np.r_[class_1, class_2]
y_ring_test = labels

# Colour maps read by myplot_2D_boundary: light shades for the background
# mesh, saturated shades for the scattered samples.
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00'])
kNN
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets
# 3-nearest-neighbour classifier on the "normal" training set.
# ``fit`` returns the estimator itself, so construction and training are chained.
clf = neighbors.KNeighborsClassifier(n_neighbors=3).fit(X_normal, y_normal)
myplot_2D_boundary(X_normal, y_normal)

# Same model, trained from scratch on the "ring" training set.
clf = neighbors.KNeighborsClassifier(n_neighbors=3).fit(X_ring, y_ring)
myplot_2D_boundary(X_ring, y_ring)

ナイーブベイズ
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes on the "normal" set: train, predict the held-out
# test set, report how many test labels differ from the predictions, and
# plot the predictions over the training samples.
clf = GaussianNB()
clf.fit(X_normal, y_normal)
labels_pred = clf.predict(X_normal_test)
# Single-argument parenthesised print gives the same output on Python 2 and 3.
print("Number of mislabeled points out of a total %d points : %d"
      % (y_normal_test.shape[0], (y_normal_test != labels_pred).sum()))
myplot_2D_boundary(X_normal, y_normal)

# Same procedure on the "ring" set.
clf = GaussianNB()
clf.fit(X_ring, y_ring)
labels_pred = clf.predict(X_ring_test)
print("Number of mislabeled points out of a total %d points : %d"
      % (y_ring_test.shape[0], (y_ring_test != labels_pred).sum()))
myplot_2D_boundary(X_ring, y_ring)

SVM
from sklearn import svm
# Support vector classifier with default settings on the "normal" set:
# train, predict the held-out test set, report how many test labels differ
# from the predictions, and plot the predictions over the training samples.
clf = svm.SVC()
clf.fit(X_normal, y_normal)
labels_pred = clf.predict(X_normal_test)
# Single-argument parenthesised print gives the same output on Python 2 and 3.
print("Number of mislabeled points out of a total %d points : %d"
      % (y_normal_test.shape[0], (y_normal_test != labels_pred).sum()))
myplot_2D_boundary(X_normal, y_normal)

# Same procedure on the "ring" set.
clf = svm.SVC()
clf.fit(X_ring, y_ring)
labels_pred = clf.predict(X_ring_test)
print("Number of mislabeled points out of a total %d points : %d"
      % (y_ring_test.shape[0], (y_ring_test != labels_pred).sum()))
myplot_2D_boundary(X_ring, y_ring)

Random Forest
from sklearn.ensemble import RandomForestClassifier
# Random forest with 10 trees on the "normal" set: train, predict the
# held-out test set, report how many test labels differ from the
# predictions, and plot the predictions over the training samples.
clf = RandomForestClassifier(n_estimators=10)
clf.fit(X_normal, y_normal)
labels_pred = clf.predict(X_normal_test)
# Single-argument parenthesised print gives the same output on Python 2 and 3.
print("Number of mislabeled points out of a total %d points : %d"
      % (y_normal_test.shape[0], (y_normal_test != labels_pred).sum()))
myplot_2D_boundary(X_normal, y_normal)

# Same procedure on the "ring" set.
# (This import repeats the one above the section; it is kept as in the original.)
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(n_estimators=10)
clf.fit(X_ring, y_ring)
labels_pred = clf.predict(X_ring_test)
print("Number of mislabeled points out of a total %d points : %d"
      % (y_ring_test.shape[0], (y_ring_test != labels_pred).sum()))
myplot_2D_boundary(X_ring, y_ring)
