# SVM process visualization

Created: 08-01-2021 · views: 4

## Support vector machine

For a two-class problem we seek the separating hyperplane with the maximum margin; the algorithm used here is a support vector machine classifier with a linear kernel.

#1. import
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.datasets import make_blobs

# 2. Create linearly separable sample data
# Two Gaussian blobs, 40 points in total; random_state fixes the layout.
X, y = make_blobs(n_samples=40, centers=2, random_state=6)
shape_report = "X:{}, y:{}".format(X.shape, y.shape)
print(shape_report)
# Quick look at the raw points, colored by class label
plt.scatter(X[:, 0], X[:, 1], c=y)

X:(40, 2), y:(40,)
<matplotlib.collections.PathCollection at 0x2661bb207b8>

## Plot the decision boundary

# 3. Draw the decision boundary
# Start from the scatter plot of the data itself.
plt.scatter(X[:, 0], X[:, 1], c=y)
ax = plt.gca()  # current Axes; created on demand if none exists

# Build an evaluation grid spanning the visible axis ranges.
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# BUG FIX: the original called np.linspace(xlim, xlim, 30), i.e. start == stop,
# which yields 30 identical values and collapses the grid to a single point.
# The intent is 30 evenly spaced values between the axis minimum and maximum.
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)

# meshgrid() expands the two 1-D vectors into coordinate matrices so every
# (x, y) pair on the 30x30 lattice is represented.
XX, YY = np.meshgrid(xx, yy)
# vstack() stacks the flattened coordinates row-wise; .T gives a (900, 2)
# array of points suitable as input samples for decision_function().
xy = np.vstack([XX.ravel(), YY.ravel()]).T
plt.scatter(xy[:, 0], xy[:, 1], s=1, cmap='rainbow')

<matplotlib.collections.PathCollection at 0x2661cbbc828>

# Fitting the model
# Fit the classifier: linear kernel with a large penalty C, so the margin is
# effectively hard (misclassification is heavily penalized).
model = svm.SVC(C=1000, kernel='linear')
clf = model.fit(X, y)  # fit() returns the estimator itself

SVC(C=1000, kernel='linear')


## Plot the support vectors and the separating hyperplane

# Plot the data, then overlay the margin contours and the support vectors.
plt.scatter(X[:, 0], X[:, 1], c=y)
ax = plt.gca()  # current Axes
# decision_function() returns each grid point's signed distance to the
# separating hyperplane.
Z = clf.decision_function(xy)
print('Z:{}'.format(Z.shape))
# contour() needs Z laid out on the same 2-D lattice as XX and YY.
Z = Z.reshape(XX.shape)
print('new Z:{}'.format(Z.shape))
# Three contour lines: the two margins (distances -1 and +1, dashed) and the
# decision boundary itself (distance 0, solid).
margin_levels = [-1, 0, 1]
margin_styles = ['--', '-', '--']
ax.contour(XX, YY, Z, colors='k', levels=margin_levels,
           alpha=1, linestyles=margin_styles)
# Circle the support vectors: hollow markers drawn over the data points.
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
           s=100, linewidth=1, facecolors='none', edgecolors='k')

Z:(900,)
new Z:(30, 30)

<matplotlib.collections.PathCollection at 0x2661cc394a8>

clf.support_vectors_  # return the support vectors

array([[ 7.27059007, -4.84225716],
       [ 5.95313618, -6.82945967],
       [ 7.89359985, -7.41655113]])

# Explore the fitted model
clf.predict(X)  # classify each sample in X by the decision boundary; returns an array of shape (n_samples,)

array([1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1])

clf.score(X,y)#Returns the average accuracy of the given test data and target
1.0


## Explore what Z really is (decision_function())

# Z holds the signed distance of each grid point from the decision boundary,
# so the `levels` passed to contour() are literally distance values to draw.
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow')
axes = plt.gca()
axes.contour(XX, YY, Z,
             colors='k',
             levels=[-3, 3],
             linestyles=['--', '-'])

<matplotlib.contour.QuadContourSet at 0x2661ccaea58>

Created: 08-01-2021