import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
def iris_data():
    from sklearn.datasets import load_iris
    iris = load_iris()
    return iris.data, iris.target

def toy_data():
    X = np.array([[3, 3], [4, 3], [1, 1]])
    Y = np.array([1, 1, -1])
    return X, Y

def random_data():
    np.random.seed(2333)
    # np.r_ stacks the two sample blocks vertically (row-wise concatenation)
    X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
    Y = [0] * 20 + [1] * 20
    return X, Y
| Class | Description |
| --- | --- |
| svm.LinearSVC | Linear Support Vector Classification. |
| svm.LinearSVR | Linear Support Vector Regression. |
| svm.NuSVC | Nu-Support Vector Classification. |
| svm.NuSVR | Nu Support Vector Regression. |
| svm.SVC | C-Support Vector Classification. |
| svm.SVR | Epsilon-Support Vector Regression. |
| svm.OneClassSVM | Unsupervised Outlier Detection. |
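All of these estimators share the same fit/predict/score interface, so they can be swapped freely. A minimal sketch, reusing the `random_data` helper above (the choice of models and their default settings is illustrative):

```python
# Illustrative sketch: the classifiers above are interchangeable through
# the shared fit/predict/score estimator interface.
from sklearn import svm

X, Y = random_data()
for Model in (svm.LinearSVC, svm.NuSVC, svm.SVC):
    clf = Model().fit(X, Y)
    print(Model.__name__, clf.score(X, Y))
```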
class sklearn.svm.SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)
Parameters:

- C : float, default=1.0
- kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
- degree : int, default=3
- gamma : {'scale', 'auto'} or float, default='scale'
- decision_function_shape : {'ovo', 'ovr'}, default='ovr'
- break_ties : bool, default=False

Attributes:

- support_vectors_ : ndarray of the fitted support vectors
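The default gamma='scale' is documented as 1 / (n_features * X.var()). A quick sketch checking that fitting with the explicit float behaves identically (reusing the `iris_data` helper above):

```python
# Sketch: gamma='scale' resolves to 1 / (n_features * X.var()),
# so fitting with that explicit float should give the same model.
import numpy as np
from sklearn import svm

X, Y = iris_data()
g = 1 / (X.shape[1] * X.var())
m1 = svm.SVC(gamma='scale').fit(X, Y)
m2 = svm.SVC(gamma=g).fit(X, Y)
print(np.allclose(m1.decision_function(X), m2.decision_function(X)))  # True
```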
| Method | Description |
| --- | --- |
| decision_function(X) | Evaluate the decision function for the samples in X. |
| fit(X, y[, sample_weight]) | Fit the SVM model according to the given training data. |
| get_params([deep]) | Get parameters for this estimator. |
| predict(X) | Perform classification on samples in X. |
| score(X, y[, sample_weight]) | Return the mean accuracy on the given test data and labels. |
| set_params(**params) | Set the parameters of this estimator. |
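A short sketch exercising these methods on the 3-point `toy_data` problem defined above:

```python
# Sketch: the main SVC methods on the linearly separable toy problem.
from sklearn import svm

X, Y = toy_data()
model = svm.SVC(kernel='linear', C=1.0).fit(X, Y)
print(model.predict(X))              # predicted class labels
print(model.decision_function(X))    # signed distances to the hyperplane
print(model.score(X, Y))             # mean accuracy (1.0 on this separable set)
print(model.support_vectors_)        # support vectors found by fit
print(model.get_params()['kernel'])  # constructor parameters
```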
#X, Y = toy_data()
X, Y = random_data()
#X, Y = iris_data()
#print(X,Y)
cnt = 0
def train_model(kernel, C):
    global cnt
    cnt += 1
    print("{:=^100}".format(" Training run " + str(cnt) + " "))
    model = svm.SVC(kernel=kernel, C=C)
    model.fit(X, Y)
    pred = model.predict(X)
    # note: the report below evaluates on the training data itself
    print(classification_report(Y, pred))
train_model("linear",1)
train_model("linear",5)
train_model("poly",5)
========================================== Training run 1 ==========================================
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

========================================== Training run 2 ==========================================
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

========================================== Training run 3 ==========================================
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40
Hyperparameters: in machine learning, a hyperparameter is a parameter whose value is set before the learning process begins (here: C, kernel, gamma, decision_function_shape), as opposed to the model parameters learned from the data during training.
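scikit-learn can automate this kind of search; a minimal GridSearchCV sketch (the grid values are illustrative), as an alternative to the manual loop below:

```python
# Illustrative sketch: cross-validated hyperparameter search instead of
# the manual loop that follows.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

X, Y = iris_data()
param_grid = {'C': [1, 2, 5], 'gamma': [5, 10, 'scale']}
search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=5)
search.fit(X, Y)
print(search.best_params_, search.best_score_)
```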
X, Y = iris_data()
#print(X,Y)
print(len(X), len(Y))
# print(classification_report(Y, pred))
# print(Y)
# print(pred)
# print(Y == pred)
cnt = 0
res = []
def train_model(size, ga, df):
    global cnt, res
    cnt += 1
    print("{:=^100}".format(" Training run " + str(cnt) + " "))
    # fresh random train/test split on each call (no fixed random_state)
    train_data, test_data, train_label, test_label = train_test_split(X, Y, train_size=size)
    model = svm.SVC(C=2, kernel='rbf', gamma=ga, decision_function_shape=df)
    model.fit(train_data, train_label)
    pred = model.predict(test_data)
    print("Test-set results:")
    print([i == j for i, j in zip(pred, test_label)])
    train_score = model.score(train_data, train_label)
    print("Training-set score:", train_score)
    test_score = model.score(test_data, test_label)
    print("Test-set score:", test_score)
    print("\n")
    res.append(test_score)
train_model(0.7,10,'ovo')
train_model(0.7,10,'ovr')
train_model(0.9,5,'ovo')
train_model(0.9,10,'ovo')
print("最好的超参数序号:",res.index(max(res))+1)
150 150
========================================== Training run 1 ==========================================
Test-set results:
[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True]
Training-set score: 1.0
Test-set score: 0.9555555555555556


========================================== Training run 2 ==========================================
Test-set results:
[True, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]
Training-set score: 1.0
Test-set score: 0.9777777777777777


========================================== Training run 3 ==========================================
Test-set results:
[True, False, True, True, True, True, True, True, True, True, True, True, True, True, True]
Training-set score: 0.9925925925925926
Test-set score: 0.9333333333333333


========================================== Training run 4 ==========================================
Test-set results:
[True, True, True, True, True, True, True, True, True, True, True, False, True, True, True]
Training-set score: 1.0
Test-set score: 0.9333333333333333


Best hyperparameter run: 2
X, y = random_data()
# fit a linear SVM to the two blobs
clf = svm.SVC(kernel='linear', C=1)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
# plot the decision function
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create grid to evaluate model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary and margins
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])
# plot support vectors (circle them)
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
plt.show()