import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
def iris_data():
    from sklearn.datasets import load_iris
    iris = load_iris()
    return iris.data, iris.target

def toy_data():
    X = np.array([[3, 3], [4, 3], [1, 1]])
    Y = np.array([1, 1, -1])
    return X, Y

def random_data():
    np.random.seed(2333)
    # np.r_ stacks the two sample blocks vertically (row-wise concatenation)
    X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
    Y = [0] * 20 + [1] * 20
    return X, Y
| Class | Description |
| --- | --- |
| svm.LinearSVC | Linear Support Vector Classification. |
| svm.LinearSVR | Linear Support Vector Regression. |
| svm.NuSVC | Nu-Support Vector Classification. |
| svm.NuSVR | Nu Support Vector Regression. |
| svm.SVC | C-Support Vector Classification. |
| svm.SVR | Epsilon-Support Vector Regression. |
| svm.OneClassSVM | Unsupervised Outlier Detection. |
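All of these estimators share the same fit/predict/score interface, so they can be swapped freely. A minimal sketch, reusing the `random_data` helper above (the choice of models and their default settings is illustrative):

```python
# Illustrative sketch: the classifiers above are interchangeable through
# the shared fit/predict/score estimator interface.
from sklearn import svm

X, Y = random_data()
for Model in (svm.LinearSVC, svm.NuSVC, svm.SVC):
    clf = Model().fit(X, Y)
    print(Model.__name__, clf.score(X, Y))
```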
class sklearn.svm.SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)
Parameters:

- C : float, default=1.0
- kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'}, default='rbf'
- degree : int, default=3
- gamma : {'scale', 'auto'} or float, default='scale'
- decision_function_shape : {'ovo', 'ovr'}, default='ovr'
- break_ties : bool, default=False

Attributes:

- support_vectors_ : ndarray of the fitted support vectors
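The default gamma='scale' is documented as 1 / (n_features * X.var()). A quick sketch checking that fitting with the explicit float behaves identically (reusing the `iris_data` helper above):

```python
# Sketch: gamma='scale' resolves to 1 / (n_features * X.var()),
# so fitting with that explicit float should give the same model.
import numpy as np
from sklearn import svm

X, Y = iris_data()
g = 1 / (X.shape[1] * X.var())
m1 = svm.SVC(gamma='scale').fit(X, Y)
m2 = svm.SVC(gamma=g).fit(X, Y)
print(np.allclose(m1.decision_function(X), m2.decision_function(X)))  # True
```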
| Method | Description |
| --- | --- |
| decision_function(X) | Evaluate the decision function for the samples in X. |
| fit(X, y[, sample_weight]) | Fit the SVM model according to the given training data. |
| get_params([deep]) | Get parameters for this estimator. |
| predict(X) | Perform classification on samples in X. |
| score(X, y[, sample_weight]) | Return the mean accuracy on the given test data and labels. |
| set_params(**params) | Set the parameters of this estimator. |
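A short sketch exercising these methods on the 3-point `toy_data` problem defined above:

```python
# Sketch: the main SVC methods on the linearly separable toy problem.
from sklearn import svm

X, Y = toy_data()
model = svm.SVC(kernel='linear', C=1.0).fit(X, Y)
print(model.predict(X))              # predicted class labels
print(model.decision_function(X))    # signed distances to the hyperplane
print(model.score(X, Y))             # mean accuracy (1.0 on this separable set)
print(model.support_vectors_)        # support vectors found by fit
print(model.get_params()['kernel'])  # constructor parameters
```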
#X, Y = toy_data()
X, Y = random_data()
#X, Y = iris_data()
#print(X,Y)
cnt = 0
def train_model(kernel, C):
    global cnt
    cnt += 1
    print("{:=^100}".format(" Training run " + str(cnt) + " "))
    model = svm.SVC(kernel=kernel, C=C)
    model.fit(X, Y)
    pred = model.predict(X)
    # note: the report below evaluates on the training data itself
    print(classification_report(Y, pred))
train_model("linear",1)
train_model("linear",5)
train_model("poly",5)
========================================== Training run 1 ==========================================
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

========================================== Training run 2 ==========================================
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40

========================================== Training run 3 ==========================================
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        20

    accuracy                           1.00        40
   macro avg       1.00      1.00      1.00        40
weighted avg       1.00      1.00      1.00        40
Hyperparameters: in machine learning, a hyperparameter is a parameter whose value is set before the learning process begins (here: C, kernel, gamma, decision_function_shape), as opposed to the model parameters learned from the data during training.
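scikit-learn can automate this kind of search; a minimal GridSearchCV sketch (the grid values are illustrative), as an alternative to the manual loop below:

```python
# Illustrative sketch: cross-validated hyperparameter search instead of
# the manual loop that follows.
from sklearn import svm
from sklearn.model_selection import GridSearchCV

X, Y = iris_data()
param_grid = {'C': [1, 2, 5], 'gamma': [5, 10, 'scale']}
search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=5)
search.fit(X, Y)
print(search.best_params_, search.best_score_)
```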
X, Y = iris_data()
#print(X,Y)
print(len(X), len(Y))
# print(classification_report(Y, pred))
# print(Y)
# print(pred)
# print(Y == pred)
cnt = 0
res = []
def train_model(size, ga, df):
    global cnt, res
    cnt += 1
    print("{:=^100}".format(" Training run " + str(cnt) + " "))
    # fresh random train/test split on each call (no fixed random_state)
    train_data, test_data, train_label, test_label = train_test_split(X, Y, train_size=size)
    model = svm.SVC(C=2, kernel='rbf', gamma=ga, decision_function_shape=df)
    model.fit(train_data, train_label)
    pred = model.predict(test_data)
    print("Test-set results:")
    print([i == j for i, j in zip(pred, test_label)])
    train_score = model.score(train_data, train_label)
    print("Training-set score:", train_score)
    test_score = model.score(test_data, test_label)
    print("Test-set score:", test_score)
    print("\n")
    res.append(test_score)
train_model(0.7,10,'ovo')
train_model(0.7,10,'ovr')
train_model(0.9,5,'ovo')
train_model(0.9,10,'ovo')
print("最好的超参数序号:",res.index(max(res))+1)
150 150
========================================== Training run 1 ==========================================
Test-set results:
[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True]
Training-set score: 1.0
Test-set score: 0.9555555555555556


========================================== Training run 2 ==========================================
Test-set results:
[True, True, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]
Training-set score: 1.0
Test-set score: 0.9777777777777777


========================================== Training run 3 ==========================================
Test-set results:
[True, False, True, True, True, True, True, True, True, True, True, True, True, True, True]
Training-set score: 0.9925925925925926
Test-set score: 0.9333333333333333


========================================== Training run 4 ==========================================
Test-set results:
[True, True, True, True, True, True, True, True, True, True, True, False, True, True, True]
Training-set score: 1.0
Test-set score: 0.9333333333333333


Best hyperparameter run: 2
X, y = random_data()
# fit a linear SVM to the two blobs
clf = svm.SVC(kernel='linear', C=1)
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired)
# plot the decision function
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# create grid to evaluate model
xx = np.linspace(xlim[0], xlim[1], 30)
yy = np.linspace(ylim[0], ylim[1], 30)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf.decision_function(xy).reshape(XX.shape)
# plot decision boundary and margins
ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
           linestyles=['--', '-', '--'])
# plot support vectors (circle them)
ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
           linewidth=1, facecolors='none', edgecolors='k')
plt.show()