Logistic regression is a classic classification method in statistical learning.
Setting $\mu=0,\ \gamma=1$ in the logistic distribution function $F(x)=\frac{1}{1+e^{-(x-\mu)/\gamma}}$ gives the sigmoid function:

$$F(x)=\frac{1}{1+e^{-x}}$$

The binomial logistic regression model is the following conditional probability distribution:
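As a quick numerical illustration (a minimal standalone sketch, not part of the training script below), the sigmoid maps any real input into $(0,1)$ and equals $0.5$ at $x=0$:

```python
import numpy as np

def sigmoid(x):
    # Logistic sigmoid: F(x) = 1 / (1 + e^{-x}).
    return 1.0 / (1.0 + np.exp(-x))

# Symmetric around 0: sigmoid(-x) = 1 - sigmoid(x).
print(sigmoid(np.array([-2.0, 0.0, 2.0])))  # ≈ [0.1192 0.5 0.8808]
```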
$$P(Y=1\mid x)=\frac{\exp(w\cdot x+b)}{1+\exp(w\cdot x+b)}$$

$$P(Y=0\mid x)=\frac{1}{1+\exp(w\cdot x+b)}$$

Here $x$ is the input and $Y$ is the output; the parameter $w$ is called the weight vector and $b$ the bias.
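The training code below fits $w$ and $b$ by stochastic gradient descent on the negative log-likelihood. For a single sample $(x, y)$, writing $z = w\cdot x+b$ and $\sigma(z)=\frac{1}{1+e^{-z}}$:

$$-\log P(y\mid x) = -y\,z + \log\bigl(1+e^{z}\bigr),\qquad \frac{\partial}{\partial w}\bigl[-\log P(y\mid x)\bigr] = (\sigma(z)-y)\,x,\qquad \frac{\partial}{\partial b}\bigl[-\log P(y\mid x)\bigr] = \sigma(z)-y$$

These per-sample gradients are exactly what the `gradw` and `gradb` methods compute.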
```python
import numpy as np
from random import shuffle


def random_data():
    # Two Gaussian blobs in the plane: 20 points centered at (-2, -2)
    # with label 0, and 20 points centered at (2, 2) with label 1.
    np.random.seed(1)
    X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
    Y = [0] * 20 + [1] * 20
    return X, Y


def data_split(X, Y, n=40, k=0.8):
    # Shuffle the indices, then take the first n*k samples for training
    # and the rest for testing.
    nums = [i for i in range(n)]
    shuffle(nums)
    p = int(n * k)
    train_data, train_label = [X[i] for i in nums[:p]], [Y[i] for i in nums[:p]]
    test_data, test_label = [X[i] for i in nums[p:]], [Y[i] for i in nums[p:]]
    return np.array(train_data), np.array(train_label), np.array(test_data), np.array(test_label)


class LogisticRegression:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.lda = 1e-2  # learning rate

    def sigmoid(self, x):
        # P(Y=1|x) = 1 / (1 + exp(-(w·x + b))); this form avoids overflow
        # when w·x + b is a large positive number.
        return 1.0 / (1.0 + np.exp(-(np.dot(x, self.w) + self.b)))

    def score(self, X, Y):
        # Predict label 1 when P(Y=1|x) >= 0.5, then report accuracy.
        _Y = np.array([int(self.sigmoid(x) >= 0.5) for x in X])
        return "{:.2%}".format(np.sum(Y == _Y) / len(Y))

    def gradw(self, x, y):
        # Per-sample gradient of the negative log-likelihood w.r.t. w.
        return (self.sigmoid(x) - y) * x

    def gradb(self, x, y):
        # Per-sample gradient of the negative log-likelihood w.r.t. b.
        return self.sigmoid(x) - y

    def fit(self, X, Y):
        # Stochastic gradient descent: 15 passes over the training set,
        # updating on one sample at a time and printing training accuracy,
        # w, and b after each pass.
        for i in range(15):
            for j in range(X.shape[0]):
                self.w -= self.lda * self.gradw(X[j, :], Y[j])
                self.b -= self.lda * self.gradb(X[j, :], Y[j])
            print(self.score(X, Y), self.w, self.b)


if __name__ == "__main__":
    X, Y = random_data()
    train_data, train_label, test_data, test_label = data_split(X, Y)
    model = LogisticRegression(np.random.randn(train_data.shape[1]), 0)
    model.fit(train_data, train_label)
    print("=" * 100)
    print(model.score(test_data, test_label))
```
Output of one run (training accuracy, $w$, and $b$ after each of the 15 passes, followed by the test accuracy):

```
96.88% [-0.04872157 0.31090625] 0.13380214553301567
100.00% [0.07882197 0.49509809] 0.20378410403418268
100.00% [0.32194493 0.49276714] 0.30753819682228856
100.00% [0.40630944 0.58209314] 0.27657173693929776
100.00% [0.50344239 0.5960984 ] 0.2994975044743826
100.00% [0.53508341 0.63515726] 0.3143461233001315
100.00% [0.5704154 0.70317153] 0.3553905689897992
100.00% [0.64863162 0.75385747] 0.3211444540221053
100.00% [0.65888647 0.78063784] 0.3293522797645416
100.00% [0.67199167 0.80251838] 0.33706963361816483
100.00% [0.68834167 0.82239165] 0.33062277584674854
100.00% [0.69637239 0.83068294] 0.33344738123604567
100.00% [0.70568679 0.8424407 ] 0.33751572201248836
100.00% [0.72227402 0.86537936] 0.32972662577827255
100.00% [0.75116161 0.88174074] 0.32049322051109497
====================================================================================================
100.00%
```
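As an optional sanity check (assuming scikit-learn is installed, and reusing `train_data`, `train_label`, `test_data`, `test_label` from the script above), the same split can be fed to scikit-learn's logistic regression; on this linearly separable toy data it should score similarly:

```python
# Optional cross-check against scikit-learn; aliased to avoid clashing
# with the LogisticRegression class defined above.
from sklearn.linear_model import LogisticRegression as SkLogisticRegression

clf = SkLogisticRegression()
clf.fit(train_data, train_label)
print("{:.2%}".format(clf.score(test_data, test_label)))
```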