用python实现概念学习中的Find-S算法和候选消除算法。
(1)训练样例可以使用ppt中的四个训练样例作为训练集;
(2)输出结果为与训练集一致的假设变型空间(version space)。
提示:
(1)训练样例和假设可以使用长度为6的list或者tuple表示;
(2)属性取值可以直接使用字符串;
(3)定义一些辅助函数,如:
假设的最小泛化(获得假设针对某个正例的极小泛化形式,min_generalize),
最小特殊化(获得假设针对某个负例的极小特殊化形式,min_specialize),
比较函数(比较两个假设一个是否比另一个更一般,is_general),
样例是否满足假设判定函数(agree_with)等等
(4)算法流程与实例请参考ppt相关内容
有点烂,写得有些久了
train_set=[["Sunny","Warm","Normal","Strong","Warm","Same"],
["Sunny","Warm","High","Strong","Warm","Same"],
["Rainy","Cold","High","Strong","Warm","Change"],
["Sunny","Warm","High","Strong","Cool","Change"]]
en=[1,1,0,1]
d={} #用于寻找反值,min_specialize中
for x in zip(*train_set):
tmp=list(set(x))
if len(tmp)==1: #strong
continue
i,j=tmp
d[i]=j
d[j]=i
#print(d)
def FindS(train_set): #极大特殊假设
ans=[]
for i,train in enumerate(train_set):
if en[i]==0:
continue
if not ans:
ans=train.copy()
else:
for _ in range(len(ans)):
if ans[_]!=train[_]:
ans[_]='?'
return ans
print("the result of find-s:")
print(FindS(train_set))
print("\n\n\n")
def more_normol(d,s,n=6):
for h in s:
for i in range(n):
if h[i]=="∅": continue
if d[i]=="?": continue
if d[i]==h[i]: continue
else:
break
else: #正常匹配结束
return True
return False
def more_special(d,g,n=6):
for h in g:
for i in range(n):
if d[i]=="∅": continue
if h[i]=="?": continue
if d[i]==h[i]: continue
else:
break
else: #正常匹配结束
return True
return False
def min_generalize(h,s,n=6): #获得假设针对某个正例的极小泛化形式
ans=[]
for i in range(n):
if h[i]=="∅":
ans.append(s[i])
continue
if h[i]!=s[i]:
ans.append("?")
else:
ans.append(h[i])
return ans
def min_specialize(h,g,n=6): #获得假设针对某个负例的极小特殊化形式
ans=[]
for i in range(n):
if h[i]=="?":
if g[i] in d:
ans.append(h[:i]+[d[g[i]]]+h[i+1:])
continue
if h[i]!=g[i]:
ans.append("?")
else:
ans.append(h[i])
return ans
def agree_with(h,x,n=6):
for i in range(n):
if h[i]=="?": continue
elif h[i]==x[i]: continue
elif h[i]!=x[i]:
return False
return True
class ce:
train_set=[]
n=0
def __init__(self,train_set,n):
self.train_set=train_set
self.n=n
def run(self):
g=[["?"]*self.n]
s=[["∅"]*self.n]
print("ce过程:",0,"\nG:",g,"\tS:",s)
for cnt,train in enumerate(self.train_set):
if en[cnt]:
next_g=g
for i,h in enumerate(g):
if not agree_with(h,train):
del next_g[i]
g=next_g
next_s=s
for i,h in enumerate(s):
if not agree_with(h,train):
del next_s[i]
tmp=min_generalize(h,train)
if more_special(tmp,g):
next_s.append(tmp)
#print(next_s,min_generalize(h,train))
s=next_s
else:
next_s=s
for i,h in enumerate(s):
if agree_with(h,train):
del next_s[i]
s=next_s
next_g=g
for i,h in enumerate(g):
if agree_with(h,train): #因为正是个反例
del next_g[i]
tmp=min_specialize(h,train)
for d in tmp:
if more_normol(d,s):
next_g.append(d)
#print(next_s,min_specialize(h,train))
g=next_g
print("ce过程:",cnt+1,"\nG:",g,"\tS:",s)
# use 边界g and 边界s to generate res
res=[]
for gx in g:
for sx in s:
for i in range(len(gx)):
if gx[i]=="?":
if sx[i]!="?":
cur=[gx[:i]+[sx[i]]+gx[i+1:]]
if cur not in res:
res.append(cur)
print("\n\n\nthe result of candidate-elimination:")
return res+g+s
# __name__是 python 的一个内置属性,记录了一个字符串
#若是在当前文件:print(__name__) --> __main__
#如果在当前文件执行,则以下代码会被执行
if __name__ =='__main__':
test=ce(train_set,6)
print(test.run())
#不足:1.测试用例太少 2.候选消除法不完整
the result of find-s: ['Sunny', 'Warm', '?', 'Strong', '?', '?'] ce过程: 0 G: [['?', '?', '?', '?', '?', '?']] S: [['∅', '∅', '∅', '∅', '∅', '∅']] ce过程: 1 G: [['?', '?', '?', '?', '?', '?']] S: [['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']] ce过程: 2 G: [['?', '?', '?', '?', '?', '?']] S: [['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']] ce过程: 3 G: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']] S: [['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']] ce过程: 4 G: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']] S: [['Sunny', 'Warm', '?', 'Strong', '?', '?']] the result of candidate-elimination: [[['Sunny', 'Warm', '?', '?', '?', '?']], [['Sunny', '?', '?', 'Strong', '?', '?']], [['?', 'Warm', '?', 'Strong', '?', '?']], ['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['Sunny', 'Warm', '?', 'Strong', '?', '?']]