train_set=[["Sunny","Warm","Normal","Strong","Warm","Same"],
           ["Sunny","Warm","High","Strong","Warm","Same"],
           ["Rainy","Cold","High","Strong","Warm","Change"],
           ["Sunny","Warm","High","Strong","Cool","Change"]]
en=[1,1,0,1]
        
d={}           #用于寻找反值，min_specialize中
for x in zip(*train_set):
    tmp=list(set(x))
    if len(tmp)==1:   #strong
        continue
    i,j=tmp
    d[i]=j
    d[j]=i
    
#print(d)

def FindS(train_set):   #极大特殊假设
    ans=[]
    for i,train in enumerate(train_set):
        if en[i]==0: 
            continue
        if not ans: 
            ans=train.copy()
        else:
            for _ in range(len(ans)):
                if ans[_]!=train[_]:
                    ans[_]='?'    
    return ans

print("the result of find-s:")
print(FindS(train_set))
print("\n\n\n")


def more_normol(d,s,n=6):
    for h in s:
        for i in range(n):
            if h[i]=="∅": continue
            if d[i]=="?": continue
            if d[i]==h[i]: continue
            else:
                break
        else:             #正常匹配结束
            return True
            
    return False
    
def more_special(d,g,n=6):
    for h in g:
        for i in range(n):
            if d[i]=="∅": continue
            if h[i]=="?": continue
            if d[i]==h[i]: continue
            else:
                break
        else:             #正常匹配结束
            return True
            
    return False

def min_generalize(h,s,n=6):  #获得假设针对某个正例的极小泛化形式
    ans=[]
    for i in range(n):
        if h[i]=="∅":
            ans.append(s[i])
            continue
        if h[i]!=s[i]:
            ans.append("?")
        else:
            ans.append(h[i])
    return ans
    
def min_specialize(h,g,n=6):  #获得假设针对某个负例的极小特殊化形式
    ans=[]
    for i in range(n):
        if h[i]=="?":
            if g[i] in d:
                ans.append(h[:i]+[d[g[i]]]+h[i+1:])
            continue
        if h[i]!=g[i]:
            ans.append("?")
        else:
            ans.append(h[i])
    return ans
        
def agree_with(h,x,n=6):
    for i in range(n):
        if h[i]=="?": continue
        elif h[i]==x[i]: continue
        elif h[i]!=x[i]:
            return False
    return True



class ce:
    train_set=[]
    n=0
    
    def __init__(self,train_set,n):
        self.train_set=train_set
        self.n=n
        
    def run(self):
        g=[["?"]*self.n]
        s=[["∅"]*self.n]
        print("ce过程：",0,"\nG:",g,"\tS:",s)
        for cnt,train in enumerate(self.train_set):
            if en[cnt]:
                next_g=g
                for i,h in enumerate(g):
                    if not agree_with(h,train):
                        del next_g[i]
                g=next_g

                next_s=s
                for i,h in enumerate(s):
                    if not agree_with(h,train):
                        del next_s[i]
                        tmp=min_generalize(h,train)
                        if more_special(tmp,g):
                            next_s.append(tmp)
                        #print(next_s,min_generalize(h,train))
                s=next_s

            else:
                next_s=s
                for i,h in enumerate(s):
                    if agree_with(h,train):
                        del next_s[i]
                s=next_s

                next_g=g
                for i,h in enumerate(g):
                    if agree_with(h,train):  #因为正是个反例
                        del next_g[i]
                        tmp=min_specialize(h,train)
                        for d in tmp:
                            if more_normol(d,s):
                                next_g.append(d)
                        #print(next_s,min_specialize(h,train))
                g=next_g

            print("ce过程：",cnt+1,"\nG:",g,"\tS:",s)   
        # use 边界g and 边界s to generate res
        res=[]
        for gx in g:
            for sx in s:
                for i in range(len(gx)):
                    if gx[i]=="?":
                        if sx[i]!="?":
                            cur=[gx[:i]+[sx[i]]+gx[i+1:]]
                            if cur not in res:
                                res.append(cur)

        print("\n\n\nthe result of candidate-elimination:") 
        return res+g+s

    
# __name__是 python 的一个内置属性，记录了一个字符串
#若是在当前文件：print(__name__)  --> __main__


#如果在当前文件执行，则以下代码会被执行
if __name__ =='__main__':
    test=ce(train_set,6)
    print(test.run())


#不足：1.测试用例太少 2.候选消除法不完整

the result of find-s:
['Sunny', 'Warm', '?', 'Strong', '?', '?']


ce过程： 0 
G: [['?', '?', '?', '?', '?', '?']] 	S: [['∅', '∅', '∅', '∅', '∅', '∅']]
ce过程： 1 
G: [['?', '?', '?', '?', '?', '?']] 	S: [['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']]
ce过程： 2 
G: [['?', '?', '?', '?', '?', '?']] 	S: [['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
ce过程： 3 
G: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'Same']] 	S: [['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']]
ce过程： 4 
G: [['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']] 	S: [['Sunny', 'Warm', '?', 'Strong', '?', '?']]


the result of candidate-elimination:
[[['Sunny', 'Warm', '?', '?', '?', '?']], [['Sunny', '?', '?', 'Strong', '?', '?']], [['?', 'Warm', '?', 'Strong', '?', '?']], ['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['Sunny', 'Warm', '?', 'Strong', '?', '?']]

题目说明¶

代码实现¶