用 Python 编写一段正向最大匹配分词的程序
# Load the segmentation dictionary from "word.txt" (UTF-8, one word per line)
# into a set so that membership tests during matching are fast.
cidian = set()
with open("word.txt", "r", encoding="utf-8") as f:
    # Iterate over the file instead of looping "until readline() is empty":
    # the old loop treated a blank line as end-of-file and silently dropped
    # every word that came after it.
    for line in f:
        ci = line.strip("\n")
        if ci:  # skip blank lines rather than stopping at them
            cidian.add(ci)
def main(s, dictionary=None):
    """Segment ``s`` with forward maximum matching.

    Starting at the left edge, every substring beginning at the current
    position is looked up in the dictionary; the longest one found becomes
    the next token. If no substring matches, the single character at the
    current position is emitted, so the scan always advances.

    Args:
        s: The text to segment.
        dictionary: Optional container of known words (anything supporting
            ``in``). When omitted, the module-level ``cidian`` is used, which
            is the original behaviour.

    Returns:
        The list of tokens in order; an empty list for an empty string.

    Side effects:
        Prints each candidate substring with its start and end offsets as a
        trace of the matching process.
    """
    if dictionary is None:
        # Resolved at call time so the function still follows the module's
        # loaded word list by default.
        dictionary = cidian

    n = len(s)
    ans = []
    start = 0
    while start < n:
        # Fallback token: a single character, used when nothing matches.
        longest = s[start]
        for end in range(start + 1, n + 1):
            candidate = s[start:end]
            print(candidate, start, end)
            if candidate in dictionary:
                # Candidates grow in length, so the last hit is the longest.
                longest = candidate
        ans.append(longest)
        start += len(longest)
    return ans
# Demo: segment a sample sentence. The result is printed explicitly because a
# bare call discards the return value when the file is run as a script.
print(main("沧浪寄余生"))
# set 查找效率近似于O(1),同 dict
# strip() 去除字符串首尾的指定字符
# 更简洁的写法:cidian = set([i.strip() for i in open("word.txt", encoding="utf-8").readlines()])
沧 0 1
沧浪 0 2
沧浪寄 0 3
沧浪寄余 0 4
沧浪寄余生 0 5
寄 2 3
寄余 2 4
寄余生 2 5
余 3 4
余生 3 5
['沧浪', '寄', '余生']