确定常用单词的各种字母组合频率
1.将单词切分成不同的字母组合
2.计算每个字母组合出现的次数
编码实现
#coding=utf-8
import fileinput
from collections import Counter
# Sample word; not referenced anywhere else in the visible code.
testword="find"
# Shortest letter-combination length collected by get_all_slice_list.
minslice=2
# Longest letter-combination length collected (the bound is inclusive).
maxslice=5
# Not referenced anywhere else in the visible code.
cutnumber=2
def cut_word(inword, slicenumber):
    """Split ``inword`` into consecutive, non-overlapping pieces.

    Pieces are taken from the start of the word, each exactly
    ``slicenumber`` characters long.  A trailing remainder shorter than
    ``slicenumber`` is dropped.

    Args:
        inword: The string to split.
        slicenumber: Length of every piece (also the step between pieces).

    Returns:
        A list of the full-length pieces in left-to-right order.  The list
        is empty when the word is shorter than ``slicenumber`` or when
        ``slicenumber`` is negative.

    Raises:
        ValueError: If ``slicenumber`` is 0 (``range`` rejects a zero step).
    """
    # Last index at which a full-length piece can still start.
    last_start = len(inword) - slicenumber
    return [inword[start:start + slicenumber]
            for start in range(0, last_start + 1, slicenumber)]
def get_slice_list(inword, slicenumber):
    """Return the distinct substrings of ``inword`` of length ``slicenumber``.

    Chunking every suffix of the word into ``slicenumber``-sized pieces and
    de-duplicating the result yields exactly the set of all contiguous
    substrings of that length, so a single sliding window is used instead
    of re-chunking each suffix.

    Args:
        inword: The word to take substrings from.
        slicenumber: Length of each substring.

    Returns:
        A list of the unique substrings, in no particular order.  The list
        is empty when the word is shorter than ``slicenumber`` or when
        ``slicenumber`` is negative.

    Raises:
        ValueError: If ``slicenumber`` is 0 and ``inword`` is non-empty.
    """
    word_len = len(inword)
    if slicenumber == 0 and word_len:
        # A zero-length piece cannot be used to chunk a non-empty word.
        raise ValueError("slicenumber must not be zero")
    if slicenumber <= 0:
        return []
    # One window per valid start position; the set drops repeats.
    unique = {inword[start:start + slicenumber]
              for start in range(word_len - slicenumber + 1)}
    return list(unique)
def get_all_slice_list(inword):
    """Collect the letter combinations of ``inword`` for every length.

    Calls ``get_slice_list`` once for each length from the module-level
    ``minslice`` up to and including ``maxslice`` and concatenates the
    results.

    Args:
        inword: The word to take letter combinations from.

    Returns:
        A list holding the results of every ``get_slice_list`` call,
        shortest length first.
    """
    combos = []
    # maxslice is an inclusive bound, hence the +1.
    for length in range(minslice, maxslice + 1):
        combos.extend(get_slice_list(inword, length))
    return combos
# Read one word per line from 1.txt, count how often each letter
# combination occurs, and write "combination,count" lines to jieguo.txt
# with the most frequent combinations first.
alllist = []
for line in fileinput.input("1.txt"):
    word = line.rstrip()
    # Single-argument print(...) prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("read :%s" % (word))
    alllist.extend(get_all_slice_list(word))
c = Counter(alllist).most_common()
print("##########begin write file...")
# The with-block closes the output file even if a write fails part-way.
with open('jieguo.txt', 'w') as output:
    for entry in c:
        # entry is a (combination, count) pair from most_common().
        output.write("%s,%d\n" % entry)
print("##########write end")