Python 中文词频统计
使用 jieba 对 in.txt 的内容进行分词，统计长度大于 1 的词语的出现次数，并将出现次数最多的前 2000 个词及其次数写入 count.txt。完整代码如下：
·
import jieba
# Maximum number of most-frequent words written to the output file.
TOP_N = 2000


def count_words(words):
    """Return a dict mapping each multi-character token to its frequency.

    Tokens whose length is exactly 1 (single characters, punctuation, most
    whitespace) are skipped, as in the original script.
    """
    counts = {}
    for word in words:
        if len(word) == 1:
            continue
        counts[word] = counts.get(word, 0) + 1
    return counts


def top_items(counts, limit=TOP_N):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    The sort is stable, so words with equal counts keep their first-seen
    order. Slicing (instead of indexing ``range(limit)``) avoids an
    IndexError when there are fewer than *limit* distinct words.
    """
    items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    return items[:limit]


def format_line(word, count):
    """Format one output line: word left-aligned to 10, count right-aligned to 5."""
    return "{0:<10}{1:>5}".format(word, count) + "\n"


def main():
    """Segment ``in.txt`` with jieba and append the top words to ``count.txt``."""
    # Context managers make sure both files are closed, even on error.
    with open("in.txt", "r", encoding='utf-8') as source:
        txt = source.read()

    words = jieba.lcut(txt)
    items = top_items(count_words(words), TOP_N)

    # Open the output once rather than once per word. Append mode is kept
    # from the original script, so repeated runs accumulate in count.txt.
    with open('count.txt', 'a', encoding='utf-8') as target:
        for word, count in items:
            target.write(format_line(word, count))


if __name__ == "__main__":
    main()
更多推荐



所有评论(0)