import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# AI2017.txt comes from http://www.gov.cn/zhengce/content/2017-07/20/content_5211996.htm
from collections import Counter

# Read the whole source document into memory.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used -- confirm the encoding of AI2017.txt before pinning one here.
with open('AI2017.txt', 'r') as f:
    renmin = f.read()

jieba.load_userdict("AIDict.txt")  # register the custom dictionary before cutting
seg_list = jieba.cut(renmin, cut_all=False)  # segment the text (cut_all disabled)

# Term frequencies: token -> number of occurrences, in first-seen order.
tf = dict(Counter(seg_list))

with open('stopword.txt', 'r') as ft:
    stopword = ft.read()

# Whole-word stop list. Testing `seg in stopword` against the raw file text is
# a substring match, which also drops any token that merely appears inside a
# longer stop word or across two adjacent entries.
# Assumes stopword.txt is whitespace/newline separated -- TODO confirm.
stopword_set = set(stopword.split())

# Iterate over a snapshot of the keys because entries are removed from `tf`.
ci = list(tf)
for seg in ci:
    if (
        tf[seg] < 5                # too rare to be worth plotting
        or len(seg) < 2            # single characters
        or not seg.strip()         # whitespace-only tokens
        or seg in stopword_set     # exact stop-word match
        or '一' in seg              # any token containing the character '一'
    ):
        del tf[seg]
#print(tf)
# After counting word frequencies, draw the word-cloud image
from wordcloud import WordCloud,ImageColorGenerator
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from scipy.misc import imread
# Render the filtered frequency table `tf` as a word cloud shaped by heart.png,
# show it twice (default colors, then colors taken from the image), and save it.


def _display(image):
    """Show *image* with matplotlib, hiding the axes."""
    plt.imshow(image)
    plt.axis('off')
    plt.show()


mask_img = np.array(Image.open("heart.png"))
font = r'c:\Windows\Fonts\simfang.ttf'

# Per the wordcloud documentation, width/height are ignored when a mask is
# supplied; they are kept here exactly as in the original call.
cloud_options = dict(
    background_color="white",
    mask=mask_img,
    collocations=False,
    font_path=font,
    max_font_size=200,
    width=1600,
    height=500,
    margin=0,
)
wc = WordCloud(**cloud_options).generate_from_frequencies(tf)
_display(wc)

# Derive word colors from the color image and show the recolored cloud.
image_colors = ImageColorGenerator(mask_img)
_display(wc.recolor(color_func=image_colors))

wc.to_file('AI.jpg')