构建词云的时候,采用的背景图如下:
生成的效果为:
可以发现,我的微信朋友们虽然表面上看起来一个个都是逗比,可是个性标签似乎都是满满的正能量哇~
import re

import itchat
import jieba
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image as Image
import seaborn as sns
from wordcloud import WordCloud, ImageColorGenerator

# Per the original author's note, some signatures embed emoji as inline
# <span ...></span> markup; this pattern matches that markup for removal.
_EMOJI_SPAN = re.compile(r'<span(.*?)/span>')


def login_and_getData(columns=None):
    """Log in to WeChat via itchat and return the friend list as a DataFrame.

    Args:
        columns: Friend fields to keep, one DataFrame column each. When
            None or empty, every key of the first friend record is used.

    Returns:
        A DataFrame with one row per friend. An empty DataFrame is returned
        when there are no friends and no columns were requested.
    """
    itchat.auto_login(hotReload=True)
    # The first entry is the logged-in user, so skip it.
    friends = itchat.get_friends(update=True)[1:]
    if columns is None or len(columns) == 0:
        if not friends:
            # Nothing to infer the columns from; avoid indexing friends[0].
            return pd.DataFrame()
        columns = friends[0].keys()
    columns = list(columns)
    return pd.DataFrame(
        {col: [friend[col] for friend in friends] for col in columns},
        columns=columns,
    )


def read(filename):
    """Load *filename* as an Excel sheet, falling back to CSV on any failure.

    Args:
        filename: Path of the spreadsheet or CSV file to load.

    Returns:
        The parsed DataFrame.
    """
    try:
        return pd.read_excel(filename)
    except Exception:
        # Deliberately broad best-effort fallback: whatever went wrong with
        # the Excel reader, retry as CSV. Errors that also affect the CSV
        # reader are raised from read_csv.
        return pd.read_csv(filename)


def _signature_text(signatures):
    """Strip emoji markup from every signature and concatenate the results."""
    # str() guards against non-string cells (e.g. purely numeric signatures).
    return ''.join(_EMOJI_SPAN.sub('', str(sig)) for sig in signatures)


def _load_mask(path):
    """Read the image at *path* into an array and close the file handle."""
    with Image.open(path) as img:
        return np.array(img)


def main():
    """Build and display a word cloud from the cached friend signatures."""
    sns.set_palette('deep', desat=.6)
    # To refresh the cached sheet from WeChat, run once:
    #   df = login_and_getData()
    #   df.to_excel('wechat-1.xlsx')
    df = read('wechat-1.xlsx')
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # Join all cleaned signatures into a single text.
    text = _signature_text(df['Signature'].dropna())
    # Segment the text, then space-join the tokens for WordCloud.
    word_space_split = ' '.join(jieba.cut(text, cut_all=True))

    # The image is used as the mask (boundary) of the cloud.
    coloring = _load_mask("wechat.jpg")
    # font_path selects a font so that Chinese text can be displayed.
    my_wordcloud = WordCloud(
        background_color="white",
        max_words=2000,
        mask=coloring,
        max_font_size=100,
        random_state=42,
        scale=2,
        font_path="C:/windows/Fonts/FZSTK.TTF",
    ).generate(word_space_split)

    # Recolor the words using the colour distribution of the image.
    image_colors = ImageColorGenerator(coloring)
    plt.imshow(my_wordcloud.recolor(color_func=image_colors))
    # Hide both axes, then show the figure.
    plt.axis("off")
    plt.show()


if __name__ == '__main__':
    main()
加上权重的版本:
import re

import itchat
import jieba
import jieba.analyse
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL.Image as Image
import seaborn as sns
from wordcloud import WordCloud, ImageColorGenerator

# Per the original author's note, some signatures embed emoji as inline
# <span ...></span> markup; this pattern matches that markup for removal.
_EMOJI_SPAN = re.compile(r'<span(.*?)/span>')


def login_and_getData(columns=None):
    """Log in to WeChat via itchat and return the friend list as a DataFrame.

    Args:
        columns: Friend fields to keep, one DataFrame column each. When
            None or empty, every key of the first friend record is used.

    Returns:
        A DataFrame with one row per friend. An empty DataFrame is returned
        when there are no friends and no columns were requested.
    """
    itchat.auto_login(hotReload=True)
    # The first entry is the logged-in user, so skip it.
    friends = itchat.get_friends(update=True)[1:]
    if columns is None or len(columns) == 0:
        if not friends:
            # Nothing to infer the columns from; avoid indexing friends[0].
            return pd.DataFrame()
        columns = friends[0].keys()
    columns = list(columns)
    return pd.DataFrame(
        {col: [friend[col] for friend in friends] for col in columns},
        columns=columns,
    )


def read(filename):
    """Load *filename* as an Excel sheet, falling back to CSV on any failure.

    Args:
        filename: Path of the spreadsheet or CSV file to load.

    Returns:
        The parsed DataFrame.
    """
    try:
        return pd.read_excel(filename)
    except Exception:
        # Deliberately broad best-effort fallback: whatever went wrong with
        # the Excel reader, retry as CSV. Errors that also affect the CSV
        # reader are raised from read_csv.
        return pd.read_csv(filename)


def _signature_text(signatures):
    """Strip emoji markup from every signature and concatenate the results."""
    # str() guards against non-string cells (e.g. purely numeric signatures).
    return ''.join(_EMOJI_SPAN.sub('', str(sig)) for sig in signatures)


def _load_mask(path):
    """Read the image at *path* into an array and close the file handle."""
    with Image.open(path) as img:
        return np.array(img)


def main():
    """Build and display a keyword-weighted word cloud of friend signatures."""
    sns.set_palette('deep', desat=.6)
    # To refresh the cached sheet from WeChat, run once:
    #   df = login_and_getData()
    #   df.to_excel('wechat-1.xlsx')
    df = read('wechat-1.xlsx')
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # Join all cleaned signatures into a single text.
    text = _signature_text(df['Signature'].dropna())
    # Map each extracted keyword to its weight.
    # NOTE(review): extract_tags is called without topK, so it returns only
    # its default number of keywords (20 per the jieba README -- confirm),
    # which is far below max_words=2000. Pass topK explicitly if a denser
    # cloud is wanted.
    text_dict = dict(jieba.analyse.extract_tags(text, withWeight=True))

    # The image is used as the mask (boundary) of the cloud.
    coloring = _load_mask("wechat.jpg")
    # font_path selects a font so that Chinese text can be displayed.
    my_wordcloud = WordCloud(
        background_color="white",
        max_words=2000,
        mask=coloring,
        max_font_size=100,
        random_state=42,
        scale=2,
        font_path="C:/windows/Fonts/FZSTK.TTF",
    ).generate_from_frequencies(text_dict)

    # Recolor the words using the colour distribution of the image.
    image_colors = ImageColorGenerator(coloring)
    plt.imshow(my_wordcloud.recolor(color_func=image_colors))
    # Hide both axes, then show the figure.
    plt.axis("off")
    plt.show()


if __name__ == '__main__':
    main()