A Word Cloud of WeChat Friends' Personal Signatures -- WeChat Data Analysis (Part 4)

Overview

Code

The background image used as the mask when building the word cloud:

[Figure: the background image used as the word-cloud mask]

The generated result looks like this:

[Figure: the generated word cloud]

You can see that although my WeChat friends all look like goofballs on the surface, their personal signatures seem to be brimming with positive energy~

  • In the code below I am using the data file that I had already packaged earlier.
  • If you want to use it too, you first need to download the data via the link I gave above.
  • Alternatively, just call the function I wrote directly, as shown in the short sketch after this list.
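
For quick reference, here is a minimal sketch of the two options, assuming the login_and_getData and read helpers defined in the full script below (the column names passed in are just an example):

# Option 1: pull the friend list live via itchat and cache it for later runs
df = login_and_getData(['NickName', 'Sex', 'Province', 'City', 'Signature'])
df.to_excel('wechat-1.xlsx')

# Option 2: reuse the previously packaged data file instead of logging in again
df = read('wechat-1.xlsx')

The full script:
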
import itchat
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import jieba
from wordcloud import WordCloud, ImageColorGenerator
import numpy as np
import PIL.Image as Image


def login_and_getData(columns=None):
    # Log in via itchat; hotReload caches the session so the QR code does not have to be rescanned every run
    itchat.auto_login(hotReload=True)
    # The first entry of the friend list is the logged-in account itself, so skip it
    friendList = itchat.get_friends(update=True)[1:]

    if not columns:
        columns = friendList[0].keys()

    df = pd.DataFrame(columns=columns)
    val = [0] * len(friendList)

    # Fill the DataFrame one column at a time from the friend dicts
    for c in columns:
        for i in range(len(friendList)):
            val[i] = friendList[i][c]
        df[c] = val
    return df


def read(filename):
    try:
        return pd.read_excel(filename)
    except Exception:
        return pd.read_csv(filename)


if __name__ == '__main__':
    sns.set_palette('deep', desat=.6)
    # df = login_and_getData()
    # df.to_excel('wechat-1.xlsx')
    df = read('wechat-1.xlsx')
    plt.rcParams['font.sans-serif'] = ['SimHei']

    siglist = df['Signature'].dropna()

    # Some signatures contain emoji <span> tags; strip them out first
    ser = pd.Series(map(lambda x: re.sub('<span(.*?)/span>', '', x), siglist))
    # Concatenate all signatures into one string
    text = ''.join(ser)
    # Segment the text into words with jieba (full mode)
    word_list = jieba.cut(text, cut_all=True)
    # Join the segmented words with spaces so WordCloud can count them
    word_space_split = ' '.join(word_list)
    # Load the mask image
    coloring = np.array(Image.open("wechat.jpg"))
    # Use this image as the outline (mask) of the word cloud;
    # font_path specifies a Chinese font so Chinese characters render correctly
    my_wordcloud = WordCloud(background_color="white", max_words=2000,
                             mask=coloring, max_font_size=100, random_state=42, scale=2, font_path="C:/windows/Fonts/FZSTK.TTF").generate(word_space_split)

    # Recolor the words using the color distribution of the mask image
    image_colors = ImageColorGenerator(coloring)
    plt.imshow(my_wordcloud.recolor(color_func=image_colors))
    # Hide the axes
    plt.axis("off")
    # Show the figure
    plt.show()
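
To see what the emoji-stripping step does, here is a small, self-contained check with a made-up signature string (signatures pulled through itchat typically encode emoji as <span class="emoji ..."></span> tags, which is what the regular expression above removes):

import re

# A made-up example signature containing an emoji span
sig = '加油<span class="emoji emoji1f4aa"></span>每天都是新的开始'
clean = re.sub('<span(.*?)/span>', '', sig)
print(clean)  # -> 加油每天都是新的开始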

A version with word weights added:

import itchat
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import jieba
import jieba.analyse
from wordcloud import WordCloud, ImageColorGenerator
import numpy as np
import PIL.Image as Image


def login_and_getData(columns=None):
    # Log in via itchat; hotReload caches the session so the QR code does not have to be rescanned every run
    itchat.auto_login(hotReload=True)
    # The first entry of the friend list is the logged-in account itself, so skip it
    friendList = itchat.get_friends(update=True)[1:]

    if not columns:
        columns = friendList[0].keys()

    df = pd.DataFrame(columns=columns)
    val = [0] * len(friendList)

    # Fill the DataFrame one column at a time from the friend dicts
    for c in columns:
        for i in range(len(friendList)):
            val[i] = friendList[i][c]
        df[c] = val
    return df


def read(filename):
    try:
        return pd.read_excel(filename)
    except Exception:
        return pd.read_csv(filename)


if __name__ == '__main__':
    sns.set_palette('deep', desat=.6)
    # df = login_and_getData()
    # df.to_excel('wechat-1.xlsx')
    df = read('wechat-1.xlsx')
    plt.rcParams['font.sans-serif'] = ['SimHei']

    siglist = df['Signature'].dropna()

    # Some signatures contain emoji <span> tags; strip them out first
    ser = pd.Series(map(lambda x: re.sub('<span(.*?)/span>', '', x), siglist))
    # Concatenate all signatures into one string
    text = ''.join(ser)
    # Extract keywords with TF-IDF weights: {word: weight}
    text_dict = dict(jieba.analyse.extract_tags(text, withWeight=True))
    # Load the mask image
    coloring = np.array(Image.open("wechat.jpg"))
    # Use this image as the outline (mask) of the word cloud;
    # font_path specifies a Chinese font so Chinese characters render correctly
    my_wordcloud = WordCloud(background_color="white", max_words=2000,
                             mask=coloring, max_font_size=100, random_state=42, scale=2,
                             font_path="C:/windows/Fonts/FZSTK.TTF").generate_from_frequencies(text_dict)

    # Recolor the words using the color distribution of the mask image
    image_colors = ImageColorGenerator(coloring)
    plt.imshow(my_wordcloud.recolor(color_func=image_colors))
    # Hide the axes
    plt.axis("off")
    # Show the figure
    plt.show()
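
For reference, a tiny sketch of what this weighted version feeds into WordCloud: jieba.analyse.extract_tags returns (word, weight) pairs, and generate_from_frequencies sizes the words by those weights instead of by raw counts (the sample text here is made up):

import jieba.analyse
from wordcloud import WordCloud

text = '生活需要正能量,努力努力再努力,正能量满满'  # made-up sample text
pairs = jieba.analyse.extract_tags(text, topK=5, withWeight=True)
print(pairs)  # -> a list of (word, weight) tuples

# The dict form is exactly what generate_from_frequencies expects
wc = WordCloud(font_path="C:/windows/Fonts/FZSTK.TTF").generate_from_frequencies(dict(pairs))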