闲来无事,想了解一下2018年的票房之王(国产片-内地),于是写了这么个小工具。先看一下结果:
def getMovieList(year='2018', pages=10):
    """Scrape the Mtime yearly mainland box-office ranking for domestic films.

    Args:
        year: Ranking year, sent as the ``year`` query parameter. Defaults to
            ``'2018'``, the value that was previously hard-coded.
        pages: Number of result pages to request (page indices
            ``0 .. pages - 1``). Defaults to 10, the page count used for the
            2018 ranking; other years need their own count.

    Returns:
        A list of ``(movie_name, totalnum, actors)`` string tuples in the
        order the entries are returned by the site.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP error status.
        ValueError: if a response body is not valid JSON.
    """
    movie_params = {
        'year': str(year),
        'area': 'china',
        'type': 'MovieRankingYear',
        'category': 'china',
        'page': 0,
        'display': 'list',
        # NOTE(review): timestamp/version look like values captured from a
        # browser request -- confirm whether the server validates them.
        'timestamp': '1553422026885',
        'version': '07bb781100018dd58eafc3b35d42686804c6df8d',
        'dataType': 'json',
    }
    url = 'http://movie.mtime.com/boxoffice/'
    movie_list = []
    for page in range(pages):
        movie_params['page'] = page
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url, params=movie_params, timeout=10)
        response.raise_for_status()
        # The JSON payload carries the rendered list as an HTML fragment.
        soup = BeautifulSoup(response.json()['html'], 'lxml')
        movie_div = soup.find('div', {'class': 'boxofficelist'})
        if movie_div is None:
            # No ranking container on this page, so there is nothing to
            # collect from it.
            continue
        for movie_dd in movie_div.find_all('dd'):
            movie_name = movie_dd.find('h3').text
            totalnum = movie_dd.find_all('p', {'class': 'totalnum'})[0].text
            # The fourth <p> holds the cast line; its first three characters
            # are dropped (presumably a label prefix -- confirm against the
            # page markup).
            actors = movie_dd.find_all('p')[3].text[3:]
            movie_list.append((movie_name, totalnum, actors))
    return movie_list
复制代码
def write_movie_dsv(movies):
    """Write the 2018 mainland (domestic film) box-office list to a CSV file.

    The file ``movie_2018_chinalist.csv`` is created (or overwritten) in the
    current working directory with a header row followed by one row per
    movie. A progress line is printed after every 10th record.

    Args:
        movies: Sequence of row tuples, e.g. ``(name, box_office, actors)``.
    """
    header = ['电影名', '票房', '演员']
    total = len(movies)
    # Explicit UTF-8: the rows contain Chinese text, and the locale default
    # encoding differs between platforms (pandas.read_csv expects UTF-8).
    with open('movie_2018_chinalist.csv', 'w', newline='',
              encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        for count, movie in enumerate(movies, start=1):
            if count % 10 == 0:
                print('已处理{}条记录(共{}条记录)'.format(count, total))
            writer.writerow(movie)
复制代码
def _box_office_in_yi(totalnum):
    """Convert a box-office string with a one-character unit suffix to 亿."""
    amount = float(totalnum[:-1])
    if totalnum[-1:] == '万':
        # 1 万 is 0.0001 亿.
        return amount * 0.0001
    # Any other trailing character is treated as the 亿 unit marker.
    return amount


def write_actorTotalNum_dsv(movies):
    """Sum the 2018 mainland (domestic film) box office per actor into a CSV.

    Each movie's box office is credited in full to every actor listed for
    it. The totals, in units of 亿, are sorted in descending order, printed,
    and written to ``actor_totalnum_2018.csv`` in the current working
    directory.

    Args:
        movies: Iterable of ``(name, box_office, actors)`` tuples where
            ``box_office`` is a number followed by a one-character unit
            (``'万'`` or otherwise assumed to be 亿) and ``actors`` is a
            ``'/'``-separated string of names.

    Raises:
        ValueError: if a box-office string of a movie that lists at least one
            actor is not a number followed by one unit character.
    """
    header = ['演员', '票房(亿)']
    actor_totalnum = {}
    for movie in movies:
        # Strip names so a whitespace-padded name is not counted as a
        # separate actor; empty segments (e.g. a trailing '/') are dropped.
        names = [name.strip() for name in movie[2].split('/')]
        names = [name for name in names if name]
        if not names:
            continue
        gross = _box_office_in_yi(movie[1])
        for name in names:
            actor_totalnum[name] = actor_totalnum.get(name, 0.0) + gross

    ranking = sorted(actor_totalnum.items(),
                     key=lambda item: item[1], reverse=True)
    print(ranking)

    # Explicit UTF-8 so pandas.read_csv (UTF-8 by default) can read the file
    # back regardless of the platform's locale encoding.
    with open('actor_totalnum_2018.csv', 'w', newline='',
              encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(ranking)
复制代码
def main():
    """Scrape the ranking, total box office per actor and chart the top 10."""
    # Fetch the 2018 mainland box-office list (domestic films).
    movies = getMovieList()

    # Optional: dump the raw movie list to a CSV file.
    # write_movie_dsv(movies)

    # Total the box office per actor and write the result to a CSV file.
    write_actorTotalNum_dsv(movies)

    # Optional: load the movie list for a top-10 movie chart.
    # movie_data = pd.read_csv('movie_2018_chinalist.csv')

    # Load the per-actor totals and keep the first ten rows as the top 10.
    ranking = pd.read_csv('actor_totalnum_2018.csv')
    leaders = ranking.head(10)

    # ``kind`` selects the chart type (a bar chart here).
    leaders.plot(
        kind='bar',
        x='演员',
        y='票房(亿)',
        title='2018内地(国产片)演员票房排行榜top10',
        figsize=(20, 10),
    )
    plt.savefig('top10_actor2018_bar.png')
    plt.show()

    # Save the top-10 table as a CSV file.
    leaders.to_csv('top10_actor2018.csv', index=False)
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
复制代码
如果想要统计2019年的票房王,可以直接把 params 参数中的 year 改为 2019,但循环中的页数(page)要根据实际情况修改成对应的页数。