(相关数据见 GitHub)
import pandas as pd
# Tab-separated dataset file, given relative to the working directory.
path1 = "./data/chipotle.tsv"
chipo = pd.read_csv(path1, sep="\t")
# Preview the last five rows; head() would show the first five instead.
chipo.tail()
输出:
# Print a concise summary of the DataFrame: index, columns, non-null counts, dtypes and memory usage.
chipo.info()
输出:
# Inspect the size of the dataset as a (rows, columns) tuple.
# chipo.shape[0] gives the row count alone and chipo.shape[1] the column count.
chipo.shape
输出:
(4622, 5)
# Show the column labels of the DataFrame.
chipo.columns
输出:
Index(['order_id', 'quantity', 'item_name', 'choice_description',
'item_price'],
dtype='object')
# Show the row index of the DataFrame.
chipo.index
输出:
RangeIndex(start=0, stop=4622, step=1)
# Group by item_name, sum quantity within each group, and order the result
# from the largest summed quantity to the smallest.
# The aggregation is named by the string "sum" rather than the builtin `sum`,
# which newer pandas releases warn about when passed to agg().
c = (
    chipo[["item_name", "quantity"]]
    .groupby("item_name", as_index=False)
    .agg({"quantity": "sum"})
    .sort_values("quantity", ascending=False)
)
# Display the five rows with the highest summed quantity.
c.head()
输出:
# Count the distinct values in the item_name column (missing values are not counted).
chipo['item_name'].nunique()
输出:
50
# Tally each distinct choice_description value and show the five most frequent.
chipo['choice_description'].value_counts().head()
输出:
[Diet Coke] 134
[Coke] 123
[Sprite] 77
[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Lettuce]] 42
[Fresh Tomato Salsa, [Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce]] 40
# Sum of the quantity column over all rows.
total_items_orders = chipo['quantity'].sum()
# Bare expression so the value is displayed as the cell output.
total_items_orders
输出:
4972
def dollarizer(x):
    """Convert a price string to a float by dropping its first and last characters."""
    # NOTE(review): assumes every value has one leading symbol and one trailing
    # character around the number (presumably like "$2.39 ") — confirm against
    # the data file before reusing on other inputs.
    return float(x[1:-1])


# Replace the string prices with floats; re-running this on already-converted
# values will fail because floats cannot be sliced.
chipo['item_price'] = chipo['item_price'].apply(dollarizer)

# Multiply item_price by quantity for each row, round to two decimals,
# then add the per-row results up.
chipo['sub_total'] = (chipo['item_price'] * chipo['quantity']).round(2)
chipo['sub_total'].sum()
输出:
39237.02
# Count the distinct order_id values.
chipo['order_id'].nunique()
输出:
1834
# Sum sub_total within each order_id, then average those per-order sums.
chipo.groupby("order_id")["sub_total"].sum().mean()
输出:
21.39423118865867
# Count the distinct values in the item_name column (missing values are not counted).
chipo['item_name'].nunique()
输出:
50
一、http://pandas.pydata.org/pandas-docs/stable/cookbook.html#cookbook
二、https://www.analyticsvidhya.com/blog/2016/01/12-pandas-techniques-python-data-manipulation/