# data.astype()
# When a Series is given a new index, labels that match the original index keep their values; labels with no match get NaN.
import pandas as pd
import numpy as np
import string
# A DataFrame from a dict of equal-length lists: each key becomes a column.
d1 = {
    "name": ["xiaoming", "xiaogang"],
    "age": [12, 15],
    "tel": [10010, 11186],
}
t1 = pd.DataFrame(data=d1)
print(t1)

# A DataFrame from a list of dicts: each dict becomes a row, and any key a
# row does not provide shows up as NaN in that row.
d2 = [
    {"name": "xiaoming", "age": 10, "tel": 10010},
    {"name": "xiaogang", "tel": 10086},
    {"name": "xiaohong", "age": 13},
]
t2 = pd.DataFrame(data=d2)
print(t2)
# Read the CSV file and show which columns it provides.
csv_data = pd.read_csv("./IMDB-Movie-Data.csv")
print(csv_data.columns)

# Keep only the fields that are needed, then order the rows by vote count,
# highest first.
csv_data = pd.DataFrame(
    csv_data,
    columns=["Rank", "Title", "Director", "Actors", "Votes"],
).sort_values(by="Votes", ascending=False)

# loc selects by index label and column name. Because the rows were re-sorted,
# ":100" is not "the first 100 rows": it takes every row up to and including
# the one whose index label is 100.
print(csv_data.loc[:100, ["Title", "Votes"]])

# iloc selects by integer row and column position.
print(csv_data.iloc[:100, [1, 4]])
# Build a 3x4 integer frame with row labels A-C and column labels W-Z.
d3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list(string.ascii_uppercase[:3]),
    columns=list(string.ascii_uppercase[-4:]),
)

# Integer columns cannot hold NaN. Cast the two target columns to float
# explicitly rather than relying on implicit upcasting during assignment,
# which newer pandas releases deprecate. Columns Y and Z stay integer.
d3 = d3.astype({"W": float, "X": float})
d3.loc["B":"C", "W":"X"] = np.nan
print(d3)
print(pd.notnull(d3["W"]))

# dropna returns a new frame and leaves d3 untouched unless inplace=True, so
# the next three calls only demonstrate the API; their results are discarded.
d3.dropna(axis=0)
# how="all": drop a row only when every value in it is NaN.
d3.dropna(axis=0, how="all")
# how="any": drop a row as soon as it contains a single NaN.
d3.dropna(axis=0, how="any")
# inplace=True modifies d3 itself; axis=1 drops the columns that contain NaN.
d3.dropna(axis=1, inplace=True)
# pandas skips NaN when computing statistics such as the mean.
# numeric_only=True restricts the mean to the numeric columns; without it,
# recent pandas versions raise TypeError on the string column "name".
# fillna returns a new frame, so t2 itself is not changed by this call.
t2.fillna(t2.mean(numeric_only=True))
# Replace the missing ages with the mean of the known ages and store the
# result back on t2.
t2["age"] = t2["age"].fillna(t2["age"].mean())
print(t2)