Notes-Python for Data Science and Machine Learning Bootcamp notes

matplotlib、seaborn

 

一、matplotlib

anaconda安装 matplotlib:  conda  install matplotlib
python 下安装matplotlib: pip install matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
x=np.linspace(0,5,11)
y=x**2
#functional 
plt.plot(x,y,'r-')
plt.xlabel('xlabel')
plt.ylabel('ylabel')
plt.title('title')
plt.show()  # if not in the jupyter notebook
#plot many plots  functions
plt.subplot(1,2,1)
plt.plot(x,y,'r')

plt.subplot(1,2,2)
plt.plot(y,x,'b')

plt.show()
# figure object
fig=plt.figure()

axes=fig.add_axes([0.1,0.1,0.8,0.8])  # add the axes
axes.plot(x,y) #plot the axes
axes.set_xlabel('x label')
axes.set_ylabel('y label')
axes.set_title('set title')
# plot many plots ,the plots in different axes
fig=plt.figure()
axes1=fig.add_axes([0.1,0.1,0.8,0.8])
axes2=fig.add_axes([0.2,0.5,0.4,0.3])

axes1.plot(x,y)
axes2.plot(y,x)

axes1.set_title('larger plot')
axes2.set_title('smaller plot')
# subplots
fig,axes=plt.subplots(nrows=1,ncols=2)  # 1*2 plots

for current_ax in axes:
    current_ax.plot(x,y)
fig,axes=plt.subplots(nrows=1,ncols=2)
axes[0].plot(x,y)
axes[0].set_title('')

axes[1].plot(y,x)
axes[1].set_title('second plot')
# figure size and dpi

fig=plt.figure(figsize=(8,2))

ax=fig.add_axes([0,0,1,1])
ax.plot(x,y)
#figure size and dpi
fig,axes=plt.subplots(nrows=2,figsize=(8,2))
axes[0].plot(x,y)
axes[1].plot(y,x)
plt.tight_layout()
#save the figure
fig.savefig('my_picture.png',dpi=200)
#figure size and dpi
fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
ax.plot(x,y)

ax.set_title('title')
ax.set_xlabel('x')
ax.set_ylabel('y')
# figure size and dpi
fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
ax.plot(x,x**2,label='x squared')
ax.plot(x,x**3,label='x cubed')
ax.legend(loc=(0.1,0.1))
#plot appearance
fig=plt.figure()
ax=fig.add_axes([0,0,1,1,])
ax.plot(x,y,color='blue')  # same as ax.plot(x,y,color='#0000ff')
#plot appearance  line marker style
fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
ax.plot(x,y,color='purple',linewidth=3,alpha=0.5)
#ax.plot(x,y,color='purple',lw=3,alpha=0.5)
ax.plot(x,y,color='purple',lw=3,linestyle='--')
ax.plot(x,y,color='purple',lw=3,ls='--')
ax.plot(x,y,color='purple',lw=3,ls='-',marker='2',markersize=10,markerfacecolor='yellow',markeredgewidth=3,markeredgecolor='green')# marker and markersize
# appearance style xlim,ylim

fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
ax.plot(x,y,color='purple',lw=2)
ax.set_xlim([0,1])
ax.set_ylim([0,1])
# special plot types
plt.scatter(x,y)



from random import sample
data=sample(range(1,1000),100)
plt.hist(data)


data=[np.random.normal(0,std,100) for std in range(1,4)]

#retangular box plot
plt.boxplot(data,vert=True,patch_artist=True);

further reading

http://www.loria.fr/~rougier/teaching/matplotlibpython

#matplotlib exercises

#data
import numpy as np
x=np.arange(0,100)
y=x*2
z=x**2

import matplotlib.pyplot as plt
%matplotlib inline

fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
ax.plot(x,y)

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('title')

##########################

#exercise 2

fig=plt.figure()
ax1=fig.add_axes([0,0,1,1])
ax2=fig.add_axes([0.2,0.5,0.2,0.2])

ax1.plot(x,y,color='red')
ax2.plot(x,y,color='red')



#########################333

#exercise 3
fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
ax2=fig.add_axes([0.2,0.5,0.4,0.4])

ax.plot(x,z)
ax.set_xlabel('x')
ax.set_ylabel('z')

ax2.plot(x,y)
ax2.set_title('zoom')
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_xlim([20,22])
ax2.set_ylim([30,50])

##########################33

# exercise 4.01

fig,axes=plt.subplots(1,2)
axes[0].plot(x,y,ls='-',color='blue',lw=3)

axes[1].plot(x,z,color='red',lw=3)


# exercise 4.01

fig,axes=plt.subplots(1,2,figsize=(12,2))
axes[0].plot(x,y,ls='-',color='blue',lw=3)

axes[1].plot(x,z,color='red',lw=3)

二、seaborn

anaconda 安装 seaborn: conda install seaborn
 python 环境下安装seaborn: pip install seaborn
# Distribution Plots
import seaborn as sns
%matplotlib inline
tips=sns.load_dataset('tips')
tips.head()

sns.displot(tips['total_bill'])

sns.displot(tips['total_bill'],kde=False,bins=30)


################

sns.jointplot(x='total_bill',y='tip',data=tips)
sns.jointplot(x='total_bill',y='tip',data=tips,kind='hex')
sns.jointplot(x='total_bill',y='tip',data=tips,kind='reg')

#######################

sns.pairplot(tips)
sns.pairplot(tips,hue='sex',palette='coolwarm')

############################

sns.rugplot(tips['total_bill'])


##############

sns.kdeplot(tips['total_bill'])


###########################################

#Categorical plots

import seaborn as sns
import numpy as np
%matplotlib inline
tips=sns.load_dataset('tips')
tips.head()

sns.barplot(x='sex',y='total_bill',data=tips)

sns.barplot(x='sex',y='total_bill',data=tips,estimator=np.std)

sns.countplot(x='sex',data=tips)

sns.boxplot(x='day',y='total_bill',data=tips)

sns.boxplot(x='day',y='total_bill',data=tips,hue='smoker')

sns.violinplot(x='day',y='total_bill',data=tips)

sns.violinplot(x='day',y='total_bill',data=tips,hue='sex',split=True)

sns.stripplot(x='day',y='total_bill',data=tips,jitter=True,hue='sex',split=True)

#######
sns.violinplot(x='day',y='total_bill',data=tips)
sns.swarmplot(x='day',y='total_bill',data=tips,color='black')
##########

sns.factorplot(x='day',y='total_bill',data=tips,kind='bar')

#####################################################

#Matrix Plots

import seaborn as sns
%matplotlib inline
tips=sns.load_dataset('tips')
flights=sns.load_dataset('flights')
tips.head()
flights.head()

tc=tips.corr()
sns.heatmap(tc)
sns.heatmap(tc,annot=True,cmap='coolwarm')

flights.pivot_table(index='month',columns='year',values='passengers')

fp=flights.pivot_table(index='month',columns='year',values='passengers')
sns.heatmap(fp)
sns.heatmap(fp,cmap='magma',linecolor='white',linewidths=1)

sns.clustermap(fp)
sns.clustermap(fp,cmap='coolwarm',standard_scale=1)



############################################################
####GRID

import seaborn as sns
%matplotlib inline
iris=sns.load_dataset('iris')
iris.head()

sns.pairplot(iris)
sns.PairGrid(iris)
g=sns.PairGrid(iris)
g.map(plt.scatter)
g.map_diag(sns.distplot)
g.map_upper(plt.scatter)
g.map_lower(sns.kdeplot)


tips=sns.load_dataset('tips')
tips.head()
g=sns.FacetGrid(data=tips,col='time',row='smoker')
g.map(sns.distplot,'total_bill')
g.map(plt.scatter,'total_bill','tip')

##############################################
#regression  plots
import seaborn as sns
%matplotlib inline
tips=sns.load_dataset('tips')
tips.head()
sns.lmplot(x='total_bill',y='tip',data=tips,hue='sex',markers=['o','v'],scatter_kws={'s':100})

sns.lmplot(x='total_bill',y='tip',data=tips,col='sex',row='time')
sns.lmplot(x='total_bill',y='tip',data=tips,col='day',hue='sex',aspect=0.6,size=8)


#################################
#style and color
import seaborn as sns
%matplotlib inline
tips=sns.load_dataset('tips')

sns.set_style('white') # whitegrid 
sns.countplot(x='sex',data=tips)

sns.set_style('ticks')
sns.despine(left=True,right=False)

plt.figure(figsize=(12,3))
sns.countplot(x='sex',data=tips)

sns.set_context('poster',font_scale=13) #notebook
sns.countplot(x='sex',data=tips)


sns.lmplot(x='total_bill',y='tip',data=tips,hue='sex',palette='coolwarm')#seismic


######################################
#seaborn exercises

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set_style('whitegrid')
titanic=sns.load_dataset('titanic')

sns.jointplot(x='fare',y='age',data=titanic)

sns.distplot(titanic['fare'],kde=False,color='red',bins=30)

sns.boxplot(x='class',y='age',data=titanic,palette='rainbow')

sns.swarmplot(x='class',y='age',data=titanic,palette='Set2')

sns.countplot(x='sex',data=titanic)

sns.heatmap(titanic.corr(),cmap='coolwarm')
plt.title('titanic')

g=sns.FacetGrid(data=titanic,col='sex')
g.map(plt.hist,'age')

g.map(sns.distplot,'age')

pandas data visualization exercise solutions

import pandas as pd
import matplotlib.pyplot as plt
df3=pd.read_csv('df3')

%matplotlib inline

df3.plot.scatter(x='a',y='b',s=50,c='red',figsize=(10,10))

df3['a'].hist()

df3['a'].plot.hist()

plt.style.use('ggplot')

df3['a'].plot.hist(bins=20,alpha=0.5)

df3['d'].plot.kde(lw=5,ls='--')

df3.loc[0:30].plot.area()


#####
f=plt.figure()
df3.loc[0:30].plot.area(alpha=0.4)
plt.legend(loc='center left',bbox_to_anchor=(1.0,0.5))
plt.show()
#######


######################################
#pandas built-in data visualization

import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline
df1=pd.read_csv('df1',index_col=0)
df2=pd.read_csv('df2')

df1['A'].hist(bins=30)

df1['A'].plot(kind='hist',bins=30)

df1['A'].hist()

df2.plot.bar()

df2.plot.bar(stacked=True)

df1['A'].plot.hist(bins=50)

df1.head()

df1.plot.line(x=df1.index,y='B')
df2.plot.area()

df1.plot.line(x=df1.index,y='B',figsize=(12,10))

df1.plot.scatter(x='A',y='B',c='C',cmap='coolwarm')

df1.plot.scatter(x='A',y='B',s=df1['C']*10)



df2.plot.box()

df=pd.DataFrame(np.random.randn(1000,2),columns=['a','b'])

df.plot.hexbin(x='a',y='b',gridsize=25,cmap='coolwarm')

df2['a'].plot.kde()

df2['a'].plot.density()

df2.plot.density()

############################################

df2.plot.density()

plotly and cufflinks

python 下安装 plotly 和 cufflinks:(使用pip安装完成后,在jupyter note book 中能正常使用)
pip install plotly
pip install cufflinks

anaconda 安装 plotly,命令
conda install -c https://conda.anaconda.org/plotly plotly
或 conda install -c plotly plotly=3.6.0
import pandas as pd
import numpy as np
from plotly import __version__

print(__version__)

#######
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

init_notebook_mode(connected=True)

cf.go_offline()

###DATA
df=pd.DataFrame(np.random.randn(100,4),columns='A B C D'.split())
df.head()

df2=pd.DataFrame({'Category':['A','B','C'],'Values':[32,43,50]})

df2.plot()

df.plot()

%matplotlib inline

df.iplot()

df.iplot(kind='scatter',x='A',y='B',mode='markers',size=20)

df2.iplot(kind='bar',x='Category',y='Values')

df.iplot(kind='bar')

df.count().iplot(kind='bar')

df.sum().iplot(kind='bar')

df.iplot(kind='box')

df3=pd.DataFrame({'x':[1,2,3,4,5],'y':[10,20,30,20,10],'z':[500,400,300,200,100]})

df3.iplot(kind='surface',colorscale='rdylbu')

df['A'].iplot(kind='hist',bins=50)

df.iplot(kind='hist')

df[['A','B']].iplot(kind='spread')

df.iplot(kind='bubble',x='A',y='B',size='C')


df.scatter_matrix()

Geographical plotting

 

method 1: plotly 
method 2: matplotlib basemap

#################

choropleth maps

#################################
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot

init_notebook_mode(connected=True)
import pandas as pd

data=dict(type='choropleth',locations=['AZ','CA','WV'],
locationmode='USA-states',
colorscale='Portland',
text=['text 1','text 2','text 3'],
z=[1.0,2.0,3.0],
colorbar={'title':'Colorbar Title Goes Here'}
)

layout=dict(geo={'scope':'usa'})
choromap=go.Figure(data=[data],layout=layout)
iplot(choromap)