Seaborn - 03 单变量分析绘图
程序员文章站
2022-07-14 10:06:25
...
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme and remap matplotlib's one-letter color
# codes (e.g. "g", "k") to the seaborn palette.
sns.set(color_codes=True)
# Seed NumPy's global RNG with the sum of the code points of the string
# below, so the random samples drawn afterwards are reproducible.
np.random.seed(sum(ord(ch) for ch in "distributions"))
拿到数据后,首先查看数据的分布情况,即进行单变量(单特征)分析
# Histogram of 100 draws from a standard normal distribution.
# sns.histplot replaces sns.distplot, which is deprecated in seaborn >= 0.11.
# kde=False draws only the bars, without a kernel density estimate curve.
x = np.random.normal(size=100)
sns.histplot(x, kde=False)

# Draw a fresh sample and split the x-axis into 20 bins.
x = np.random.normal(size=100)
sns.histplot(x, bins=20, kde=False)
1 数据分布情况
# 200 draws from a gamma distribution with shape parameter 6.
x = np.random.gamma(6, size=200)
# sns.distplot(..., fit=stats.gamma) is deprecated and histplot has no `fit`
# argument, so draw a density-normalized histogram and overlay the fitted
# gamma PDF explicitly.
ax = sns.histplot(x, stat="density", kde=False)
gamma_params = stats.gamma.fit(x)
support = np.linspace(x.min(), x.max(), 200)
ax.plot(support, stats.gamma.pdf(support, *gamma_params), color="#282828")
根据均值向量和协方差矩阵生成二元正态分布数据
# Mean vector and 2x2 covariance matrix of the bivariate normal to sample.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
# Draw 200 (x, y) pairs; the result is an array with one row per sample.
data = np.random.multivariate_normal(mean=mean, cov=cov, size=200)
# Wrap the samples in a DataFrame with one named column per variable.
df = pd.DataFrame(data=data, columns=["x", "y"])
print(df)
x y
0 -0.966779 1.224554
1 1.326123 0.467515
2 -1.233853 0.459449
3 -0.877749 0.512031
4 -1.682080 2.193876
.. ... ...
195 0.415857 0.815041
196 -0.141024 0.214063
197 -1.879956 0.599829
198 -1.056075 -0.086185
199 -0.786835 2.789600
[200 rows x 2 columns]
2 观察两个变量之间的关系时,最好使用散点图
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sci
import seaborn as sns
# Sample 200 points from a bivariate normal and plot y against x with a
# regression fit plus the marginal distributions.
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
grid = sns.jointplot(x="x", y="y", data=df, kind="reg", color='g')
# Current seaborn releases no longer accept the `stat_func` argument, so the
# Pearson correlation coefficient is computed with scipy and written onto the
# joint axes by hand.
r, p = sci.pearsonr(df["x"], df["y"])
grid.ax_joint.annotate(
    "pearsonr = {:.2g}; p = {:.2g}".format(r, p),
    xy=(0.05, 0.95),
    xycoords="axes fraction",
    ha="left",
    va="top",
)
plt.show()
统计学三大相关系数之皮尔森(pearson)相关系数 https://blog.csdn.net/AlexMerer/article/details/74908435
http://seaborn.pydata.org/generated/seaborn.JointGrid.html#seaborn.JointGrid
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sci
import seaborn as sns
# Sample 1000 points from a bivariate normal; .T splits the (1000, 2) array
# into separate x and y vectors.
mean, cov = [0, 1], [(1, .5), (.5, 1)]
x, y = np.random.multivariate_normal(mean, cov, 1000).T
# Use the "white" axes style for this figure only. The body of the `with`
# statement must be indented, otherwise the snippet is a syntax error.
with sns.axes_style("white"):
    # In a hexbin plot, darker cells contain more points and lighter cells
    # contain fewer.
    grid = sns.jointplot(x=x, y=y, kind="hex", color="k")
# Current seaborn releases no longer accept the `stat_func` argument, so the
# Pearson correlation coefficient is computed with scipy and written onto the
# joint axes by hand.
r, p = sci.pearsonr(x, y)
grid.ax_joint.annotate(
    "pearsonr = {:.2g}; p = {:.2g}".format(r, p),
    xy=(0.05, 0.95),
    xycoords="axes fraction",
    ha="left",
    va="top",
)
plt.show()
import seaborn as sns
# Load seaborn's bundled "iris" example dataset.
iris = sns.load_dataset(name="iris")
# Plot every pairwise combination of the dataset's variables in one grid.
sns.pairplot(data=iris)
对角线上:每个变量自身的分布情况(单变量分布)
其余位置:变量两两之间关系的散点图
上一篇: 一图入门Matplotlib绘图
下一篇: SQL如何创建计算字段