python实现参数估计
程序员文章站
2022-05-09 10:53:24
...
1.导入数据
import numpy as np
import pandas as pd
from scipy import stats
# Raw string so the backslashes in the Windows path are never read as
# escape sequences (the path value itself is unchanged).
path = r'D:\数据\data.xlsx'
data = pd.read_excel(path)
age = data['Age']
# mean is a method: without the call parentheses the bound method object
# is printed instead of the average.
print(age.mean())  # the article reports 29.64209269662921 for its dataset
2.抽取100个样本
# Draw 100 observations from the Age column (pandas samples without
# replacement unless replace=True is passed).
age_sam = age.sample(100)
# A bare expression is only echoed in an interactive session; print the
# summary statistics so they are also shown when run as a script.
print(age_sam.describe())
3.计算置信区间
# Confidence interval under a normal distribution
def norm_conf(data, confidence=0.95):
    """Return the central normal interval fitted to *data*.

    The interval is that of a normal distribution whose location is the
    mean of *data* and whose scale is the sample standard deviation of
    *data*. No division by sqrt(n) is applied, so pass values whose spread
    already is the standard error (e.g. a collection of sample means).

    Args:
        data: Sized collection of numeric observations (at least two).
        confidence: Probability mass covered by the interval.

    Returns:
        A ``(lower, upper)`` tuple.

    Raises:
        ValueError: If *data* holds fewer than two observations, for which
            the sample standard deviation is undefined.
    """
    if len(data) < 2:
        raise ValueError("at least two observations are required")
    sample_mean = np.mean(data)
    # numpy.std() divides by n by default (biased); ddof=1 gives the
    # n-1 sample standard deviation. pandas' std() already defaults to
    # dividing by n-1.
    sample_std = np.std(data, ddof=1)
    return stats.norm.interval(confidence, loc=sample_mean, scale=sample_std)
# Confidence interval under Student's t distribution
def ttest_conf(data, confidence=0.95):
    """Return the central Student-t interval fitted to *data*.

    The interval is that of a t distribution with ``len(data) - 1`` degrees
    of freedom, located at the mean of *data* and scaled by the sample
    standard deviation of *data*. No division by sqrt(n) is applied, so
    pass values whose spread already is the standard error (e.g. a
    collection of sample means).

    Args:
        data: Sized collection of numeric observations (at least two).
        confidence: Probability mass covered by the interval.

    Returns:
        A ``(lower, upper)`` tuple.

    Raises:
        ValueError: If *data* holds fewer than two observations, which
            would leave zero degrees of freedom.
    """
    sample_size = len(data)
    if sample_size < 2:
        raise ValueError("at least two observations are required")
    sample_mean = np.mean(data)
    # ddof=1: divide by n-1 to get the sample standard deviation.
    sample_std = np.std(data, ddof=1)
    # The t distribution (not the normal) is the one that takes df.
    return stats.t.interval(
        confidence,
        df=sample_size - 1,
        loc=sample_mean,
        scale=sample_std,
    )
4.重复抽取数据
# Repeat the sampling 1000 times: each round draws 100 ages with
# replacement and records that sample's mean.
age_means = [age.sample(100, replace=True).mean() for _ in range(1000)]
# Print the intervals so they are visible when run as a script.
print(norm_conf(age_means))
print(ttest_conf(age_means))
5.绘制图像
import seaborn as sns
from matplotlib import pyplot as plt

sns.set_palette("hls")  # use the HLS colour space for every plot
# Plot the sample means collected above. histplot replaces the deprecated
# distplot; stat="density" keeps the histogram normalised the way distplot
# drew it when a KDE curve was overlaid.
sns.histplot(age_means, color="r", bins=10, kde=True, stat="density")
plt.title('Age')
plt.xlim(25, 35)
plt.grid(True)
plt.show()
参考链接:
https://blog.csdn.net/qq_43315928/article/details/103658733
上一篇: 机器学习(二)非参数估计matlab例程
下一篇: Redis主从同步原理