python参数估计实战
程序员文章站
2022-05-09 10:56:38
...
介绍
本次实战仅以总体均值的区间估计作为例子
代码
import pandas as pd
import numpy as np
from scipy import stats
import math
import seaborn as sns
from matplotlib import pyplot as plt
# Global matplotlib settings: select the SimHei sans-serif font and disable the
# Unicode minus glyph (presumably so CJK labels and negative tick labels render
# with that font -- confirm on the target machine).
plt.rcParams.update({
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})
# Synthetic population: an evenly spaced grid of values from 160 (inclusive)
# to 185 (exclusive) in steps of 0.01, stored in a one-column DataFrame.
height = pd.DataFrame({'height': np.arange(160, 185, 0.01)})
print(height.describe())

# The grid is treated as the *entire* population, so its standard deviation
# must divide by N (ddof=0). pandas defaults to ddof=1, which is the sample
# estimator and slightly overstates the population value.
overall_std = height['height'].std(ddof=0)
overall_mean = height['height'].mean()
计算正态分布下的置信区间
# Variance unknown: the sample standard deviation is used as the scale.
def norm_conf1(data, confidence=0.95):
    """Print and return a normal-distribution interval centred on the mean of ``data``.

    The sample standard deviation of ``data`` (``ddof=1``) is passed to the
    normal distribution as its scale unchanged; it is NOT divided by
    ``sqrt(len(data))``.
    NOTE(review): the result is therefore a confidence interval for a
    population mean only when ``data`` already holds sample means, so that
    its spread is the standard error -- confirm against callers.

    Args:
        data: sequence of numeric observations.
        confidence: central probability mass of the interval (0 < value < 1).

    Returns:
        The ``(lower, upper)`` tuple from ``scipy.stats.norm.interval``.
        The same tuple is also printed.
    """
    sample_mean = np.mean(data)
    sample_std = np.std(data, ddof=1)
    conf_interval = stats.norm.interval(confidence, loc=sample_mean, scale=sample_std)
    print(conf_interval)
    return conf_interval
# Variance known: the caller supplies the scale (e.g. overall_std / sqrt(n)).
def norm_conf2(data, std_n, confidence=0.95):
    """Print and return a normal-distribution interval with a caller-supplied scale.

    The interval is centred on the mean of ``data``; ``std_n`` is used as the
    scale of the normal distribution exactly as given.

    Args:
        data: sequence of numeric observations.
        std_n: scale (standard deviation) of the normal distribution.
        confidence: central probability mass of the interval (0 < value < 1).

    Returns:
        The ``(lower, upper)`` tuple from ``scipy.stats.norm.interval``.
        The same tuple is also printed.
    """
    sample_mean = np.mean(data)
    conf_interval = stats.norm.interval(confidence, loc=sample_mean, scale=std_n)
    print(conf_interval)
    return conf_interval
计算T分布下的置信区间
# Population variance unknown: the sample standard deviation is used as the scale.
def ttest_conf1(data, confidence=0.95):
    """Print and return a Student-t interval centred on the mean of ``data``.

    Degrees of freedom are ``len(data) - 1`` and the scale is the sample
    standard deviation of ``data`` (``ddof=1``), used unchanged (it is NOT
    divided by ``sqrt(len(data))``).
    NOTE(review): when ``data`` is a list of sample means, the degrees of
    freedom follow the number of means, not the size of each underlying
    sample -- confirm this is what callers intend.

    Args:
        data: sequence of numeric observations (at least two values).
        confidence: central probability mass of the interval (0 < value < 1).

    Returns:
        The ``(lower, upper)`` tuple from ``scipy.stats.t.interval``.
        The same tuple is also printed.
    """
    sample_mean = np.mean(data)
    sample_std = np.std(data, ddof=1)
    sample_size = len(data)
    conf_interval = stats.t.interval(
        confidence, df=sample_size - 1, loc=sample_mean, scale=sample_std
    )
    print(conf_interval)
    return conf_interval
# Population variance known: the caller supplies the scale.
def ttest_conf2(data, std_n, confidence=0.95):
    """Print and return a Student-t interval with a caller-supplied scale.

    The interval is centred on the mean of ``data`` with ``len(data) - 1``
    degrees of freedom; ``std_n`` is used as the scale exactly as given.

    Args:
        data: sequence of numeric observations (at least two values).
        std_n: scale passed to the t distribution.
        confidence: central probability mass of the interval (0 < value < 1).

    Returns:
        The ``(lower, upper)`` tuple from ``scipy.stats.t.interval``.
        The same tuple is also printed.
    """
    sample_mean = np.mean(data)
    sample_size = len(data)
    conf_interval = stats.t.interval(
        confidence, df=sample_size - 1, loc=sample_mean, scale=std_n
    )
    print(conf_interval)
    return conf_interval
模拟大样本,非正态分布,总体方差已知和未知的情况
# Draw 1000 samples of size n (with replacement) from the grid population and
# keep each sample mean as a plain float. Taking the mean of the 'height'
# column (rather than of the whole DataFrame) avoids building a list of
# one-element Series objects.
n = 100
scale_means1 = [
    height['height'].sample(n, replace=True).mean()
    for _ in range(1000)
]

# Histogram + KDE of the simulated sample means.
sns.set_palette("hls")  # use the HLS colour space for all plots
sns.distplot(scale_means1, color="r", bins=10, kde=True)
plt.title('Height')
plt.xlim(165, 180)
plt.grid(True)
plt.show()

# Non-normal population, variance unknown, large sample
print('---------非正态,方差未知,大样本---------------------')
norm_conf1(scale_means1)
print('---------非正态,方差已知,大样本---------------------')
# Non-normal population, variance known: scale is the population standard
# deviation divided by sqrt(n).
norm_conf2(scale_means1, overall_std / math.sqrt(n))
模拟正态分布总体,大小样本,总体方差已知和未知情况
# Normal population: 2500 draws from a normal distribution parameterised by the
# mean and standard deviation computed from the data above.
normalHeight = pd.DataFrame(
    {'normalHeight': np.random.normal(overall_mean, overall_std, 2500)}
)

# Small samples (n = 20): 1000 sample means, each kept as a plain float by
# averaging the column rather than the whole DataFrame (which would yield
# one-element Series objects).
n = 20
scale_means2 = [
    normalHeight['normalHeight'].sample(n, replace=True).mean()
    for _ in range(1000)
]

# Histogram + KDE of the simulated sample means.
sns.set_palette("hls")  # use the HLS colour space for all plots
sns.distplot(scale_means2, color="r", bins=10, kde=True)
plt.title('Height')
plt.xlim(165, 180)
plt.grid(True)
plt.show()

# Small samples use the t distribution.
print('---------正态,方差为未知,小样本---------------------')
ttest_conf1(scale_means2)
print('---------正态,方差为已知,小样本---------------------')
ttest_conf2(scale_means2, overall_std / math.sqrt(n))

# Large samples (n = 100): repeat the simulation and use the normal distribution.
n = 100
scale_means2 = [
    normalHeight['normalHeight'].sample(n, replace=True).mean()
    for _ in range(1000)
]
print('---------正态,方差为未知,大样本---------------------')
norm_conf1(scale_means2)
print('---------正态,方差为已知,大样本---------------------')
norm_conf2(scale_means2, overall_std / math.sqrt(n))
上一篇: 参数估计(python实现)
下一篇: matlab代码---参数估计