欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

python参数估计实战

程序员文章站 2022-05-09 10:56:38
...

介绍

本次实战仅以总体均值的区间估计作为例子。
python参数估计实战

代码

import pandas as pd
import numpy as np
from scipy import stats
import math
import seaborn as sns
from matplotlib import pyplot as plt


# Global matplotlib settings: select the SimHei sans-serif font (presumably so
# that Chinese labels render) and draw minus signs with the ASCII hyphen
# instead of the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': 'SimHei',
    'axes.unicode_minus': False,
})

# Synthetic population: evenly spaced heights from 160 up to (not including)
# 185 in steps of 0.01, stored in a single-column DataFrame.
height = pd.DataFrame({'height': np.arange(160, 185, 0.01)})
print(height.describe())

# Summary statistics of the population, reused by the simulations below.
# pandas' std() uses ddof=1 by default.
overall_mean = height['height'].mean()
overall_std = height['height'].std()

python参数估计实战

计算正态分布下的置信区间

# Variance unknown: use the sample standard deviation.
def norm_conf1(data, confidence=0.95):
    """Print and return a normal-distribution interval around the mean of ``data``.

    The interval is centred on ``np.mean(data)`` and uses the sample standard
    deviation of ``data`` (``ddof=1``) as the scale. The scale is used as-is:
    it is NOT divided by ``sqrt(len(data))``, so to obtain a confidence
    interval for a mean, pass a collection of sample means.

    Args:
        data: Sequence of numeric observations.
        confidence: Probability mass the interval should cover.

    Returns:
        The ``(lower, upper)`` end points produced by
        ``scipy.stats.norm.interval``.
    """
    center = np.mean(data)
    spread = np.std(data, ddof=1)
    conf_interval = stats.norm.interval(confidence, loc=center, scale=spread)
    print(conf_interval)
    return conf_interval

# Variance known: the caller supplies the standard deviation to use.
def norm_conf2(data, std_n, confidence=0.95):
    """Print and return a normal-distribution interval with a caller-given scale.

    The interval is centred on ``np.mean(data)`` and uses ``std_n`` directly
    as the scale; no division by ``sqrt(len(data))`` is applied here, so the
    caller must pass the standard error if that is what is wanted.

    Args:
        data: Sequence of numeric observations.
        std_n: Scale (standard deviation) of the normal distribution.
        confidence: Probability mass the interval should cover.

    Returns:
        The ``(lower, upper)`` end points produced by
        ``scipy.stats.norm.interval``.
    """
    center = np.mean(data)
    conf_interval = stats.norm.interval(confidence, loc=center, scale=std_n)
    print(conf_interval)
    return conf_interval

计算T分布下的置信区间

# Population variance unknown.
def ttest_conf1(data, confidence=0.95):
    """Print and return a Student-t interval around the mean of ``data``.

    The interval is centred on ``np.mean(data)``, has ``len(data) - 1``
    degrees of freedom and uses the sample standard deviation of ``data``
    (``ddof=1``) as the scale. The scale is used as-is: it is NOT divided by
    ``sqrt(len(data))``.

    Args:
        data: Sequence of numeric observations.
        confidence: Probability mass the interval should cover.

    Returns:
        The ``(lower, upper)`` end points produced by
        ``scipy.stats.t.interval``.
    """
    center = np.mean(data)
    spread = np.std(data, ddof=1)
    dof = len(data) - 1
    conf_interval = stats.t.interval(confidence, df=dof, loc=center, scale=spread)
    print(conf_interval)
    return conf_interval

# Population variance known.
def ttest_conf2(data, std_n, confidence=0.95):
    """Print and return a Student-t interval with a caller-given scale.

    The interval is centred on ``np.mean(data)``, has ``len(data) - 1``
    degrees of freedom and uses ``std_n`` directly as the scale; no division
    by ``sqrt(len(data))`` is applied here.

    Args:
        data: Sequence of numeric observations.
        std_n: Scale of the t distribution.
        confidence: Probability mass the interval should cover.

    Returns:
        The ``(lower, upper)`` end points produced by
        ``scipy.stats.t.interval``.
    """
    center = np.mean(data)
    dof = len(data) - 1
    conf_interval = stats.t.interval(confidence, df=dof, loc=center, scale=std_n)
    print(conf_interval)
    return conf_interval

模拟大样本,非正态分布,总体方差已知和未知的情况

# Draw 1000 samples of size n (with replacement) from the `height` population
# and keep each sample's mean as a plain scalar.
n = 100
scale_means1 = [
    height.sample(n, replace=True)['height'].mean()
    for _ in range(1000)
]

sns.set_palette("hls")  # use the hls colour space for every plot
# NOTE: sns.distplot is deprecated in recent seaborn releases;
# sns.histplot(..., kde=True, stat="density") is the documented replacement.
sns.distplot(scale_means1, color="r", bins=10, kde=True)
plt.title('Height')
plt.xlim(165, 180)
plt.grid(True)
plt.show()

# Non-normal population, variance unknown: the spread is estimated from the
# simulated sample means themselves.
print('---------非正态,方差未知,大样本---------------------')
norm_conf1(scale_means1)
print('---------非正态,方差已知,大样本---------------------')
# Non-normal population, variance known: use the standard error
# overall_std / sqrt(n) as the scale.
norm_conf2(scale_means1, overall_std / math.sqrt(n))

python参数估计实战
python参数估计实战

模拟正态分布总体,大小样本,总体方差已知和未知情况

# Normally distributed population: 2500 draws from a normal distribution that
# has the mean and standard deviation of the data above.
normalHeight = pd.DataFrame(
    {'normalHeight': np.random.normal(overall_mean, overall_std, 2500)}
)

# Draw 1000 small samples of size n (with replacement) and keep each sample's
# mean as a plain scalar.
n = 20
scale_means2 = [
    normalHeight.sample(n, replace=True)['normalHeight'].mean()
    for _ in range(1000)
]

sns.set_palette("hls")  # use the hls colour space for every plot
# NOTE: sns.distplot is deprecated in recent seaborn releases;
# sns.histplot(..., kde=True, stat="density") is the documented replacement.
sns.distplot(scale_means2, color="r", bins=10, kde=True)
plt.title('Height')
plt.xlim(165, 180)
plt.grid(True)
plt.show()

# Small samples: use the t distribution.
print('---------正态,方差为未知,小样本---------------------')
ttest_conf1(scale_means2)
print('---------正态,方差为已知,小样本---------------------')
ttest_conf2(scale_means2, overall_std / math.sqrt(n))

python参数估计实战
python参数估计实战

# Repeat the simulation on the normal population with large samples: 1000
# samples of size n (with replacement), each reduced to its scalar mean.
n = 100
scale_means2 = [
    normalHeight.sample(n, replace=True)['normalHeight'].mean()
    for _ in range(1000)
]

# Large samples: use the normal distribution.
print('---------正态,方差为未知,大样本---------------------')
norm_conf1(scale_means2)
print('---------正态,方差为已知,大样本---------------------')
norm_conf2(scale_means2, overall_std / math.sqrt(n))

python参数估计实战
python参数估计实战