陳鍾誠

Version 1.0

推論統計

中央極限定理

import numpy as np
from scipy.stats import *
import matplotlib.pyplot as plt

def CLT(s, bins=100, rwidth=0.9):
    n = len(s)
    fig, axs = plt.subplots(2, 2) # 設為 2*2 的四格繪圖版
    # 1 個一組平均
    s1 = np.array(s)
    axs[0,0].hist(s1, bins=bins, rwidth=rwidth)
    # 2 個一組平均
    s2 = s1.reshape((2, int(n/2))).sum(axis=0)/2
    axs[0,1].hist(s2, bins=bins, rwidth=rwidth)
    # 10 個一組平均
    s10 = s1.reshape((10, int(n/10))).sum(axis=0)/10
    axs[1,0].hist(s10, bins=bins, rwidth=rwidth)
    # 20 個一組平均
    s20 = s1.reshape((20, int(n/20))).sum(axis=0)/20
    axs[1,1].hist(s20, bins=bins, rwidth=rwidth)
    plt.show() # 顯示圖形

n = 100000
CLT(uniform.rvs(size=n))
CLT(np.random.choice([0,1], size=n))
CLT(np.random.choice([1,2,3,4,5,6], size=n))

執行結果

$ python clt1.py

圖: 中央極限定理 – 均等分布

圖: 中央極限定理 – 丟銅板

圖: 中央極限定理 – 擲骰子

檢定

雙樣本檢定

# https://stackoverflow.com/questions/22611446/perform-2-sample-t-test
import numpy as np
from scipy.stats import ttest_ind, ttest_ind_from_stats
from scipy.special import stdtr

np.random.seed(1)

# Create sample data.
a = np.random.randn(40)
b = 4*np.random.randn(50)

# Use scipy.stats.ttest_ind.
t, p = ttest_ind(a, b, equal_var=False)
print("ttest_ind:            t = %g  p = %g" % (t, p))

# Compute the descriptive statistics of a and b.
abar = a.mean()
avar = a.var(ddof=1)
na = a.size
adof = na - 1

bbar = b.mean()
bvar = b.var(ddof=1)
nb = b.size
bdof = nb - 1

# Use scipy.stats.ttest_ind_from_stats.
t2, p2 = ttest_ind_from_stats(abar, np.sqrt(avar), na,
                              bbar, np.sqrt(bvar), nb,
                              equal_var=False)
print("ttest_ind_from_stats: t = %g  p = %g" % (t2, p2))

# Use the formulas directly.
tf = (abar - bbar) / np.sqrt(avar/na + bvar/nb)
dof = (avar/na + bvar/nb)**2 / (avar**2/(na**2*adof) + bvar**2/(nb**2*bdof))
pf = 2*stdtr(dof, -np.abs(tf))

print("formula:              t = %g  p = %g" % (tf, pf))

執行

$ python ttest1.py
ttest_ind:            t = -1.5827  p = 0.118873
ttest_ind_from_stats: t = -1.5827  p = 0.118873
formula:              t = -1.5827  p = 0.118873