请勿使用手机竖屏浏览该文章,否则会遇到算式显示不全的BUG(我懒得改了)。

随机变量的数字特征

数学期望/均值

离散型

$$E(X)=\mu=\sum_{k=1}^\infty x_k p_k\quad E[g(X)]=\sum_{k=1}^\infty g(x_k) p_k$$

连续型

$$E(X)=\mu=\int_{-\infty}^\infty xf(x)dx\quad E[g(X)]=\int_{-\infty}^\infty g(x)f(x)dx$$

常见分布

$$\begin{gathered}X\sim\pi(\lambda)\Rightarrow E(X)=\lambda \\ X\sim U(a,b)\Rightarrow E(X)=\tfrac{a+b}{2} \\ X\sim exp(\theta)\Rightarrow E(X)=\theta \\ X\sim b(n,p)\Rightarrow E(X)=np\end{gathered}$$

方差

$$D(X)=\sigma^2=E\{[X-E(X)]^2\}=E(X^2)-[E(X)]^2$$

标准化变量(期望为0,方差为1)

$$X^*=\frac{X-\mu}{\sigma}$$

常见分布

$$\begin{gathered}X\sim\pi(\lambda)\Rightarrow D(X)=\lambda \\ X\sim U(a,b)\Rightarrow D(X)=\frac{(b-a)^2}{12} \\ X\sim exp(\theta)\Rightarrow D(X)=\theta^2 \\ X\sim b(n,p)\Rightarrow D(X)=np(1-p)\end{gathered}$$

切比雪夫不等式

$$P\{\lvert{X-\mu}\rvert\lt\varepsilon\}\ge1-\frac{\sigma^2}{\varepsilon^2}$$

协方差与相关系数

$$\begin{gathered}D(X+Y)=D(X)+D(Y)+2Cov(X,Y)\\ Cov(X,Y)=E\{[X-E(X)][Y-E(Y)]\}=E(XY)-E(X)E(Y)\\ \rho_{XY}=\tfrac{Cov(X,Y)}{\sqrt{D(X)D(Y)}}\\ Cov(X_1+X_2,Y)=Cov(X_1,Y)+Cov(X_2,Y)\end{gathered}$$

$$\begin{gathered}(X,Y)\sim N(\mu_1,\mu_2,\sigma_1^2,\sigma_2^2,\rho)\Rightarrow\\ f(x,y)=\tfrac{1}{2\pi\sigma_1\sigma_2\sqrt{1-\rho^2}}\exp\{\tfrac{-1}{2(1-\rho^2)}[\tfrac{(x-\mu_1)^2}{\sigma_1^2}-2\rho\tfrac{(x-\mu_1)(y-\mu_2)}{\sigma_1\sigma_2}+\tfrac{(y-\mu_2)^2}{\sigma_2^2}]\}\\ \rho=\rho_{XY}\quad Cov(X,Y)=\rho\sigma_1\sigma_2\end{gathered}$$

大数定律及中心极限定理

弱大数定理/辛钦大数定理

$$\begin{gathered}\forall\varepsilon\gt0,\lim_{n\rightarrow\infty}P\{\lvert\tfrac{1}{n}\sum_{k=1}^{n}X_k-\mu\rvert\lt\varepsilon\}=1\\ \bar X=\tfrac{1}{n}\sum_{k=1}^{n}X_k\stackrel{P}{\longrightarrow}\mu\end{gathered}$$

伯努利大数定理

$$\forall\varepsilon\gt0,\lim_{n\rightarrow\infty}P\{\lvert\tfrac{f_A}{n}-p\rvert\lt\varepsilon\}=1$$

独立同分布的中心极限定理

$$\begin{gathered}Y_n=\frac{\sum_{k=1}^{n}X_k-n\mu}{\sqrt{n}\sigma}\Rightarrow\\ \lim_{n\rightarrow\infty}P\{Y_n\le x\}=\int_{-\infty}^x\frac{1}{\sqrt{2\pi}}e^{-\tfrac{t^2}{2}}dt=\Phi(x)\end{gathered}$$

$$\begin{gathered}Y_n=\frac{\frac{1}{n}\sum_{k=1}^{n}X_k-\mu}{\sigma/\sqrt{n}}=\frac{\bar{X}-\mu}{\sigma/\sqrt{n}}\approx N(0,1)\\ \Rightarrow\bar{X}\approx N(\mu,\sigma^2/n)\end{gathered}$$

棣莫弗-拉普拉斯定理

$$\begin{gathered}\eta_n\sim b(n,p)\Rightarrow\\ \lim_{n\rightarrow\infty}P\{\frac{\eta_n-np}{\sqrt{np(1-p)}}\le x\}=\Phi(x)\end{gathered}$$

样本及抽样分布

样本均值

$$\bar{X}=\frac{1}{n}\sum_{i=1}^{n}X_i$$

样本方差

$$S^2=\frac{1}{n-1}\sum_{i=1}^{n}(X_i-\bar{X})^2=\frac{1}{n-1}(\sum_{i=1}^{n}X_i^2-n\bar{X}^2)$$

样本k阶矩

$$A_k=\frac{1}{n}\sum_{i=1}^{n}X_i^k\stackrel{P}{\longrightarrow}\mu_k=E(X^k)$$

样本k阶中心矩

$$B_k=\frac{1}{n}\sum_{i=1}^{n}(X_i-\bar{X})^k$$

$\chi^2$分布(不对称)

$$\begin{gathered}\chi^2=\sum_{i=1}^{n}X_i^2\sim\chi^2(n)\\ \chi_1^2\sim\chi^2(n_1),\chi_2^2\sim\chi^2(n_2)\Rightarrow\chi_1^2+\chi_2^2\sim\chi^2(n_1+n_2)\\ E(\chi^2)=n\quad D(\chi^2)=2n\end{gathered}$$

t分布(对称)

$$\begin{gathered}X\sim N(0,1),Y\sim\chi^2(n)\Rightarrow t=\frac{X}{\sqrt{Y/n}}\sim t(n)\\ t_{1-\alpha}(n)=-t_\alpha(n)\\ \text{when } n\gt45,t_\alpha(n)\approx z_\alpha\end{gathered}$$

F分布(不对称)

$$\begin{gathered}U\sim\chi^2(n_1),V\sim\chi^2(n_2)\Rightarrow F=\frac{Un_2}{Vn_1}\sim F(n_1,n_2)\\ \frac{1}{F}\sim F(n_2,n_1)\\ F_{1-\alpha}(n_1,n_2)=\frac{1}{F_\alpha(n_2,n_1)}\end{gathered}$$

正态总体的样本均值与样本方差的分布

$$E(\bar{X})=\mu\quad D(\bar{X})=\sigma^2/n\quad E(S^2)=\sigma^2$$

正态样本中

$$\begin{gathered}\bar{X}\sim N(\mu,\tfrac{\sigma^2}n)\Leftrightarrow\frac{\bar{X}-\mu}{\sigma/\sqrt{n}}\sim N(0,1)\\ \frac{(n-1)S^2}{\sigma^2}\sim\chi^2(n-1)\\ \frac{\bar{X}-\mu}{S/\sqrt{n}}\sim t(n-1)\end{gathered}$$

双正态样本中

$$\begin{gathered}\frac{S_1^2\sigma_2^2}{S_2^2\sigma_1^2}\sim F(n_1-1,n_2-1)\\ \text{when } \sigma_1^2=\sigma_2^2=\sigma^2,\frac{(\bar{X}-\bar{Y})-(\mu_1-\mu_2)}{S_w\sqrt{\tfrac{1}{n_1}+\tfrac{1}{n_2}}}\sim t(n_1+n_2-2)\\ S_w=\sqrt{\frac{(n_1-1)S_1^2+(n_2-1)S_2^2}{n_1+n_2-2}}\end{gathered}$$

参数估计

矩估计

$$\hat{\mu}=\bar{X}\quad \hat{\sigma^2}=\frac{1}{n}\sum_{i=1}^{n}(X_i-\bar{X})^2$$

最大似然估计

$$L(x_1,x_2,...,x_n;\hat{\theta})=\max_{\theta\in\Theta}L(x_1,x_2,...,x_n;\theta)$$

$\mu$的置信区间

$$\begin{gathered}(\bar{X}\pm\frac{\sigma}{\sqrt{n}}z_{\frac{\alpha}{2}})\\ (\bar{X}\pm\frac{S}{\sqrt{n}}t_{\frac{\alpha}{2}}(n-1))\end{gathered}$$

$\sigma^2$的置信区间

$$(\frac{(n-1)S^2}{\chi_{\tfrac{\alpha}{2}}^2(n-1)},\frac{(n-1)S^2}{\chi_{1-\tfrac{\alpha}{2}}^2(n-1)})$$

双总体$\mu_1-\mu_2$的置信区间

$$\begin{gathered}(\bar{X}-\bar{Y}\pm z_{\tfrac{\alpha}{2}}\sqrt{\tfrac{\sigma_1^2}{n_1}+\tfrac{\sigma_2^2}{n_2}})\\ \text{when } \sigma_1^2=\sigma_2^2=\sigma^2,(\bar{X}-\bar{Y}\pm t_{\tfrac{\alpha}{2}}(n_1+n_2-2)S_w\sqrt{\tfrac{1}{n_1}+\tfrac{1}{n_2}})\\ S_w=\sqrt{\frac{(n_1-1)S_1^2+(n_2-1)S_2^2}{n_1+n_2-2}}\end{gathered}$$

双总体$\tfrac{\sigma_1^2}{\sigma_2^2}$的置信区间

$$(\frac{S_1^2}{S_2^2}\frac{1}{F_{\tfrac{\alpha}{2}}(n_1-1,n_2-1)},\frac{S_1^2}{S_2^2}\frac{1}{F_{1-\tfrac{\alpha}{2}}(n_1-1,n_2-1)})$$

(0-1)分布参数p的置信区间

$$\begin{gathered}(\frac{-b\pm\sqrt{b^2-4ac}}{2a})\\ a=n+z_{\tfrac{\alpha}{2}}^2\quad b=-(2n\bar{X}+z_{\tfrac{\alpha}{2}}^2)\quad c=n\bar{X}^2\end{gathered}$$

假设检验

分布拟合检验 皮尔逊定理

$$\begin{gathered}\text{when } n\ge50,np_i\ge5\\ \sum_{i=1}^{k}\frac{f_i^2}{np_i}-n\sim\chi^2(k-1-r)\\ (r\text{ is the number of estimated parameters})\end{gathered}$$

偏度峰度

$$\begin{gathered}G_1\sim N(0,\frac{6(n-2)}{(n+1)(n+3)})\\ G_2\sim N(3-\frac{6}{n+1},\frac{24n(n-2)(n-3)}{(n+1)^2(n+3)(n+5)})\end{gathered}$$

方差分析及回归分析

单因素试验

总变差

$$\begin{gathered}S_T=\sum_{j=1}^s\sum_{i=1}^{n_j}(X_{ij}-\bar{X})^2=S_E+S_A\\ S_E=\sum_{j=1}^s\sum_{i=1}^{n_j}(X_{ij}-\bar{X}_{.j})^2\\ S_A=\sum_{j=1}^s\sum_{i=1}^{n_j}(\bar{X}_{.j}-\bar{X})^2=\sum_{j=1}^{s}n_j\bar{X}_{.j}^2-n\bar{X}^2\end{gathered}$$

$$\begin{gathered}S_T=\sum_{j=1}^{s}\sum_{i=1}^{n_j}X_{ij}^2-\frac{T_{..}^2}{n}\\ S_A=\sum_{j=1}^{s}\frac{T_{.j}^2}{n_j}-\frac{T_{..}^2}{n}\\ S_E=S_T-S_A\end{gathered}$$

$S_E$的统计特性

$$\begin{gathered}\frac{S_E}{\sigma^2}\sim\chi^2(n-s)\quad n=\sum_{j=1}^{s}n_j\\ E(S_E)=(n-s)\sigma^2\\ \hat\sigma^2=\frac{S_E}{n-s}\end{gathered}$$

$S_A$的统计特性

$$\begin{gathered}H_0\text{ 为真时:}\\ \frac{S_A}{\sigma^2}\sim\chi^2(s-1)\\ E(S_A)=(s-1)\sigma^2\\ F=\frac{S_A(n-s)}{S_E(s-1)}\sim F(s-1,n-s)\end{gathered}$$

一元线性回归

$$\begin{gathered}S_{xx}=\sum_{i=1}^{n}x_i^2-n\bar{x}^2\\ S_{yy}=\sum_{i=1}^{n}y_i^2-n\bar{y}^2\\ S_{xy}=\sum_{i=1}^{n}x_iy_i-n\bar{x}\bar{y}\\ \hat{b}=\frac{S_{xy}}{S_{xx}}\quad \hat{a}=\bar{y}-\hat{b}\bar{x}\\ Q_e=S_{yy}-\hat{b}S_{xy}\\ \frac{Q_e}{\sigma^2}\sim\chi^2(n-2)\\ \hat{\sigma}^2=\frac{Q_e}{n-2}\end{gathered}$$

线性假设的显著性($b\ne 0$)检验

$$\frac{\hat{b}-b}{\hat{\sigma}}\sqrt{S_{xx}}\sim t(n-2)$$

$\mu(x_0)=a+bx_0$的置信区间

$$(\hat{Y_0}\pm t_{\frac{\alpha}{2}}(n-2)\hat{\sigma}\sqrt{\frac{1}{n}+\frac{(x_0-\bar{x})^2}{S_{xx}}})$$

$Y_0=a+bx_0+\varepsilon_0$的预测区间

$$(\hat{Y_0}\pm t_{\frac{\alpha}{2}}(n-2)\hat{\sigma}\sqrt{\frac{1}{n}+1+\frac{(x_0-\bar{x})^2}{S_{xx}}})$$