comment Statistical Functions endcomment function meandev (x,v=0) ## Return the mean value and the standard deviation of x1,...,xn. ## E.g. {m,d}=mean([1,2,3,3,4,2,5]). ## An additional parameter v may contain the multiplicities of x. ## E.g. {m,d}=mean([1,2,3,4,5],[1,2,2,1,1]) (same example). ## m=mean(x) will assign the mean value only! if (argn()==2) n=sum(v); m=sum(x*v)/n; d=sqrt((sum(x*x*v)-n*m*m)/(n-1)); else n=cols(x); m=sum(x)/n; d=sqrt((sum(x*x)-n*m*m)/(n-1)); endif; return {m,d}; endfunction function mean (x) ## Returns the mean value of x. ## An additional parameter may contain multiplicities. ## See: meandev. {m,d}=meandev(x,args()); return m; endfunction function dev (x) ## Returns the standard deviation of x. ## An additional parameter may contain multiplicities. ## See: meandev. {m,d}=meandev(x,args()); return d; endfunction function qnormal (x,m,d) ## Returns the normal density with mean m and deviation d. return 1/(d*sqrt(2*pi))*exp(-(x-m)^2/2/d^2); endfunction function xplotrange (r,v) ## Plots a barplot of the multiplicities v[i] in the ranges ## r[i],r[i+1]. r must be one longer than v. ## You can set the plot area with setplot beforehand. s=scaling(1); if s; setplot(min(r),max(r),0,max(v)); endif; scaling(s); n=cols(v); xplotbar(r[1:n],0,r[2:n+1]-r[1:n],v); return plot(); endfunction function chitest (x,y) ## Perform a xhi^2 test, if x obeys the distribution y. ## Return the error, if you reject this. return 1-chidis(sum((x-y)^2/y),cols(x)-1); endfunction function testnormal (r,n,v,m,d) ## Test the data v[i] in the ranges r[i],r[i+1] against ## the normal deviation with mean m and deviation d, ## using the xhi^2 method. ## n is the total number of data (including those ## less than r[1] and larger than r[n]). ## Return the error you have, if you reject normal distribution. t=normaldis((r-m)/d); k=cols(v); p=(t[2:k+1]-t[1:k])*n; return chitest(v,p); endfunction function ttest (m,d,n,mu) ## Test, if the measured mean m with deviation d of n data ## can come from a distribution with mean value mu. ## Returns the error you have, if you reject this. return 1-tdis(abs(m-mu)/d*sqrt(n),n-1); endfunction function tcompare (m1,d1,n1,m2,d2,n2) ## Test, if two measured data with means mi, deviation di ## of ni data (i=1,2), aggree in mean. ## The data must be normally distributed. ## Returns the error you have, if you reject this. h1=d1^2/n1; h2=d2^2/n2; return 1-tdis(abs(m1-m2)/sqrt(h1+h2), .. (h1+h2)^2/(h1^2/(n1+1)+h2^2/(n2+1))-2); endfunction function tcomparedata (x,y) ## Compare x and y on same mean, where x and y are ## normally distributed with different deviation. ## Return the error you have, if you reject this. return tcompare(mean(x),dev(x),cols(x),mean(y),dev(y),cols(y)); endfunction function tabletest (A) ## Test the results a[i,j] for independence of the rows ## from the columns. (chi^2 test) ## This should only be used for large table entries. ## Return the error you have, if you reject independence. c=sum(A); r=sum(A')'; E=c*r/sum(r); return 1-chidis(totalsum((A-E)^2/E),(cols(A)-1)*(rows(A)-1)); endfunction function varanalysis ## Test the normally distributed data x1,x2,x3,... for same ## mean value. ## varanalysis(x1,x2,x3,...), where each xi is a row vector. ## Returns the error you have, if you reject same mean. md=[mean(arg1),cols(arg1)]; s=sum((arg1-md[1])^2); loop 2 to argn(); x=args(#); mx=mean(x); md=md_[mx,cols(x)]; s=s+sum((x-mx)^2); end; md=md'; n=sum(md[2]); mt=sum(md[1]*md[2])/n; s1=sum(md[2]*(md[1]-mt)^2); return 1-fdis((s1/(argn()-1))/(s/(n-argn())),argn()-1,n-argn()); endfunction function binsum1 (i,n,p) if i>=n; return 1; endif; d=sqrt(n*p*(1-p)); i1=n*p-10*d; if i1<0; i1=0; endif; if in*p+10*d; return 1; endif; s=n*log(1-p); if i>=1; j=1:i+1; s=cumsum(s|(log((n-j+1)/j*p/(1-p)))); endif; return sum(exp(s[floor(i1):i+1])); endfunction function binsum (i,n,p) ## Compute the probablitiy of getting i or less hits ## in n runs, where the probability for each hit is p. ## Works even for large i and n, but is ineffective then. ## One may use the normalsum function for large n and ## medium p. return map("binsum1",i,n,p); endfunction function normalsum (i,n,p) ## Works like binsum, but is much faster for large n ## and medium p. return normaldis((i+0.5-n*p)/sqrt(n*p*(1-p))); endfunction function invbinsum1 (x,n,p) if x<=0; return 0; endif; if x>=1; return n; endif; s=n*log(1-p); d=sqrt(n*p*(1-p)); i1=floor(n*p-10*d); i2=floor(n*p+10*d); if i1<0; i1=0; endif; if i2>n; i2=n; endif; j=1:i2; s=cumsum(s|(log((n-j+1)/j*p/(1-p)))); return find(cumsum(exp(s[i1:length(s)])),x)+i1; endfunction function invbinsum (x,n,p) ## Computes i, such that binomialsum(i,n,p) is just larger than x. return map("invbinsum1",x,n,p) endfunction function hypergeomsum1 (i,n,i1,n1) if i=i1; return 1; endif; if i1<=n1-n; s=logbin(n1-n,i1)-logbin(n1,i1); if i>=1; j=1:i; s=cumsum(s|(log(n-j+1)-log(j)+log((i1-j+1))-log((n1-n-i1+j)))); endif; else s=log(bin(n,i1-(n1-n)))-log(bin(n1,i1)); if i>i1-(n1-n); j=(i1-(n1-n)+1):i; s=cumsum(s|(log(n-j+1)-log(j)+log(i1-j+1)-log(n1-n-i1+j))); endif; endif return sum(exp(s)); endfunction function hypergeomsum (i,n,itot,ntot) ## Return the probability of hitting i or less black balls, if ## n are chosen out of ntot, and there are a total of itot black ## balls (and ntot-itot white balls). return map("hypergeomsum1",i,n,itot,ntot); endfunction function mediantest (a,b) ## Test the two distributions a and b on equal mean value. ## To do this both distributions are checked on exceeding ## the median of both. ## Returns the error you have, if you assume a does not ## exceed the median to often (i.e. a may be equal to b) c=sort(a|b); n=cols(c); if mod(n,2)==0; m=(c[n/2]+c[n/2+1])/2; else m=c[n/2+1]; endif; return 1-hypergeomsum(sum(an1)); return 1-normaldis(abs((min(R1-n1*(n1+1)/2,R2-n2*(n2+1)/2)-n1*n2/2)/ .. sqrt(n1*n2*(n1+n2+1)/12))); endfunction function signtest (a,b) ## Assume a(i) and b(i) are results of treatment a and b at i. ## a and b must be row vectors of equal length. ## Test, if a is not better than b. ## Return the error you have, if you decide that a is better ## than b. n=cols(a); i=sum(a>b); return 1-binsum(i-1,n,0.5); endfunction function wilcoxon (a,b,eps=sqrt(epsilon())) ## This is a sharper test for the same problem as in signtest. ## See: signtest ## Returns the error you have, if you decide that a is better ## than b. d=a-b; n=cols(d); {c,i}=sort(abs(d)); R=1:n; {c1,i1}=sort(c+eps*(n:-1:1)); R=(R+R[i1])/2; R1=sum(R*(d[i]<0)); R2=sum(R*(d[i]>=0)); W=R2; return 1-normaldis((W-n*(n+1)/4)/ .. sqrt(n*(n+1)*(2*n+1)/24))); endfunction