#include "header.h"
typedef enum {A,B,C,D,E} letters;
/*convert a byte to a letter*/
letters b2l(int byte)
{
register int cnt1s=0, tmp, j;
for(j=0; j<8; ++j){
tmp=(byte>>j) & 1;
if( tmp==1 ) ++cnt1s;
}
if(cnt1s<3) return A;
switch( cnt1s ){
case 3: return B;
case 4: return C;
case 5: return D;
default: break;
}
return E;
}
/* get a byte from a stream of bytes */
int getb(char *filename, int rt)
{
static short rest=0;
static uniform wd;
if(rest==0){
wd=uni(filename);
rest=4;
}
--rest;
return ( (wd >> rest*8) & 255 ); /*255=pow(2,8)-1*/
}
/* get a specific byte of a random integer */
int getsb(char *filename, int rt)
{
return ( (uni(filename) >> rt) & 255 );
}
/* count the 1's in the sequence of bytes */
real cnt_stat(char *fn, counter no_wds, char *test, int rshft)
{
const real mean=2500, std=sqrt(5000);
const real prob[]={37/256.,56/256.,70/256.,56/256.,37/256.};
int (*byte)(char*, int);
register int wd;
register counter i;
short j, s;
counter *f, f4[625], f5[3125];
real Ef, chsq=0, z;
if( test[1]=='p' ){
byte=getsb;
printf("\t%d to %d ", 25-rshft,32-rshft);
}
else{
byte=getb;
printf("\t\t");
}
for(i=0; i<625; ++i){
f4[i]=0;
f5[i]=0;
}
for(i=625; i<3125; ++i){
f5[i]=0;
}
#define BYTE ( byte(fn, rshft) )
wd=(625*b2l(BYTE)+125*b2l(BYTE)+25*b2l(BYTE)+5*b2l(BYTE)+b2l(BYTE));
for(i=1; i<no_wds; ++i){
wd %= 625; /*Erase leftmost letter of w*/
++f4[wd];
wd=5*wd+b2l(BYTE); /*Shift wd left, add new letter*/
++f5[wd];
}
/* compute Q5-Q4, where Q4,Q5=sum(obs-exp)**2/exp */
for(s=4; s<=5; ++s){
int ltrspwd, wdspos;
letters ltr;
switch(s){
case 4: wdspos=625; f=f4; ltrspwd=4; break;
case 5: wdspos=3125; f=f5; chsq=-chsq; ltrspwd=5; break;
}
for(i=0; i<wdspos; ++i){
Ef=no_wds;
wd=i;
for(j=0; j<ltrspwd; ++j){
ltr=wd%5;
Ef*=prob[ltr];
wd/=5;
}
chsq+=( *(f+i)-Ef )*( *(f+i)-Ef )/Ef;
}
}
uni("close");
z=(chsq-mean)/std;
printf("\t%.2f\t\t% .3f\t\t%f\n", chsq, z, 1-Phi(z));
return chsq;
}
/*print the title*/
void cnt_ttl(char *fn, char *test)
{
if( strncmp(test, "specific", 2 )==0){
puts("\n\t|-------------------------------------------------------------|");
puts("\t| This is the COUNT-THE-1''s TEST for specific bytes. |");
puts("\t|Consider the file under test as a stream of 32-bit integers. |");
puts("\t|From each integer, a specific byte is chosen , say the left- |");
puts("\t|most: bits 1 to 8. Each byte can contain from 0 to 8 1''s, |");
puts("\t|with probabilitie 1,8,28,56,70,56,28,8,1 over 256. Now let |");
puts("\t|the specified bytes from successive integers provide a string|");
puts("\t|of (overlapping) 5-letter words, each \"letter\" taking values |");
puts("\t|A,B,C,D,E. The letters are determined by the number of 1''s,|");
puts("\t|in that byte: 0,1,or 2 ---> A, 3 ---> B, 4 ---> C, 5 ---> D, |");
puts("\t|and 6,7 or 8 ---> E. Thus we have a monkey at a typewriter |");
puts("\t|hitting five keys with with various probabilities: 37,56,70, |");
puts("\t|56,37 over 256. There are 5^5 possible 5-letter words, and |");
puts("\t|from a string of 256,000 (overlapping) 5-letter words, counts|");
puts("\t|are made on the frequencies for each word. The quadratic form|");
puts("\t|in the weak inverse of the covariance matrix of the cell |");
puts("\t|counts provides a chisquare test: Q5-Q4, the difference of |");
puts("\t|the naive Pearson sums of (OBS-EXP)^2/EXP on counts for 5- |");
puts("\t|and 4-letter cell counts. |");
puts("\t|-------------------------------------------------------------|\n");
printf("\t\tTest results for specific bytes from %s\n", fn);
}
else{
puts("\n\t|-------------------------------------------------------------|");
puts("\t| This is the COUNT-THE-1''s TEST on a stream of bytes. |");
puts("\t|Consider the file under test as a stream of bytes (four per |");
puts("\t|32 bit integer). Each byte can contain from 0 to 8 1''s, |");
puts("\t|with probabilities 1,8,28,56,70,56,28,8,1 over 256. Now let |");
puts("\t|the stream of bytes provide a string of overlapping 5-letter|");
puts("\t|words, each \"letter\" taking values A,B,C,D,E. The letters are|");
puts("\t|determined by the number of 1''s in a byte: 0,1,or 2 yield A,|");
puts("\t|3 yields B, 4 yields C, 5 yields D and 6,7 or 8 yield E. Thus|");
puts("\t|we have a monkey at a typewriter hitting five keys with vari-|");
puts("\t|ous probabilities (37,56,70,56,37 over 256). There are 5^5 |");
puts("\t|possible 5-letter words, and from a string of 256,000 (over- |");
puts("\t|lapping) 5-letter words, counts are made on the frequencies |");
puts("\t|for each word. The quadratic form in the weak inverse of |");
puts("\t|the covariance matrix of the cell counts provides a chisquare|");
puts("\t|test: Q5-Q4, the difference of the naive Pearson sums of |");
puts("\t|(OBS-EXP)^2/EXP on counts for 5- and 4-letter cell counts. |");
puts("\t|-------------------------------------------------------------|\n");
printf("\t\tTest result for the byte stream from %s\n", fn);
}
return;
}
/* type "stream" (use small case and don't forget the quotation mark) to test
a stream of bytes. type "specific" to test specific bytes */
void cnt1s(char *filename, char *test)
{
int rt=24;
counter no_wds=256000;
char *s="bits used";
if(strncmp(test, "stream", 2)==0){
rt=-1;
s="\t";
no_wds=2560000;
}
cnt_ttl(filename, test);
printf("\t (Degrees of freedom: 5^4-5^3=2500; sample size: %d)\n\n", no_wds);
printf("\t%s\tchisquare\tz-score\t\tp-value\n", s);
do{
cnt_stat(filename, no_wds, test, rt);
--rt;
}while(rt>=0);
return;
}
/*main()
{
cnt1s("binc", "stream");
cnt1s("binc", "specific");
return;
}*/
syntax highlighted by Code2HTML, v. 0.9.1