//#define CHECKINDIRECT #include #include #include #include "port.h" #include "sz_err.h" #include "sz_srt.h" #if defined SZ_UNSRT_O4 #include "sz_hash2.h" // only used in sz_unsrt_o4 #endif // the sorting is a little slow due to attempts to reuse memory as soon as possible. // since the n-1 order sorted block is read sequentially a block can be freed (inserted // in a freelist) as soon as it is processed. Since the new n-order sorted pointers // grow as 256 different lists there is no need to have all memory available at once; // new memory is needed at the same speed old is freed. #define BITSSAMEBLOCK 10 #define BLOCKSIZE (1<nrblocks = (length+BLOCKSIZE-1)/BLOCKSIZE; if (globalinit && (p->nrblocks>globalptr.nrblocks)) { free(globalptr.index); free(globalptr.oldindex); free(globalptr.block); globalinit = 0; } if (!globalinit) { globalptr.nrblocks = p->nrblocks; globalptr.index = (ptrblock**) malloc(sizeof(ptrblock*)*globalptr.nrblocks); if (globalptr.index == NULL) sz_error(SZ_NOMEM_SORT); globalptr.oldindex = (ptrblock**) malloc(sizeof(ptrblock*)*globalptr.nrblocks); if (globalptr.oldindex == NULL) sz_error(SZ_NOMEM_SORT); globalptr.block = (ptrblock*) malloc(sizeof(ptrblock)*globalptr.nrblocks); if (globalptr.block == NULL) sz_error(SZ_NOMEM_SORT); globalinit = 1; } p->index = globalptr.index; p->oldindex = globalptr.oldindex; p->block = globalptr.block; p->freelist = NULL; for(i=0; i<18; i++) p->spare[i] = NULL; for (i=0; inrblocks; i++) p->index[i] = p->block + i; } static void extraspare(ptrstruct *p, int blocks) { int i; for (i=0; p->spare[i]!= NULL; i++) /* void */; p->spare[i] = (ptrblock*) malloc(sizeof(ptrblock)*blocks); if (p->spare[i] == NULL) sz_error(SZ_NOMEM_SORT); p->spare[i]->nextfree = p->freelist; p->freelist = p->spare[i]; for(i=1; ifreelist[i-1].nextfree = p->freelist + i; p->freelist[blocks-1].nextfree = NULL; } static void allocspareptrs(uint4 length, ptrstruct *p) { length = (length>>BITSSAMEBLOCK) + 1; if (length>256) length = 256; extraspare(p,length); } static void freeptrs(ptrstruct *p) { int i; // free(p->index); // free(p->oldindex); // free(p->block); for (i=0; p->spare[i] != NULL; i++) free(p->spare[i]); } static Inline void setptr(ptrstruct *p, uint4 i, uint4 ptr) { ptrblock *tmp; tmp = p->index[i>>BITSSAMEBLOCK]; if (tmp==NULL) { if (p->freelist == NULL) extraspare(p,16); tmp = p->index[i>>BITSSAMEBLOCK] = p->freelist; p->freelist = p->freelist->nextfree; } i &= BLOCKMASK; tmp->msbytes[i] = ptr>>8; tmp->lsbyte[i] = ptr & 0xff; } static void sortorder2(ptrstruct *p, unsigned char *in, uint4 length, uint4 *counts, unsigned int offset, uint4 *indexlast) { uint4 i, *o2counts, sum; unsigned int context; memset(counts, 0, 256*sizeof(uint4)); o2counts = (uint4*) calloc(0x10000, sizeof(uint4)); if (o2counts == NULL) sz_error(SZ_NOMEM_SORT); context = (unsigned)in[length-1]<<8; for(i=0; i>8 | (unsigned)(in[i])<<8; counts[in[i]]++; o2counts[context]++; } sum = length; for (i=0x10000; i--; ) { sum -= o2counts[i]; o2counts[i] = sum; } sum = length; for (i=0x100; i--; ) { sum -= counts[i]; counts[i] = sum; } context = (unsigned)in[length-offset]<<8 | in[length-offset-1]; if (context == 0xffff) *indexlast = length-1; else *indexlast = o2counts[context+1]-1; offset--; for(i=0; i>8 | (unsigned int)(in[i+length-offset])<<8; setptr(p,o2counts[context],i+length); o2counts[context]++; } for(i=offset; i>8 | (unsigned int)(in[i-offset])<<8; setptr(p,o2counts[context],i); o2counts[context]++; } free(o2counts); } static void incsortorder(ptrstruct *p, unsigned char *in, uint4 length, uint4 *counts, int offset, uint4 *indexlast) { uint4 i, block, ct[256]; ptrblock *curblock; unsigned char ch=0; {ptrblock **swap=p->index; p->index = p->oldindex; p->oldindex = swap;} memset(p->index,0,p->nrblocks*sizeof(ptrblock*)); memcpy(ct,counts,256*sizeof(uint4)); block = 0; curblock = p->oldindex[block]; for (i=0; i<=*indexlast; i++) { unsigned index = i & BLOCKMASK; uint4 tmp = (uint4)(curblock->msbytes[index])<<8 | curblock->lsbyte[index]; ch = in[tmp-offset]; setptr(p,ct[ch],tmp); ct[ch]++; if (index==BLOCKMASK && block!=p->nrblocks-1) //last ptr in block { curblock->nextfree = p->freelist; p->freelist = curblock; block++; curblock = p->oldindex[block]; } } *indexlast = ct[ch]-1; for ( ; imsbytes[index])<<8 | curblock->lsbyte[index]; ch = in[tmp-offset]; setptr(p,ct[ch],tmp); ct[ch]++; if (index==BLOCKMASK && blocknrblocks-1) //last ptr in block { curblock->nextfree = p->freelist; p->freelist = curblock; block++; curblock = p->oldindex[block]; } } curblock->nextfree = p->freelist; p->freelist = curblock; } static void finishsort(ptrstruct *p, unsigned char *in, uint4 length, uint4 *counts, uint4 *indexlast) { uint4 i, block, ct[256]; ptrblock *curblock; unsigned char ch=0; {ptrblock **swap=p->index; p->index = p->oldindex; p->oldindex = swap;} memset(p->index,0,p->nrblocks*sizeof(ptrblock*)); memcpy(ct,counts,256*sizeof(uint4)); block = 0; curblock = p->oldindex[block]; for (i=0; i<=*indexlast; i++) { unsigned index = i & BLOCKMASK; uint4 tmp = (uint4)(curblock->msbytes[index])<<8 | curblock->lsbyte[index]; ch = in[tmp-1]; setptr(p,ct[ch],in[tmp]); ct[ch]++; if (index==BLOCKMASK && block!=p->nrblocks-1) //last ptr in block { curblock->nextfree = p->freelist; p->freelist = curblock; block++; curblock = p->oldindex[block]; } } *indexlast = ct[ch]-1; for ( ; imsbytes[index])<<8 | curblock->lsbyte[index]; ch = in[tmp-1]; setptr(p,ct[ch],in[tmp]); ct[ch]++; if (index==BLOCKMASK && block!=p->nrblocks-1) //last ptr in block { curblock->nextfree = p->freelist; p->freelist = curblock; block++; curblock = p->oldindex[block]; } } curblock->nextfree = p->freelist; p->freelist = curblock; for (i=0; inrblocks-1; i++) memcpy(in+i*BLOCKSIZE, p->index[i]->lsbyte, BLOCKSIZE); i= p->nrblocks - 1; memcpy(in+BLOCKSIZE*i, p->index[i]->lsbyte, length-i*BLOCKSIZE); } // inout: bytes to be sorted; sorted bytes on return. must be length+order bytes long // length: number of bytes in inout // *indexlast: returns position of last context (needed for unsort) // order: order of context used in sorting (must be >=3) // the code assumes length>=order // and inout is length+order bytes long (only the first length need to be filled) void sz_srt(unsigned char *inout, uint4 length, uint4 *indexlast, unsigned int order) { uint4 i; ptrstruct p; uint4 counts[256]; allocptrs(length, &p); sortorder2(&p, inout, length, counts, order, indexlast); allocspareptrs(length, &p); for (i=order-2; i>1; i--) incsortorder(&p, inout, length, counts, i, indexlast); finishsort(&p, inout, length, counts, indexlast); freeptrs(&p); } #define INDIRECT 0x800000 #define setbit(flags,bit) (flags[bit>>3] |= 1<<(bit & 7)) #define getbit(flags,bit) ((flags[bit>>3]>>(bit&7)) & 1) static void makeorder2(unsigned char *flags, unsigned char *in, uint4 *counts, uint4 length) { uint4 i, j, ct[256]; memcpy(ct, counts, 256*sizeof(uint4)); // set bits in flag1 at start of each order 2 context // for order 2 this is more efficient than the method used for higher orders for(i=0; i<256; i++) setbit(flags,ct[i]); j = 0; for (i=0; i<255; i++) { uint4 k; for(k=counts[i+1]; j=3) // the code assumes length>=order void sz_unsrt(unsigned char *in, unsigned char *out, uint4 length, uint4 indexlast, uint4 *counts, unsigned int order) { uint4 i, j; static uint4 *table; static unsigned char *flags1=NULL; static unsigned char *flags2=NULL; unsigned char nocounts; // get counts if not supplied nocounts = counts==NULL; if (nocounts) { counts = (uint4*) calloc(256, sizeof(uint4)); for (i=0; i>3,1); if (flags1 == NULL) sz_error(SZ_NOMEM_SORT); } else memset(flags1,0,(length+8)>>3); makeorder2(flags1, in, counts, length); // now incease the order to desired order-1 if (flags2==NULL){ flags2 = (unsigned char*) calloc((length+8)>>3,1); if (flags2 == NULL) sz_error(SZ_NOMEM_SORT); } else memset(flags2,0,(length+8)>>3); for (i=2; i>8 | (uint)(*tmp)<<8; counters[i]++; } } // add context counts { register uint4 sum = length; for (i=0x10000; i--; ) { sum -= counters[i]; counters[i] = sum; } } // first sort pass if (context==NULL) { context = (uint2*)malloc(length*sizeof(uint2)); if (context == NULL) sz_error(SZ_NOMEM_SORT); } if (symbols==NULL) { symbols = (unsigned char*)(malloc(length)); if (symbols == NULL) sz_error(SZ_NOMEM_SORT); } // the following loop in assembler it would probably be a lot faster { register unsigned char *tmp; register uint4 ctx = (uint4)inout[length-4]<<8 | inout[length-5]; if (ctx == 0xffff) *indexlast = length-1; else *indexlast = counters[ctx+1]-1; ctx = (((uint4)inout[length-1] << 8 | inout[length-2]) << 8 | inout[length-3]) << 8 | inout[length-4]; for (tmp=inout; tmp> 16; ctx = ctx>>8 | (uint4)(symbols[x] = *tmp)<<24; } } /* second sort pass */ { uint4 lastpos = *indexlast; for (i=length; i>lastpos; ) // lastpos is the last processed in this loop { i-=1; inout[--counters[context[i]]] = symbols[i]; } } *indexlast = counters[context[i]]; while (i--) inout[--counters[context[i]]] = symbols[i]; // free(counters); // free(context); // free(symbols); } #endif #ifdef SZ_UNSRT_O4 // an alternate backtransform for order 4 using hash tables void sz_unsrt_o4(unsigned char *in, unsigned char *out, uint4 length, uint4 indexlast, uint4 *counts) { uint4 i, *contexts2, *contexts4, initcontext; uint2 *lastseen; unsigned char *loop, *endloop, nocounts; h2table htable; // get counts if not supplied nocounts = counts==NULL; if (nocounts) { counts = (uint4*) calloc(256, sizeof(uint4)); for (i=0; i indexlast) initcontext = i; sum -= contexts2[i]; contexts4[i] = sum; } initcontext <<= 16; // sum counts sum = length; for (i=0x100; i--;) { sum -= counts[i]; counts[i] = sum; } } initHash2(htable); // in hardware: make lastseen a bitfield and zero them all when you hit a new // order 2 context. set them whenever you see a new order 4 context. // much like in the loop for the 0x????0000 contexts. // loop over all order 2 contexts // count contexts 0x????0000 first loop = in; lastseen = (uint2*)calloc(0x10000,sizeof(uint2)); if (lastseen == NULL) sz_error(SZ_NOMEM_SORT); for (endloop = loop+contexts2[0]; loop= contexts4[initcontext>>16]) initcontext = (initcontext & 0xffff0000) | i; for (endloop = loop+contexts2[i]; loop>8 | (uint4)in[indexlast]<<24; { uint4 context = initcontext; if (out == NULL) for ( i=0; i>8) | ((uint4)outchar<<24); putc(outchar, stdout); } else for ( i=0; i>8) | ((uint4)outchar<<24); } if (context != initcontext) sz_error(SZ_NOTCYCLIC); } freeHash2(htable); } #endif #ifdef SZ_SRT_BW #include "qsort_u4.c" void sz_srt_BW(unsigned char *inout, uint4 length, uint4 *indexfirst) { uint4 i, counts[256], counts1[256], *contextp, start; for (i=0; i<256; i++) counts[i] = 0; for (i=0; i