#include "port.h"

#include <stdlib.h>     /* rand() */

/*
 * Exchange the two uint4 values that x and y point to.
 * Expands to a braced block, so a trailing semicolon at the call site is
 * optional (but do not use it as the unbraced body of an if/else).
 */
#define swap(x,y) {uint4 tmp = *(x); *(x) = *(y); *(y) = tmp;}

/*
 * Sub-arrays of at most CUTOFF elements are handed to shortsort() instead of
 * being partitioned further.
 * NOTE(review): no definition of CUTOFF is visible in this file; it is
 * presumably supplied by port.h or was lost together with the damaged text
 * near qscompare().  The fallback below only takes effect when nothing else
 * defines it -- TODO confirm the intended value.
 */
#ifndef CUTOFF
#define CUTOFF 8
#endif

/*
 * Capacity of the explicit stack used by qsort_u4().  The smaller partition
 * is always processed first, so each stacked entry is less than half the size
 * of the one below it; a uint4 element count therefore never needs more than
 * 31 entries.
 */
enum { QS_STACK_DEPTH = 32 };

/* prototypes for local routines */
void shortsort ( uint4 *lo, uint4 *hi, unsigned char *data, uint4 minmatch );

/*
 * qscmp - compare the byte sequences ending at data[a] and data[b], walking
 * backwards towards the start of data.
 *
 * Only the pointer derived from a is bounds-checked against data, so the
 * caller must pass a <= b.
 *
 * *ml receives the number of positions stepped back before the walk stopped
 * (i.e. the number of equal bytes seen).
 *
 * Returns -1 when the byte reached on the a side is <= the byte reached on
 * the b side (this includes running into the start of data while still
 * equal), otherwise 1.  Never returns 0.
 */
static Inline int qscmp(uint4 a, uint4 b, unsigned char *data, uint4 *ml)
{
    unsigned char *a1, *b1;

    a1 = data + a;
    b1 = data + b;
    while (a1 > data && *a1 == *b1) {
        a1--;
        b1--;
    }
    *ml = (uint4)(data + a - a1);
    if (*a1 <= *b1)
        return -1;
    return 1;
}

/*
 * qscompare - order two offsets into data, in either argument order.
 *
 * NOTE(review): everything in this function after "if (a" was missing from
 * the file (the text between a '<' and a later '>' had been stripped).  The
 * body below is a reconstruction: qscmp() requires its first offset to be the
 * smaller one, so the arguments are swapped and the result negated when
 * a >= b.  Confirm against a pristine copy of this file.
 *
 * *ml receives the match length reported by qscmp().
 * Returns a negative value if a sorts before b, a positive value otherwise.
 */
static int qscompare(uint4 a, uint4 b, unsigned char *data, uint4 *ml)
{
    if (a < b)
        return qscmp(a, b, data, ml);
    return -qscmp(b, a, data, ml);
}

/***
*qsort_u4(base, num, data, minmatch) - quicksort for an array of uint4 offsets
*
*Purpose:
*       Sorts the array of offsets in place.  Two elements x and y are
*       ordered by qscompare(x - minmatch, y - minmatch, data, ...).
*
*       While partitioning, the smallest match length reported for each side
*       is tracked and added to minmatch for the sub-sort of that side, so
*       bytes already known to be equal are not compared again.
*
*Entry:
*       uint4 *base         = pointer to first element of the array
*       uint4 num           = number of elements in the array
*       unsigned char *data = buffer the offsets refer to
*       uint4 minmatch      = amount subtracted from every offset before it
*                             is compared
*
*Exit:
*       returns void
*
*Exceptions:
*
*******************************************************************************/

/* sort the array between lo and hi (inclusive) */

static void qsort_u4 (
    uint4 *base,
    uint4 num,
    unsigned char *data,
    uint4 minmatch
    )
{
    uint4 *lo, *hi;              /* ends of sub-array currently sorting */
    uint4 *mid;                  /* points to the chosen pivot element */
    uint4 *loguy, *higuy;        /* traveling pointers for partition step */
    uint4 size;                  /* size of the sub-array */
    uint4 *lostk[QS_STACK_DEPTH], *histk[QS_STACK_DEPTH], mm[QS_STACK_DEPTH];
                                 /* stack for saving sub-arrays to be
                                    processed, with the minmatch of each */
    uint4 lomm, himm;            /* minmatch for low/high */
    int stkptr;                  /* number of entries on the stack */

    if (num < 2)
        return;                  /* nothing to do */

    stkptr = 0;                  /* initialize stack */

    lo = base;
    hi = base + (num-1);         /* initialize limits */

    /* this entry point is for pseudo-recursion calling: setting
       lo and hi and jumping to here is like recursion, but stkptr is
       preserved, locals aren't, so we preserve stuff on the stack */
recurse:

    size = (uint4)(hi - lo) + 1; /* number of el's to sort */

    /* below a certain size, it is faster to use a O(n^2) sorting method */
    if (size <= CUTOFF) {
        shortsort(lo, hi, data, minmatch);
    }
    else {
        uint4 ml;                /* match length from the latest comparison */

        /* First we pick a partitioning element.  The efficiency of the
           algorithm demands that we find one that is approximately the
           median of the values, but also that we select one fast.  Using
           the first one produces bad performance if the array is already
           sorted, so we pick an element at random with rand(). */

        mid = lo + rand()%size;  /* pick the pivot element */
        swap(mid, lo);           /* swap it to beginning of array */

        /* We now wish to partition the array into three pieces, one
           consisting of elements <= partition element, one of elements
           equal to the partition element, and one of elements >= to it.
           This is done below; comments indicate conditions established at
           every step. */

        loguy = lo;
        higuy = hi + 1;

        /* starting value for the running minimums kept below */
        lomm = num-minmatch;
        himm = num-minmatch;
        ml = num-minmatch;

        /* Note that higuy decreases and loguy increases on every iteration,
           so loop must terminate. */
        for (;;) {
            /* lo <= loguy < hi, lo < higuy <= hi + 1,
               A[i] <= A[lo] for lo <= i <= loguy,
               A[i] >= A[lo] for higuy <= i <= hi */

            /* On entry to each pass ml belongs to an element that stays on
               (or has just been swapped to) the low side, so it is folded
               into lomm before moving on. */
            do {
                if (ml < lomm)
                    lomm = ml;
                loguy++;
            } while (loguy <= hi &&
                     qscompare(*loguy-minmatch, *lo-minmatch, data, &ml) <= 0);

            /* lo < loguy <= hi+1, A[i] <= A[lo] for lo <= i < loguy,
               either loguy > hi or A[loguy] > A[lo] */

            /* Same idea for the high side: ml now comes from the element
               that stopped the scan above, or from one accepted here. */
            do {
                if (ml < himm)
                    himm = ml;
                higuy--;
            } while (higuy > lo &&
                     qscompare(*higuy-minmatch, *lo-minmatch, data, &ml) >= 0);

            /* lo-1 <= higuy <= hi, A[i] >= A[lo] for higuy < i <= hi,
               either higuy <= lo or A[higuy] < A[lo] */

            if (higuy < loguy)
                break;

            /* if loguy > hi or higuy <= lo, then we would have exited, so
               A[loguy] > A[lo], A[higuy] < A[lo],
               loguy < hi, higuy > lo */

            swap(loguy, higuy);

            /* A[loguy] < A[lo], A[higuy] > A[lo]; so condition at top
               of loop is re-established */
        }

        /* The element left at higuy is about to join the low partition;
           account for its match length as well. */
        if (ml < lomm)
            lomm = ml;

        /*     A[i] >= A[lo] for higuy < i <= hi,
               A[i] <= A[lo] for lo <= i < loguy,
               higuy < loguy, lo <= higuy <= hi
           implying:
               A[i] >= A[lo] for loguy <= i <= hi,
               A[i] <= A[lo] for lo <= i <= higuy,
               A[i] = A[lo] for higuy < i < loguy */

        swap(lo, higuy);         /* put partition element in place */

        /* OK, now we have the following:
              A[i] >= A[higuy] for loguy <= i <= hi,
              A[i] <= A[higuy] for lo <= i < higuy
              A[i] = A[lo] for higuy <= i < loguy    */

        /* We've finished the partition, now we want to sort the subarrays
           [lo, higuy-1] and [loguy, hi].
           We do the smaller one first to minimize stack usage.
           We only sort arrays of length 2 or more. */

        if ( higuy - 1 - lo >= hi - loguy ) {
            if (lo + 1 < higuy) {
                lostk[stkptr] = lo;
                histk[stkptr] = higuy - 1;
                mm[stkptr] = lomm+minmatch;
                ++stkptr;
            }                    /* save big recursion for later */

            if (loguy < hi) {
                lo = loguy;
                minmatch += himm;
                goto recurse;    /* do small recursion */
            }
        }
        else {
            if (loguy < hi) {
                lostk[stkptr] = loguy;
                histk[stkptr] = hi;
                mm[stkptr] = himm+minmatch;
                ++stkptr;        /* save big recursion for later */
            }

            if (lo + 1 < higuy) {
                hi = higuy - 1;
                minmatch += lomm;
                goto recurse;    /* do small recursion */
            }
        }
    }

    /* We have sorted the array, except for any pending sorts on the stack.
       Check if there are any, and do them. */

    --stkptr;
    if (stkptr >= 0) {
        lo = lostk[stkptr];
        hi = histk[stkptr];
        minmatch = mm[stkptr];
        goto recurse;            /* pop subarray from stack */
    }
    else
        return;                  /* all subarrays done */
}


/***
*shortsort(lo, hi, data, minmatch) - selection sort for sorting short arrays
*
*Purpose:
*       sorts the sub-array of elements between lo and hi (inclusive)
*       side effects:  sorts in place
*       assumes that lo < hi
*
*Entry:
*       uint4 *lo           = pointer to low element to sort
*       uint4 *hi           = pointer to high element to sort
*       unsigned char *data = buffer the offsets refer to
*       uint4 minmatch      = amount subtracted from every offset before it
*                             is passed to qscompare()
*
*Exit:
*       returns void
*
*Exceptions:
*
*******************************************************************************/

void shortsort (
    uint4 *lo,
    uint4 *hi,
    unsigned char *data,
    uint4 minmatch
    )
{
    uint4 *p, *max, ml;          /* ml is written by qscompare(), unused here */

    /* Note: in assertions below, i and j are always inside original bound of
       array to sort. */

    while (hi > lo) {
        /* A[i] <= A[j] for i <= j, j > hi */
        max = lo;
        for (p = lo+1; p <= hi; p++) {
            /* A[i] <= A[max] for lo <= i < p */
            if (qscompare(*p-minmatch, *max-minmatch, data, &ml) > 0) {
                max = p;
            }
            /* A[i] <= A[max] for lo <= i <= p */
        }

        /* A[i] <= A[max] for lo <= i <= hi */

        swap(max, hi);

        /* A[i] <= A[hi] for i <= hi, so A[i] <= A[j] for i <= j, j >= hi */

        hi--;

        /* A[i] <= A[j] for i <= j, j > hi, loop top condition established */
    }
    /* A[i] <= A[j] for i <= j, j > lo, which implies A[i] <= A[j] for i < j,
       so array is sorted */
}