/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Contains source code from the article "Radix Sort Revisited". * \file Radix.cpp * \author Pierre Terdiman * \date April, 4, 2000 */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // References: // http://www.codercorner.com/RadixSortRevisited.htm // http://www.stereopsis.com/radix.html /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Revisited Radix Sort. * This is my new radix routine: * - it uses indices and doesn't recopy the values anymore, hence wasting less ram * - it creates all the histograms in one run instead of four * - it sorts words faster than dwords and bytes faster than words * - it correctly sorts negative floating-point values by patching the offsets * - it automatically takes advantage of temporal coherence * - multiple keys support is a side effect of temporal coherence * - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway] * * History: * - 08.15.98: very first version * - 04.04.00: recoded for the radix article * - 12.xx.00: code lifting * - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here) * - 10.11.01: added local ram support * - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting...... * - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all. * - ranks are not "reset" anymore, but implicit on first calls * - 07.05.02: offsets rewritten with one less indirection. * - 11.03.02: "bool" replaced with RadixHint enum * - 07.15.04: stack-based radix added * - we want to use the radix sort but without making it static, and without allocating anything. * - we internally allocate two arrays of ranks. Each of them has N uint32s to sort N values. * - 1Mb/2/sizeof(uint32) = 131072 values max, at the same time. * - 09.22.04: - adapted to MacOS by Chris Lamb * - 01.12.06: - added optimizations suggested by Kyle Hubert * - 04.06.08: - Fix bug negative zero sorting bug by Ignacio CastaƱo * * \class RadixSort * \author Pierre Terdiman * \version 1.5 * \date August, 15, 1998 */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Header #include #include // memset //using namespace IceCore; #define INVALIDATE_RANKS mCurrentSize|=0x80000000 #define VALIDATE_RANKS mCurrentSize&=0x7fffffff #define CURRENT_SIZE (mCurrentSize&0x7fffffff) #define INVALID_RANKS (mCurrentSize&0x80000000) #if NV_BIG_ENDIAN #define H0_OFFSET 768 #define H1_OFFSET 512 #define H2_OFFSET 256 #define H3_OFFSET 0 #define BYTES_INC (3-j) #else #define H0_OFFSET 0 #define H1_OFFSET 256 #define H2_OFFSET 512 #define H3_OFFSET 768 #define BYTES_INC j #endif #define CREATE_HISTOGRAMS(type, buffer) \ /* Clear counters/histograms */ \ memset(mHistogram, 0, 256*4*sizeof(uint32)); \ \ /* Prepare to count */ \ const uint8* p = (const uint8*)input; \ const uint8* pe = &p[nb*4]; \ uint32* h0= &mHistogram[H0_OFFSET]; /* Histogram for first pass (LSB) */ \ uint32* h1= &mHistogram[H1_OFFSET]; /* Histogram for second pass */ \ uint32* h2= &mHistogram[H2_OFFSET]; /* Histogram for third pass */ \ uint32* h3= &mHistogram[H3_OFFSET]; /* Histogram for last pass (MSB) */ \ \ bool AlreadySorted = true; /* Optimism... */ \ \ if(INVALID_RANKS) \ { \ /* Prepare for temporal coherence */ \ type* Running = (type*)buffer; \ type PrevVal = *Running; \ \ while(p!=pe) \ { \ /* Read input buffer in previous sorted order */ \ type Val = *Running++; \ /* Check whether already sorted or not */ \ if(ValCurSize) resize(nb); mCurrentSize = nb; INVALIDATE_RANKS; } } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Main sort routine. * This one is for integer values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data. * \param input [in] a list of integer values to sort * \param nb [in] number of values to sort * \param signedvalues [in] true to handle negative values, false if you know your input buffer only contains positive values * \return Self-Reference */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// RadixSort& RadixSort::sort(const uint32* input, uint32 nb, bool signedValues/*=true*/) { // Checkings if(!input || !nb || nb&0x80000000) return *this; // Stats mTotalCalls++; // Resize lists if needed checkResize(nb); // Allocate histograms & offsets on the stack uint32 mHistogram[256*4]; uint32* mLink[256]; // Create histograms (counters). Counters for all passes are created in one run. // Pros: read input buffer once instead of four times // Cons: mHistogram is 4Kb instead of 1Kb // We must take care of signed/unsigned values for temporal coherence.... I just // have 2 code paths even if just a single opcode changes. Self-modifying code, someone? if(!signedValues) { CREATE_HISTOGRAMS(uint32, input); } else { CREATE_HISTOGRAMS(int32, input); } // Radix sort, j is the pass number (0=LSB, 3=MSB) for(uint32 j=0;j<4;j++) { CHECK_PASS_VALIDITY(j); // Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is // not a problem, numbers are correctly sorted anyway. if(PerformPass) { // Should we care about negative values? if(j!=3 || !signedValues) { // Here we deal with positive values only // Create offsets mLink[0] = mRanks2; for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; } else { // This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place. mLink[128] = mRanks2; for(uint32 i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; mLink[0] = mLink[255] + CurCount[255]; for(uint32 i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; } // Perform Radix Sort const uint8* InputBytes = (const uint8*)input; InputBytes += BYTES_INC; if(INVALID_RANKS) { for(uint32 i=0;i 126; i--) mLink[i] = mLink[i+1] + CurCount[i]; mLink[0] = mLink[127] + CurCount[127]; for(uint32 i = 1; i < 127; i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // Perform Radix Sort if(INVALID_RANKS) { for(uint32 i=0;i>24; // Radix byte, same as above. AND is useless here (uint32). // ### cmp to be killed. Not good. Later. if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order } VALIDATE_RANKS; } else { for(uint32 i=0;i>24; // Radix byte, same as above. AND is useless here (uint32). // ### cmp to be killed. Not good. Later. if(Radix<128) *mLink[Radix]++ = mRanks[i]; // Number is positive, same as above else *(--mLink[Radix]) = mRanks[i]; // Number is negative, flip the sorting order } } // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. uint32* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; } else { // The pass is useless, yet we still have to reverse the order of current list if all values are negative. if(UniqueVal>=128) { if(INVALID_RANKS) { // ###Possible? for(uint32 i=0;i & input) { return sort((const uint32 *)input.buffer(), input.count(), true); } RadixSort & RadixSort::sort(const Array & input) { return sort(input.buffer(), input.count(), false); } RadixSort & RadixSort::sort(const Array & input) { return sort(input.buffer(), input.count()); }