/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Contains source code from the article "Radix Sort Revisited". * \file Radix.cpp * \author Pierre Terdiman * \date April, 4, 2000 */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Revisited Radix Sort. * This is my new radix routine: * - it uses indices and doesn't recopy the values anymore, hence wasting less ram * - it creates all the histograms in one run instead of four * - it sorts words faster than dwords and bytes faster than words * - it correctly sorts negative floating-point values by patching the offsets * - it automatically takes advantage of temporal coherence * - multiple keys support is a side effect of temporal coherence * - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway] * * History: * - 08.15.98: very first version * - 04.04.00: recoded for the radix article * - 12.xx.00: code lifting * - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here) * - 10.11.01: added local ram support * - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting...... * * \class RadixSort * \author Pierre Terdiman * \version 1.3 * \date August, 15, 1998 */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* To do: - add an offset parameter between two input values (avoid some data recopy sometimes) - unroll ? asm ? */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Header #include #include // memset //using namespace IceCore; #define DELETEARRAY(a) { delete [] a; a = NULL; } #define CHECKALLOC(a) /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Constructor. */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// RadixSort::RadixSort() : mCurrentSize(0), mPreviousSize(0), mIndices(NULL), mIndices2(NULL), mTotalCalls(0), mNbHits(0) { #ifndef RADIX_LOCAL_RAM // Allocate input-independent ram mHistogram = new uint32[256*4]; mOffset = new uint32[256]; #endif // Initialize indices resetIndices(); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Destructor. */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// RadixSort::~RadixSort() { // Release everything #ifndef RADIX_LOCAL_RAM DELETEARRAY(mOffset); DELETEARRAY(mHistogram); #endif DELETEARRAY(mIndices2); DELETEARRAY(mIndices); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** * Resizes the inner lists. * \param nb [in] new size (number of dwords) * \return true if success */ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// bool RadixSort::resize(uint32 nb) { // Free previously used ram DELETEARRAY(mIndices2); DELETEARRAY(mIndices); // Get some fresh one mIndices = new uint32[nb]; CHECKALLOC(mIndices); mIndices2 = new uint32[nb]; CHECKALLOC(mIndices2); mCurrentSize = nb; // Initialize indices so that the input buffer is read in sequential order resetIndices(); return true; } #define CHECK_RESIZE(n) \ if(n!=mPreviousSize) \ { \ if(n>mCurrentSize) resize(n); \ else resetIndices(); \ mPreviousSize = n; \ } #define CREATE_HISTOGRAMS(type, buffer) \ /* Clear counters */ \ memset(mHistogram, 0, 256*4*sizeof(uint32)); \ \ /* Prepare for temporal coherence */ \ type PrevVal = (type)buffer[mIndices[0]]; \ bool AlreadySorted = true; /* Optimism... */ \ uint32* Indices = mIndices; \ \ /* Prepare to count */ \ uint8* p = (uint8*)input; \ uint8* pe = &p[nb*4]; \ uint32* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \ uint32* h1= &mHistogram[256]; /* Histogram for second pass */ \ uint32* h2= &mHistogram[512]; /* Histogram for third pass */ \ uint32* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \ \ while(p!=pe) \ { \ /* Read input buffer in previous sorted order */ \ type Val = (type)buffer[*Indices++]; \ /* Check whether already sorted or not */ \ if(Val>24; // Radix byte, same as above. AND is useless here (uint32). // ### cmp to be killed. Not good. Later. if(Radix<128) mIndices2[mOffset[Radix]++] = mIndices[i]; // Number is positive, same as above else mIndices2[--mOffset[Radix]] = mIndices[i]; // Number is negative, flip the sorting order } // Swap pointers for next pass. Valid indices - the most recent ones - are in mIndices after the swap. uint32* Tmp = mIndices; mIndices = mIndices2; mIndices2 = Tmp; } else { // The pass is useless, yet we still have to reverse the order of current list if all values are negative. if(UniqueVal>=128) { for(i=0;i