/* Spreadsort in C Source: http://en.wikipedia.org/wiki/Spreadsort This code is licensed under the Creative Commons Attribution-ShareAlike License. It is from the Wikipedia article "Spreadsort" dated 2009-06-01. */ /* Spreadsort is a sorting algorithm invented by Steven J. Ross in 2002. It combines concepts from distribution-based sorts, such as radix sort and bucket sort, with partitioning concepts from comparison sorts such as quicksort and mergesort. In experimental results it was shown to be highly efficient, often outperforming traditional algorithms such as quicksort, particularly on distributions exhibiting structure. */ ; unsigned RoughLog2(DATATYPE input) { unsigned char cResult = 0; // The && is necessary on some compilers to avoid infinite loops; it doesn't // significantly impair performance if(input >= 0) while((input >> cResult) && (cResult < DATA_SIZE)) cResult++; else while(((input >> cResult) < -1) && (cResult < DATA_SIZE)) cResult++; return cResult; } SIZETYPE GetMaxCount(unsigned logRange, unsigned uCount) { unsigned logSize = RoughLog2Size(uCount); unsigned uRelativeWidth = (LOG_CONST * logRange)/((logSize > MAX_SPLITS) ? MAX_SPLITS : logSize); // Don't try to bitshift more than the size of an element if(DATA_SIZE <= uRelativeWidth) uRelativeWidth = DATA_SIZE - 1; return 1 << ((uRelativeWidth < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ? (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) : uRelativeWidth); } void FindExtremes(DATATYPE *Array, SIZETYPE uCount, DATATYPE & piMax, DATATYPE & piMin) { SIZETYPE u = 1; piMin = piMax = Array[0]; for(; u < uCount; ++u){ if(Array[u] > piMax) piMax=Array[u]; else if(Array[u] < piMin) piMin= Array[u]; } } //---------------------SpreadSort Source----------------- Bin * SpreadSortCore(DATATYPE *Array, SIZETYPE uCount, SIZETYPE & uBinCount, DATATYPE &iMax, DATATYPE &iMin) { // This step is roughly 10% of runtime but it helps avoid worst-case // behavior and improves behavior with real data. If you know the // maximum and minimum ahead of time, you can pass those values in // and skip this step for the first iteration FindExtremes((DATATYPE *) Array, uCount, iMax, iMin); if(iMax == iMin) return NULL; DATATYPE divMin,divMax; SIZETYPE u; int LogDivisor; Bin * BinArray; Bin* CurrentBin; unsigned logRange; logRange = RoughLog2Size((SIZETYPE)iMax-iMin); if((LogDivisor = logRange - RoughLog2Size(uCount) + LOG_MEAN_BIN_SIZE) < 0) LogDivisor = 0; // The below if statement is only necessary on systems with high memory // latency relative to processor speed (most modern processors) if((logRange - LogDivisor) > MAX_SPLITS) LogDivisor = logRange - MAX_SPLITS; divMin = iMin >> LogDivisor; divMax = iMax >> LogDivisor; uBinCount = divMax - divMin + 1; // Allocate the bins and determine their sizes BinArray = (Bin *) calloc(uBinCount, sizeof(Bin)); // Memory allocation failure check and clean return with sorted results if(!BinArray) { printf("Using std::sort because of memory allocation failure\n"); std::sort(Array, Array + uCount); return NULL; } // Calculating the size of each bin; this takes roughly 10% of runtime for(u = 0; u < uCount; ++u) BinArray[(Array[u] >> LogDivisor) - divMin].uCount++; // Assign the bin positions BinArray[0].CurrentPosition = (DATATYPE *)Array; for(u = 0; u < uBinCount - 1; u++) { BinArray[u + 1].CurrentPosition = BinArray[u].CurrentPosition + BinArray[u].uCount; BinArray[u].uCount = BinArray[u].CurrentPosition - Array; } BinArray[u].uCount = BinArray[u].CurrentPosition - Array; // Swap into place. This dominates runtime, especially in the swap; // std::sort calls are the other main time-user. for(u = 0; u < uCount; ++u) { for(CurrentBin = BinArray + ((Array[u] >> LogDivisor) - divMin); (CurrentBin->uCount > u); CurrentBin = BinArray + ((Array[u] >> LogDivisor) - divMin)) SWAP(Array + u, CurrentBin->CurrentPosition++); // Now that we've found the item belonging in this position, // increment the bucket count if(CurrentBin->CurrentPosition == Array + u) ++(CurrentBin->CurrentPosition); } // If we've bucketsorted, the array is sorted and we should skip recursion if(!LogDivisor) { free(BinArray); return NULL; } return BinArray; } void SpreadSortBins(DATATYPE *Array, SIZETYPE uCount, SIZETYPE uBinCount, const DATATYPE &iMax , const DATATYPE &iMin, Bin * BinArray, SIZETYPE uMaxCount) { SIZETYPE u; for(u = 0; u < uBinCount; u++){ SIZETYPE count = (BinArray[u].CurrentPosition - Array) - BinArray[u].uCount; // Don't sort unless there are at least two items to compare if(count < 2) continue; if(count < uMaxCount) std::sort(Array + BinArray[u].uCount, BinArray[u].CurrentPosition); else SpreadSortRec(Array + BinArray[u].uCount, count); } free(BinArray); } void SpreadSortRec(DATATYPE *Array, SIZETYPE uCount) { if(uCount < 2) return; DATATYPE iMax, iMin; SIZETYPE uBinCount; Bin * BinArray = SpreadSortCore(Array, uCount, uBinCount, iMax, iMin); if(!BinArray) return; SpreadSortBins(Array, uCount, uBinCount, iMax, iMin, BinArray, GetMaxCount(RoughLog2Size((SIZETYPE)iMax-iMin), uCount)); }
Hide code
Visustin flow chart for C/C++