Merge internal branch.
- Remove old/unused code.
- Remove format string constructors.
- Better win64 support (vsscanf, prefetch, etc.).
- Fix radix sort to sort -0 correctly.
- Add misc utilities (constraints, timer, cpuinfo, introsort).
This commit is contained in:
@ -3,6 +3,8 @@
|
||||
#ifndef NV_CORE_ALGORITHMS_H
|
||||
#define NV_CORE_ALGORITHMS_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
@ -45,22 +47,42 @@ namespace nv
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// @@ Swap should be implemented here.
|
||||
|
||||
|
||||
#if 0
|
||||
// This does not use swap, but copies, in some cases swaps are much faster than copies!
|
||||
// Container should implement operator[], and size()
|
||||
template <class Container, class T>
|
||||
void insertionSort(Container<T> & container)
|
||||
// @@ Should swap be implemented here?
|
||||
|
||||
|
||||
|
||||
/// Sorts every element of @p container.
///
/// Runs introsortLoop over [0, count()) and then finishes with a single
/// insertionSort pass over the same range.
///
/// @param container  Container exposing count() and operator[].
template <typename T, template <typename> class C>
void sort(C<T> & container)
{
    const uint count = container.count();

    // insertionSort(container, begin, end) starts at begin + 1 and loops
    // while i != end, so an empty range must never reach it.
    if (count == 0)
    {
        return;
    }

    introsortLoop(container, 0, count);
    insertionSort(container, 0, count);
}
|
||||
|
||||
/// Sorts the half-open index range [begin, end) of @p container.
///
/// Runs introsortLoop over the range and then finishes with a single
/// insertionSort pass over the same range. Empty or inverted ranges
/// (begin >= end) are left untouched.
///
/// @param container  Container exposing operator[].
/// @param begin      Index of the first element to sort.
/// @param end        One past the index of the last element to sort.
template <typename T, template <typename> class C>
void sort(C<T> & container, uint begin, uint end)
{
    // Nothing to do; also keeps insertionSort away from an empty range.
    if (begin >= end)
    {
        return;
    }

    introsortLoop(container, begin, end);
    insertionSort(container, begin, end);
}
|
||||
|
||||
/// Insertion-sorts every element of @p container.
///
/// Forwards to the ranged overload with [0, count()).
///
/// @param container  Container exposing count() and operator[].
template <typename T, template <typename> class C>
void insertionSort(C<T> & container)
{
    const uint count = container.count();

    // The ranged overload starts at begin + 1 and loops while i != end, so
    // calling it with an empty range would walk past the end. Zero or one
    // element is already sorted, so skip the call entirely.
    if (count > 1)
    {
        insertionSort(container, 0, count);
    }
}
|
||||
|
||||
template <typename T, template <typename T> class C>
|
||||
void insertionSort(C<T> & container, uint begin, uint end)
|
||||
{
|
||||
for (uint i = begin + 1; i != end; ++i)
|
||||
{
|
||||
T value = container[i];
|
||||
|
||||
uint j = i;
|
||||
while (j > 0 && container[j-1] > value)
|
||||
while (j != begin && container[j-1] > value)
|
||||
{
|
||||
container[j] = container[j-1];
|
||||
--j;
|
||||
@ -72,75 +94,60 @@ namespace nv
|
||||
}
|
||||
}
|
||||
|
||||
template <class Container, class T>
|
||||
void quickSort(Container<T> & container)
|
||||
{
|
||||
quickSort(container, 0, container.count());
|
||||
}
|
||||
|
||||
{
|
||||
/* threshold for transitioning to insertion sort */
|
||||
while (n > 12) {
|
||||
int c01,c12,c,m,i,j;
|
||||
/// Quicksort partitioning pass over container[begin, end).
///
/// Repeatedly partitions the range around a median-of-three pivot, recursing
/// into the right-hand part and iterating on the left-hand part, until the
/// current range holds 16 elements or fewer. Ranges that small are left
/// unsorted here.
///
/// NOTE(review): no recursion-depth limit or fallback sort is visible in this
/// function despite the "introsort" name.
///
/// @param container  Container exposing operator[].
/// @param begin      Index of the first element.
/// @param end        One past the index of the last element; must not be
///                   smaller than @p begin, since the range length is
///                   computed with uint arithmetic.
template <typename T, template <typename> class C>
void introsortLoop(C<T> & container, uint begin, uint end)
{
    while (end - begin > 16)
    {
        const uint mid = begin + ((end - begin) / 2) + 1;

        // medianof3 returns a reference into the container. Take a copy so the
        // pivot value cannot change when partition swaps that element.
        const T pivot = medianof3(container, begin, mid, end - 1);

        const uint split = partition(container, begin, end, pivot);

        // Recurse on the right part, keep looping on the left part.
        introsortLoop(container, split, end);
        end = split;
    }
}
|
||||
|
||||
/* compute median of three */
|
||||
m = n >> 1;
|
||||
c = p[0] > p[m];
|
||||
c01 = c;
|
||||
c = &p[m] > &p[n-1];
|
||||
c12 = c;
|
||||
/* if 0 >= mid >= end, or 0 < mid < end, then use mid */
|
||||
if (c01 != c12) {
|
||||
/* otherwise, we'll need to swap something else to middle */
|
||||
int z;
|
||||
c = p[0] < p[n-1];
|
||||
/* 0>mid && mid<n: 0>n => n; 0<n => 0 */
|
||||
/* 0<mid && mid>n: 0>n => 0; 0<n => n */
|
||||
z = (c == c12) ? 0 : n-1;
|
||||
swap(p[z], p[m]);
|
||||
/// Partitions a[begin, end) around the pivot value @p x.
///
/// Scans inward from both ends, swapping pairs that are on the wrong side,
/// and returns the index where the scans meet: elements in [begin, result)
/// are not greater than the pivot and elements in [result, end) are not less
/// than it.
///
/// The scans carry no bounds checks. They rely on the range containing at
/// least one element that is not less than the pivot and one that is not
/// greater than it, which holds when the pivot is taken from the range.
///
/// @param a      Container exposing operator[]; elements need operator< and
///               an unqualified swap().
/// @param begin  Index of the first element.
/// @param end    One past the index of the last element.
/// @param x      Pivot value. It may refer to an element of @p a.
/// @return       Index of the first element of the right-hand part.
template <typename T, template <typename> class C>
uint partition(C<T> & a, uint begin, uint end, const T & x)
{
    // x may alias an element of a. Without a copy, the first swap touching
    // that element would silently change the pivot mid-partition.
    const T pivot = x;

    uint i = begin;
    uint j = end;

    while (true)
    {
        while (a[i] < pivot) ++i;

        --j;
        while (pivot < a[j]) --j;

        if (i >= j)
        {
            return i;
        }

        swap(a[i], a[j]);
        ++i;
    }
}
|
||||
|
||||
template <typename T, template <typename T> class C>
|
||||
const T & medianof3(C<T> & a, uint lo, uint mid, uint hi)
|
||||
{
|
||||
if (a[mid] < a[lo])
|
||||
{
|
||||
if (a[hi] < a[mid])
|
||||
{
|
||||
return a[mid];
|
||||
}
|
||||
/* now p[m] is the median-of-three */
|
||||
/* swap it to the beginning so it won't move around */
|
||||
swap(p[0], p[m]);
|
||||
|
||||
/* partition loop */
|
||||
i=1;
|
||||
j=n-1;
|
||||
for(;;) {
|
||||
/* handling of equality is crucial here */
|
||||
/* for sentinels & efficiency with duplicates */
|
||||
for (;;++i) {
|
||||
c = p[i] > p[0];
|
||||
if (!c) break;
|
||||
}
|
||||
a = &p[0];
|
||||
for (;;--j) {
|
||||
b=&p[j];
|
||||
c = p[j] > p[0]
|
||||
if (!c) break;
|
||||
}
|
||||
/* make sure we haven't crossed */
|
||||
if (i >= j) break;
|
||||
swap(p[i], p[j]);
|
||||
|
||||
++i;
|
||||
--j;
|
||||
}
|
||||
/* recurse on smaller side, iterate on larger */
|
||||
if (j < (n-i)) {
|
||||
quickSort(p, j);
|
||||
p = p+i;
|
||||
n = n-i;
|
||||
}
|
||||
else {
|
||||
quickSort(p+i, n-i);
|
||||
n = j;
|
||||
else
|
||||
{
|
||||
return (a[hi] < a[lo]) ? a[hi] : a[lo];
|
||||
}
|
||||
}
|
||||
|
||||
insertionSort();
|
||||
}
|
||||
#endif // 0
|
||||
else
|
||||
{
|
||||
if (a[hi] < a[mid])
|
||||
{
|
||||
return (a[hi] < a[lo]) ? a[lo] : a[hi];
|
||||
}
|
||||
else
|
||||
{
|
||||
return a[mid];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
Reference in New Issue
Block a user