well, James, perhaps later i'll get gcc to re-write code to it, or shall
do it on Java. about readability: high-performance code frequently ain't
good readable. Automatic Optimization with such technique would be
preferable, but AO is too closely related with ATP (automated theorem
proving). to avoid IFs ain't easy-go Beast at many cases.
Hight performance code does not have to be unreadable.
In fact, with modern optimizing compilers, it is far from obvious to hand-
optimize code, because any optimization you make might inhibit the
compiler from performing further optimizations, in effect rendering the
result *less* optimized.
As a point in fact, here are some measurements of your if-less sort
algorithms:
Optimised version (-O3):
(C) 2013 Evgeney Knyazhev.
Start No-IF heapsort
No-IF heapsort: 100000 ticks (100.000000 msec)
Start qsort
qsort: 20000 ticks (20.000000 msec)
Start std::sort
std::sort: 20000 ticks (20.000000 msec)
Start std::make_heap + std::sort_heap
std::make_heap + std::sort_heap: 60000 ticks (60.000000 msec)
Start No-IF two-way tides
No-IF two-way tides: 122200000 ticks (122200.000000 msec)
Debug version (-O0):
(C) 2013 Evgeney Knyazhev.
Start No-IF heapsort
No-IF heapsort: 140000 ticks (140.000000 msec)
Start qsort
qsort: 20000 ticks (20.000000 msec)
Start std::sort
std::sort: 30000 ticks (30.000000 msec)
Start std::make_heap + std::sort_heap
std::make_heap + std::sort_heap: 100000 ticks (100.000000 msec)
Start No-IF two-way tides
No-IF two-way tides: 221790000 ticks (221790.000000 msec)
As you can see, the No-IF algorithms are the two slowest ones (the two-
way tides might even be outperformed by a bogo-sort).
The code that generated the results (removed most of the C++ specific
stuff to keep it on topic here):
// IF-free sorting.cpp : Defines the entry point for the console
application.
// (C) 2013 Evgeney Knyazhev
// app name: IF-free Alg0Z
// ver: I.0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <algorithm>
void modExp(uint64_t mod, uint64_t *seed, uint64_t exp)
{
int root = 1;
uint64_t lseed = *seed;
for(int i = 0; exp >> (i + 1) > 0; i++)
{
if(((exp >> i) & 1) == 1)
{
root *= lseed;
root %= mod;
}
lseed = (lseed * lseed) % mod;
}
*seed = root;
return;
}
void pseudoRND(int64_t *arr, int num0fItems, uint64_t mod, uint64_t seed,
uint64_t exp)
{
int size0fT1 = sizeof(uint64_t) * 8 - 1;
uint64_t sign = 0;
for(int i = 0; i < num0fItems; i++)
{
modExp(mod, &seed, exp);
sign = (seed >> size0fT1);
arr
= -1 * seed * sign + seed * ((sign + 1) & 1);
}
}
#define chosenT uint64_t
/*********************** HeapSort ********************************/
inline void swp(int64_t *p0, int64_t *p1)
{
int64_t empty;
empty = *p0;
*p0 = *p1;
*p1 = empty;
return;
}
inline int64_t le(int64_t *p0, int64_t *p1)
{
int64_t i1, i2, empty, *pEmpty;
pEmpty = ∅
i2 = (*p0 < *p1);
i1 = (i2 + 1) & 1;
p1 = (int64_t*)(i2 * (chosenT)p1 + i1 * (chosenT)pEmpty);
p0 = (int64_t*)(i2 * (chosenT)p0 + i1 * (chosenT)pEmpty);
empty = *p0;
*p0 = *p1;
*p1 = empty;
return i2;
}
inline int siftDown(int64_t *a, int root, int end)
{
int child;
int count = 0;
int64_t dv;
child = root << 1;
child++;
while(child < end)
{
dv = (child + 1 < end);
dv = (a[child] < a[child+1]) * dv;
child += dv;
dv = le(&a[root], &a[child]);
root = child;
end *= dv;
child = root << 1;
child++;
count++;
}
return count;
}
inline int Heapify(int64_t *a, int end)
{
int root = (end-1) >> 1;
int cnt = 0;
while(root > -1)
{
cnt += siftDown(a, root--, end);
}
return cnt;
}
inline int heapSort(int64_t *arr, int end)
{
int count = Heapify(arr, end);
while(end > 0)
{
swp(&arr[0], &arr[end - 1]);
count += siftDown(arr, 0, end-1);
end--;
}
return count;
}
/*********************** HeapSort ********************************/
int comp(const void* a, const void* b)
{
const int64_t* lhs = (const int64_t*)a;
const int64_t* rhs = (const int64_t*)b;
return lhs - rhs;
}
/****************** main ******************/
int main(int argc, char* argv[])
{
printf("(C) 2013 Evgeney Knyazhev.\n\n");
int size0fInt = sizeof(int64_t) * 8 - 1;
int N0fItems = 100000;
int bottom = 0;
int DownThe;
int UpThe = 1;
int i1;
int i2;
int64_t deltaV;
int64_t *array2sort;
int64_t *unsortedArr;
int64_t *p0;
int64_t *p1;
int64_t *pEmpty;
int64_t empty;
char StopThe = 0;
char GV; /*greater value*/
char unit = 0;
pEmpty = ∅
chosenT seed = 613;
chosenT exp = 259;
chosenT mod = 91477361;
chosenT countIteration;
array2sort = (int64_t*)malloc(N0fItems * size0fInt);
unsortedArr = (int64_t*)malloc(N0fItems * size0fInt);
if(array2sort == NULL || unsortedArr == NULL)
{
printf("Not enough memory for array\n");
return 0;
}
pseudoRND(unsortedArr, N0fItems, mod, seed, exp);
for(unsigned int p = 0; p < (unsigned)N0fItems; p++)
array2sort[p] = unsortedArr[p];
DownThe = N0fItems - 1;
countIteration = 0;
bottom = 0;
StopThe = 0;
printf("Start No-IF heapsort\n");
clock_t start = clock();
countIteration = heapSort(array2sort,N0fItems);
clock_t stop = clock();
printf("No-IF heapsort: %ld ticks (%f msec)\n", stop-start, (1000.0*
(stop-start)/CLOCKS_PER_SEC));
for(unsigned int p = 0; p < (unsigned)N0fItems; p++)
array2sort[p] = unsortedArr[p];
printf("Start qsort\n");
start = clock();
qsort(array2sort, N0fItems, sizeof(array2sort[0]), comp);
stop = clock();
printf("qsort: %ld ticks (%f msec)\n", stop-start, (1000.0*(stop-start)/
CLOCKS_PER_SEC));
for(unsigned int p = 0; p < (unsigned)N0fItems; p++)
array2sort[p] = unsortedArr[p];
printf("Start std::sort\n");
start = clock();
std::sort(array2sort, array2sort+N0fItems);
stop = clock();
printf("std::sort: %ld ticks (%f msec)\n", stop-start, (1000.0*(stop-
start)/CLOCKS_PER_SEC));
for(unsigned int p = 0; p < (unsigned)N0fItems; p++)
array2sort[p] = unsortedArr[p];
printf("Start std::make_heap + std::sort_heap\n");
start = clock();
std::make_heap(array2sort, array2sort+N0fItems);
std::sort_heap(array2sort, array2sort+N0fItems);
stop = clock();
printf("std::make_heap + std::sort_heap: %ld ticks (%f msec)\n", stop-
start, (1000.0*(stop-start)/CLOCKS_PER_SEC));
for(unsigned int p = 0; p < (unsigned)N0fItems; p++)
array2sort[p] = unsortedArr[p];
printf("Start No-IF two-way tides\n");
start = clock();
/***********************************
IF-free Sorting two-way tides
************************************/
while(StopThe != 1)
{
p1 = &array2sort[DownThe];
p0 = &array2sort[DownThe - 1];
deltaV = *p1 - *p0;
GV = ((uint64_t)deltaV) >> size0fInt;
if(*p0 > *p1)
i2 = 1;
i1 = (GV + 1) & 1;
i2 = GV & 1;
p1 = (int64_t*)(i2 * (chosenT)p1 + i1 * (chosenT)pEmpty);
p0 = (int64_t*)(i2 * (chosenT)p0 + i1 * (chosenT)pEmpty);
empty = *p1;
*p1 = *p0;
*p0 = empty;
unit = i2 + i1 * unit;
p1 = &array2sort[UpThe];
p0 = &array2sort[UpThe - 1];
deltaV = *p1 - *p0;
GV = ((uint64_t)deltaV) >> size0fInt;
i1 = (GV + 1) & 1;
i2 = GV & 1;
p1 = (int64_t*)(i2 * (chosenT)p1 + i1 * (chosenT)pEmpty);
p0 = (int64_t*)(i2 * (chosenT)p0 + i1 * (chosenT)pEmpty);
empty = *p1;
*p1 = *p0;
*p0 = empty;
unit = i2 + unit * i1;
UpThe++;
deltaV = UpThe - N0fItems;
GV = ((uint64_t)deltaV) >> size0fInt;
i1 = (GV + 1) & 1;
i2 = GV & 1;
bottom += i1;
UpThe = i2 * UpThe + i1 * bottom;
DownThe--;
/*deltaV=bottom-DownThe;
GV=((unsigned int)deltaV)>>size0fInt;
i1=(GV+1)&1;
i2=GV&1;*/
N0fItems -= i1;
DownThe = i2 * DownThe + i1 * (N0fItems - 1);
StopThe = i1 & (unit + 1);
unit &= i2;
countIteration++;
}
/**************************************
IF-free Sorting two-way tides
***************************************/
stop = clock();
printf("No-IF two-way tides: %ld ticks (%f msec)\n", stop-start,
(1000.0*(stop-start)/CLOCKS_PER_SEC));
return 0;
}
Bart v Ingen Schenau