Generate benchmarks for three implementations of a wait-free queue using C++11 atomics
October 23, 2014 ยท View on GitHub
// // Generate benchmarks for the three queue implementations shown in // Jeff Preshing's CppCon 2014 talk, "How Ubisoft Montreal Develops Games for // Multicore - Before and After C++11". // // Slides: https://github.com/CppCon/CppCon2014/blob/master/Presentations/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore/How%20Ubisoft%20Montreal%20Develops%20Games%20for%20Multicore%20-%20Before%20and%20After%20C++11%20-%20Jeff%20Preshing%20-%20CppCon%202014.pdf?raw=true //
#include
// 0 : Low-level (relaxed) C++11 atomics with standalone memory fences // 1 : Low-level C++11 atomics with ordering constraints // 2 : Sequentially consistent C++11 atomics #define METHOD 0
using namespace std;
template <class T, int size>
class CappedSPSCQueue
{
private:
T m_items[size];
atomic
public: CappedSPSCQueue() : m_writePos(0), m_readPos(0) {}
void clear()
{
m_writePos = 0;
m_readPos = 0;
}
#if METHOD == 0
bool tryPush(const T& item)
{
int w = m_writePos.load(memory_order_relaxed);
if (w >= size)
return false;
m_items[w] = item;
atomic_thread_fence(memory_order_release);
m_writePos.store(w + 1, memory_order_relaxed);
return true;
}
bool tryPop(T& item)
{
int w = m_writePos.load(memory_order_relaxed);
if (m_readPos >= w)
return false;
atomic_thread_fence(memory_order_acquire);
item = m_items[m_readPos];
m_readPos++;
return true;
}
#elif METHOD == 1
bool tryPush(const T& item)
{
int w = m_writePos.load(memory_order_relaxed);
if (w >= size)
return false;
m_items[w] = item;
m_writePos.store(w + 1, memory_order_release);
return true;
}
bool tryPop(T& item)
{
int w = m_writePos.load(memory_order_acquire);
if (m_readPos >= w)
return false;
item = m_items[m_readPos];
m_readPos++;
return true;
}
#elif METHOD == 2
bool tryPush(const T& item)
{
int w = m_writePos;
if (w >= size)
return false;
m_items[w] = item;
m_writePos = w + 1;
return true;
}
bool tryPop(T& item)
{
int w = m_writePos;
if (m_readPos >= w)
return false;
item = m_items[m_readPos];
m_readPos++;
return true;
}
#endif };
class Timer { private: chrono::high_resolution_clock::time_point t0, t1; double span;
public:
Timer()
{
span = 0;
}
void begin()
{
t0 = chrono::high_resolution_clock::now();
}
void end()
{
t1 = chrono::high_resolution_clock::now();
span += chrono::duration_cast<chrono::duration
static const int limit = 10000000;
CappedSPSCQueue<int, limit> TestQueue;
atomic
void writer() { ready = true; writeTimer.begin(); for (int i = 0; i < limit; i++) { TestQueue.tryPush(i); } writeTimer.end(); }
void reader() { static volatile int destination; while (!ready) {} readTimer.begin(); for (int i = 0; i < limit; i++) { int y; while (!TestQueue.tryPop((int&) y)) {} // Force the result to be stored somewhere, otherwise // the compiler will optimize y away and not bother // copying the item from the queue at all destination = y; } readTimer.end(); }
#if METHOD < 2 && !defined(arm) static const int iterations = 100; #else static const int iterations = 20; #endif
extern "C" void tests() { printf("METHOD = %d, queue size = %d\n", METHOD, limit);
writeTimer.clear();
readTimer.clear();
for (int i = 0; i < iterations; i++)
{
TestQueue.clear();
ready = false;
writer();
reader();
}
printf("separate: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9);
writeTimer.clear();
readTimer.clear();
for (int i = 0; i < iterations; i++)
{
TestQueue.clear();
ready = false;
thread t1(writer);
reader();
t1.join();
}
printf("together: tryPush=%f ns/item, tryPop=%f ns/item\n", writeTimer.duration() / iterations / limit * 10e9, readTimer.duration() / iterations / limit * 10e9);
}
int main(int argc, const char * argv[]) { tests(); return 0; }