(C++) HugeVector
January 7, 2018 · View on GitHub
(C++) HugeVector
HugeVector is a class similar to std::vector that, in theory can contain 18,446,744,073,709,551,616 elements. In practice, HugeVector is limited by the available memory. On my computer, a std::vector is already limited by the available memory, so I keep using std::vector.
Idea behind HugeVector
Assuming that memory is not a limiting factor, the size of a std::vector is limited by the maximum (unsigned) integer value: 2^32 = 4,294,967,296 = 4 billion. This integer value is used in the std::vector methods resize and the index operator. Perhaps a std::vector can become of a bigger size (using push_back), but one cannot retrieve the, for example, 5th billionth index directly (that is, without using iterators), because there is no integer that can hold such a value.
HugeVector takes the maximum size to 2^64 = 18,446,744,073,709,551,616 = 18 billion billion, by storing elements in a 2D std::vector: there is a std::vector that stores as much std::vectors as it can store (which is 2^32 = 4,294,967,296 = 4 billion). Each of these std::vectors stores as much elements as they can store (which is 2^32 = 4,294,967,296 = 4 billion).
To overcome the maximum integer value, the CLN data type cln::cl_I is used, which can store near-infinite-size integers.
System specifications
Operating system: Ubuntu 10.04 LTS Lucid Lynx
IDE: Qt Creator 2.0.0
Project type: GUI application
Libraries used:
- Boost: version 1.40
- CLN: version 1.3.1
- Qt: version 4.7.0 (32 bit)
- STL: from GCC, shipped with Qt Creator 2.0.0
hugevector.h
#ifndef HUGEVECTOR_H #define HUGEVECTOR_H //--------------------------------------------------------------------------- #include <cassert> #include <vector> #include <boost/lexical_cast.hpp> #include <boost/numeric/conversion/bounds.hpp> #include <boost/numeric/conversion/cast.hpp> #include <cln/cln.h> //--------------------------------------------------------------------------- ///IntToStrWithSep converts an integer to std::string ///and adds thousands seperators. ///From http://www.richelbilderbeek.nl/CppIntToStrWithSep.htm const std::string IntToStrWithSep(cln::cl_I i); //--------------------------------------------------------------------------- ///SafeIntToCli converts an int to cln::cl_I safely. ///From http://www.richelbilderbeek.nl/CppSafeIntToCli.htm const cln::cl_I SafeIntToCli(const int i); //--------------------------------------------------------------------------- ///HugeVector is a class for storing template <class T> struct HugeVector { HugeVector(const cln::cl_I& sz) : m_max_index(cln::isqrt(sz)+1), m_cur_index(0) { std::clog << "HugeVector(" << sz << ")" << ": m_max_index(" << m_max_index << ")\n"; assert(this->max_size() >= sz); //Only add an empty std::vector, //for HugeVector::push_back to work //(because it needs an existing m_v.back()) m_v.push_back(std::vector<T>()); assert(m_max_index < SafeIntToCli(m_v.max_size())); //std::clog << "m_v.max_size: " << m_v.max_size() << '\n'; } const T& operator[](const cln::cl_I& i) const { assert(i >= 0); assert(i < m_max_index * m_max_index); //cln::cl_I_to_int will throw if cln::cl_I is bigger then the maximum //integer value. This method should not throw, however. //x = i / m_max_index const int x = cln::cl_I_to_int(cln::floor1(i,m_max_index)); //y = i % m_max_index const int y = cln::cl_I_to_int(cln::mod(i,m_max_index)); //std::clog << "const T& operator[" << i << "], " // << "retrieves at (x,y) = (" << x << ',' << y << ")\n"; assert(x < boost::numeric_cast<int>(m_v.size())); assert(y < boost::numeric_cast<int>(m_v[x].size())); return m_v[x][y]; } T& operator[](const cln::cl_I& i) { //Calls the const version of operator[] //To avoid duplication in const and non-const member functions [1] return const_cast<T&>(const_cast<const HugeVector<T>& >(*this)[i]); } void push_back(const T& t) { //std::clog << "HugeVector::push_back\n"; const int cur_sz = boost::numeric_cast<int>(m_v[m_cur_index].size()); //std::clog << "Current size: " << cur_sz << '\n'; assert(cur_sz < boost::numeric_cast<int>(m_v[m_cur_index].max_size())); if (cln::cl_I(cur_sz) == m_max_index) { //std::clog << "Add a std::vector\n"; m_v.push_back(std::vector<T>()); ++m_cur_index; } //const int index = boost::numeric_cast<int>(m_v.size()) - 1; assert(m_cur_index >= 0); //std::clog << "Push_back data at m_v["<< m_cur_index << "]\n"; m_v[m_cur_index].push_back(t); } ///HugeVector<T>::reserve reserves at least i elements of space void reserve_all() { this->reserve(max_size()); } void reserve(const cln::cl_I& i) { assert(i >= 0); std::clog << "HugeVector<T>::reserve(" << i << ")\n"; //how many 1D std::vectors to reserve in? //j = 1 + (i / m_max_index) const int j = 1 + cln::cl_I_to_int(cln::floor1(i,m_max_index)); std::clog << "Number of 1D std::vectors: " << j << '\n'; //create space for those 1D std::vectors m_v.resize(j); //reserve in all those 1D std::vectors const int reserve_sz = cln::cl_I_to_int(m_max_index); //std::clog << "Reserved size in 1D std::vectors: " << reserve_sz << '\n'; for (int i=0; i!=j; ++i) { //std::clog << "Reserving " << reserve_sz << " spaces in std::vector[" << i << "]\n"; assert(i < boost::numeric_cast<int>(m_v.size())); assert(reserve_sz < boost::numeric_cast<int>(m_v[i].max_size())); //reserve maximum space m_v[i].reserve(reserve_sz); } } const cln::cl_I max_size() const { const cln::cl_I x = (m_max_index * m_max_index); const cln::cl_I y = x - cln::cl_I(1); std::clog << "HugeVector<T>::max_size() = " << y << '\n'; return y; } private: //The maximum index a std::vector will/can be called from const cln::cl_I m_max_index; //m_cur_index is the index in m_v currently working on //(m_v.size() does not work, because in HugeVector::reserve // the 1D std::vectors must exist, to be able to reserve // memory for them). int m_cur_index; //The 2D std::vector containing all data std::vector<std::vector<T> > m_v; }; //--------------------------------------------------------------------------- ///m_max_index = 2 ^ 27 = 134217728 ///m_max_index = 2 ^ 28 = 268435456 ///m_max_index must be less than std::vector<T>::max_size ///Measured was 357913941, which equals 2 ^ 28.415 ///template <class T> ///const cln::cl_I HugeVector<T>::m_max_index("134217728"); //--------------------------------------------------------------------------- ///IntToStrWithSep converts an integer to std::string ///and adds thousands seperators. ///From http://www.richelbilderbeek.nl/CppIntToStrWithSep.htm const std::string IntToStrWithSep(cln::cl_I i) { std::string s = boost::lexical_cast<std::string>(cln::mod(i,10)); i = cln::floor1(i,10); int d = 1; while (!cln::zerop(i)) { s = boost::lexical_cast<std::string>(cln::mod(i,10)) + (d % 3 == 0 ? "," : "") + s; i = cln::floor1(i,10); ++d; } return s; } //--------------------------------------------------------------------------- ///SafeIntToCli converts an int to cln::cl_I safely. ///From http://www.richelbilderbeek.nl/CppSafeIntToCli.htm const cln::cl_I SafeIntToCli(const int i) { //A cln::cl_I can handle integer values to $2^{29}$ in its constructor if (i < 536870912) { return cln::cl_I(i); } const std::string s = boost::lexical_cast<std::string>(i); return cln::cl_I(s.c_str()); } //--------------------------------------------------------------------------- #endif // HUGEVECTOR_H