C++ gcc extension for non-zero-based array pointer allocation?

前端 未结 1 1701
时光取名叫无心
时光取名叫无心 2021-01-29 09:49

I am looking for a gcc-supported C++ language extension to enable the allocation of non-zero-based array pointers. Ideally I could simply write:

#include

        
相关标签:
1条回答
  • 2021-01-29 10:39

    Assuming you're using gcc on Linux x86-64, it supports the intptr_t and uintptr_t types, which can hold any pointer value (valid or not) and also support integer arithmetic. uintptr_t is the more suitable choice for this application because unsigned arithmetic wraps modulo 2^64, whereas overflow of the signed intptr_t is undefined behavior.

    As suggested in comments, we can use this to build a class that overloads operator[] and performs range checking:

    #include <assert.h>
    #include <cstddef>   // ptrdiff_t
    #include <cstdint>   // uintptr_t
    #include <iostream>
    #include <sstream>   // for ostringstream
    #include <stdexcept> // range_error, out_of_range
    #include <vector>
    using namespace std;
    
    
    // Safe non-zero-based array. Includes bounds checking.
    //
    // Valid indices form the half-open range [lo, hi). The class stores a
    // "biased" base address as an integer (allocation address minus
    // lo * sizeof(Elem)) so that element i lives at base + i * sizeof(Elem).
    // The bias is kept in a uintptr_t rather than an Elem* because the biased
    // value generally does not point into the allocation, and unsigned
    // integer arithmetic wraps instead of invoking undefined behavior.
    //
    // The object owns its allocation and is therefore non-copyable: an
    // implicit copy would make two objects delete[] the same storage.
    template<typename Elem>
    class Array {
      static_assert(sizeof(std::uintptr_t) == sizeof(void*),
            "Array: uintptr_t size does not match ptr size");
      static_assert(sizeof(std::ptrdiff_t) == sizeof(std::uintptr_t),
            "Array: ptrdiff_t size does not match ptr (efficiency issue)");

      std::uintptr_t array; // biased base value for non-zero-based access
      int            lo;    // lowest valid index
      int            hi;    // highest valid index plus 1

      // Integer address of element i; performs no bounds checking.
      // The index is converted to unsigned before scaling so that negative
      // indices wrap instead of overflowing.
      std::uintptr_t address_of(std::ptrdiff_t i) const
      {
        return array + static_cast<std::uintptr_t>(i) * sizeof(Elem);
      }

      // Throws std::out_of_range unless lo <= i < hi.
      void check_index(std::ptrdiff_t i) const
      {
        if (i < lo || i >= hi)
          {
            std::ostringstream msg;
            // hi is exclusive, so the range is printed half-open.
            msg << "Array.at(): " << i << " is not in range ["
                << lo << ", " << hi << ")";
            throw std::out_of_range(msg.str());
          }
      }

    public:

      // Allocates storage for the indices lo .. hi-1 (hi is exclusive).
      // Elements are default-initialized by new[].
      // Throws std::range_error if lo > hi; new[] may throw std::bad_alloc.
      Array(int lo, int hi)
        : array(), lo(lo), hi(hi)
      {
        if (lo > hi)
          {
            std::ostringstream msg;
            msg << "Array(): lo(" << lo << ") > hi(" << hi << ")";
            throw std::range_error(msg.str());
          }
        // Subtract in unsigned arithmetic: hi - lo as int can overflow when
        // lo is very negative and hi very positive; since lo <= hi here the
        // wrapped unsigned difference is the exact element count.
        const unsigned count =
          static_cast<unsigned>(hi) - static_cast<unsigned>(lo);
        Elem* alloc = new Elem[count];
        // Convert the offset to unsigned to avoid overflow UB.
        array = reinterpret_cast<std::uintptr_t>(alloc)
              - static_cast<std::uintptr_t>(lo) * sizeof(Elem);
      }

      // Owning type: copying would lead to a double delete[].
      Array(const Array&)            = delete;
      Array& operator=(const Array&) = delete;

      ~Array() {delete[] _get_alloc();}


      //////////////////////////////////////////////////////////////////
      // UNCHECKED access utilities (these method names start with "_").

      // Provide direct access to the biased base value (be careful!)
      std::uintptr_t _get_array() const {return array;}

      // Return reference to element i (no bounds checking).
      // The original author notes that on GCC 5.4.0 with -O3 the address
      // computation compiles to an 'lea' instruction.
      Elem& _at(std::ptrdiff_t i)
      {return *reinterpret_cast<Elem*>(address_of(i));}

      const Elem& _at(std::ptrdiff_t i) const
      {return *reinterpret_cast<const Elem*>(address_of(i));}

      // Return the zero-based array that was allocated. Computed from the
      // address directly (no dereference), so it is also fine when lo == hi.
      Elem* _get_alloc() {return reinterpret_cast<Elem*>(address_of(lo));}


      //////////////////////////////
      // SAFE access utilities

      // Return reference to element i.
      // Throws std::out_of_range if i is outside [lo, hi).
      Elem& at(std::ptrdiff_t i)
      {
        check_index(i);
        return _at(i);
      }

      const Elem& at(std::ptrdiff_t i) const
      {
        check_index(i);
        return _at(i);
      }

      int get_lo() const {return lo;}
      int get_hi() const {return hi;}
      // Number of elements. NOTE: computed in int, so a range spanning more
      // than INT_MAX elements would overflow here.
      int size()   const {return hi - lo;}

      // Unlike std::vector, operator[] is bounds-checked: it is the typical
      // way to access elements and should therefore be the safe one.
      Elem&       operator[](std::ptrdiff_t i)       {return at(i);}
      const Elem& operator[](std::ptrdiff_t i) const {return at(i);}

    };
    
    
    // Test
    int main() 
    {  
      const int LO = 1000000000;
      const int HI = LO + 10;
      Array<int> array(LO, HI);
      for (int i=LO; i<HI; i++)
        array[i] = i;
      for (int i=LO; i<HI; i++)
        cout << array[i] << "\n";
    }
    

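    Note that it is still not possible to cast the out-of-range "pointer" value held in the uintptr_t directly to a pointer type, because of the following rule from section 4.7, "Arrays and Pointers", of the GCC manual (C implementation-defined behavior):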

    When casting from pointer to integer and back again, the resulting pointer must reference the same object as the original pointer, otherwise the behavior is undefined. That is, one may not use integer arithmetic to avoid the undefined behavior of pointer arithmetic as proscribed in C99 and C11 6.5.6/8.

    This is why the array field must be of type uintptr_t and not Elem*. In other words, behavior is defined so long as the uintptr_t value is adjusted to point back into the originally allocated object before it is converted back to Elem*.

    0 讨论(0)
提交回复
热议问题