There are 4 classes in the following code: A, B, C and D.
They all have a member operator new[]
.
Besides,
These extra 8 bytes are used to store information regarding what has been allocated in order to destruct objects correctly (the program needs to know how many objects need to be destroyed) and to call T::operator delete[]
with the correct second parameter. According to the generated assembly (see the end of this answer), the value stored is the number of elements (here 10
).
Basically:
for A
and B
, the destructor is a no-op, so there is no need to know how many elements must be destroyed, and you don't have a user-defined delete[]
, so the compiler will use the default one, which apparently does not care about the second parameter;
for C
, the destructor is user-defined, so it must be called (I don't know why this is not optimized away...), so the program needs to know how many objects will be destroyed;
for D
, you have a user-defined D::operator delete[]
, so the program must remember the allocated size in order to send it to D::operator delete[]
when necessary.
If you replace the int
attribute with a type that has a non-trivial destructor (e.g. std::vector<int>
), you will notice these 8 bytes for both A
and B
.
You can look at the generated assembly for C
(g++ 7.2, no optimization):
; C *c = new C[10];
call C::operator new[](unsigned long)
mov QWORD PTR [rax], 10 ; store "10" (allocated objects)
add rax, 8 ; increase pointer by 8
mov QWORD PTR [rbp-24], rax
; delete[] c;
cmp QWORD PTR [rbp-24], 0
je .L5
mov rax, QWORD PTR [rbp-24] ; this is c
sub rax, 8
mov rax, QWORD PTR [rax] ; retrieve the number of objects
lea rdx, [0+rax*4] ; retrieve the associated size (* sizeof(C))
mov rax, QWORD PTR [rbp-24]
lea rbx, [rdx+rax]
.L7:
cmp rbx, QWORD PTR [rbp-24] ; loops to destruct allocated objects
je .L6
sub rbx, 4
mov rdi, rbx
call C::~C()
jmp .L7
.L6:
mov rax, QWORD PTR [rbp-24]
sub rax, 8
mov rax, QWORD PTR [rax] ; retrieve the number of allocated objects
add rax, 2 ; add 2 = 8 bytes / sizeof(C)
lea rdx, [0+rax*4] ; number of allocated bytes
mov rax, QWORD PTR [rbp-24]
sub rax, 8
mov rsi, rdx
mov rdi, rax
call operator delete[](void*, unsigned long)
If you are not familiar with assembly, here is a roughly equivalent C++ version of what happens under the hood:
// C *c = new C[10];
char *c_ = (char*)malloc(10 * sizeof(C) + sizeof(std::size_t)); // inside C::operator new[]
*reinterpret_cast<std::size_t*>(c_) = 10; // stores the number of allocated objects
C *c = (C*)(c_ + sizeof(std::size_t)); // retrieve the "correct" pointer
// delete[] c; -- destruction of the allocated objects
char *c_ = (char*)c;
c_ -= sizeof(std::size_t); // retrieve the original pointer
std::size_t n = // retrieve the number of allocated objects
*reinterpret_cast<std::size_t*>(c_);
n = n * sizeof(C); // = n * 4, retrieve the allocated size
c_ = (char*)c + n; // retrieve the "end" pointer
while (c_ != (char*)c) {
c_ -= sizeof(C); // next object
(*reinterpret_cast<C*>(c_)).~C(); // destruct the object
}
// delete[] c; -- freeing of the memory
char *c_ = (char*)c;
c_ -= sizeof(std::size_t);
std::size_t n =
*reinterpret_cast<std::size_t*>(c_); // retrieve the number of allocated objects
n = n * sizeof(C) + sizeof(std::size_t); // note: compiler does funky computation instead of
// this, but I found this clearer
::operator delete[](c_, n);
Now you're happy to know that the compiler does all of this for you ;)