问题
I'm reading an ESRI Shapefile, and to my dismay it uses big endian and little endian at different points (see, for instance, the table at page 4, plus the tables from page 5 to 8).
So I created two functions in C++, one for each endianness.
// Reads one 32-bit unsigned integer stored in big-endian (most significant
// byte first) order from `f` and returns it in host byte order.
//
// If fewer than four bytes can be read, the stream is left in its failed
// state (as set by istream::read) and 0 is returned, so callers can detect
// the error by testing `f` afterwards.
uint32_t readBig(std::ifstream& f) {
    uint8_t buf[4] = {};
    if (!f.read(reinterpret_cast<char*>(buf), sizeof buf)) {
        return 0;
    }
    // Widen each byte to uint32_t *before* shifting: a bare uint8_t is
    // promoted to signed int, and shifting a value >= 0x80 left by 24 would
    // push a bit into the sign position.
    return (static_cast<uint32_t>(buf[0]) << 24) |
           (static_cast<uint32_t>(buf[1]) << 16) |
           (static_cast<uint32_t>(buf[2]) << 8) |
            static_cast<uint32_t>(buf[3]);
}
// Reads one 32-bit unsigned integer stored in little-endian (least
// significant byte first) order from `f` and returns it in host byte order.
//
// The value is assembled from individual bytes rather than read directly
// into a uint32_t, so the result does not depend on the byte order of the
// machine running the code.
//
// If fewer than four bytes can be read, the stream is left in its failed
// state (as set by istream::read) and 0 is returned, so callers can detect
// the error by testing `f` afterwards.
uint32_t readLittle(std::ifstream& f) {
    uint8_t buf[4] = {};
    if (!f.read(reinterpret_cast<char*>(buf), sizeof buf)) {
        return 0;
    }
    // Widen each byte to uint32_t before shifting to avoid shifting a
    // promoted signed int into its sign bit.
    return  static_cast<uint32_t>(buf[0]) |
           (static_cast<uint32_t>(buf[1]) << 8) |
           (static_cast<uint32_t>(buf[2]) << 16) |
           (static_cast<uint32_t>(buf[3]) << 24);
}
But I'm not sure this is the most efficient way to do it. Can this code be improved? Keep in mind it will run thousands, maybe millions of times for a single shapefile, so having even one of the functions call the other seems worse than having two separate functions. Is there a difference in performance between using reinterpret_cast and an explicit C-style conversion (char*)? Should I use the same cast in both functions?
回答1:
- Casting between pointer types does not affect performance -- in this case, it's just a technicality to make the compiler happy.
- If you're really making a separate call to `read` for every 32-bit value, the time taken by the byte-swapping operation will likely be in the noise. For speed, you probably should have your own buffering layer so that your inner loop doesn't make any function calls.
- It's nice if the swap compiles down to a single opcode (like `bswap`), but whether or not that is possible, or the fastest option, is processor-specific.
- If you're really interested in maximizing speed, consider using SIMD intrinsics.
回答2:
In most cases the compiler should generate a bswap instruction, which is probably sufficient. If however you need something faster than that, vpshufb is your friend...
#include <immintrin.h>
#include <cstdint>
// Reverses, in place, the byte order of each of the 16 16-bit integers in
// `input`, using a single 256-bit byte shuffle. `input` need not be aligned.
inline void swap_16xi16(uint16_t input[16])
{
    // The byte shuffle works on each 128-bit half independently, so the same
    // sixteen source indices are listed once per half.
    const __m256i byte_order = _mm256_setr_epi8(
        1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14,
        1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);

    __m256i* const vec = reinterpret_cast<__m256i*>(input);
    const __m256i original = _mm256_loadu_si256(vec);
    _mm256_storeu_si256(vec, _mm256_shuffle_epi8(original, byte_order));
}
// Reverses, in place, the byte order of each of the 8 32-bit integers in
// `input`, using a single 256-bit byte shuffle. `input` need not be aligned.
inline void swap_8xi32(uint32_t input[8])
{
    // The byte shuffle works on each 128-bit half independently, so the same
    // sixteen source indices are listed once per half.
    const __m256i byte_order = _mm256_setr_epi8(
        3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
        3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);

    __m256i* const vec = reinterpret_cast<__m256i*>(input);
    const __m256i original = _mm256_loadu_si256(vec);
    _mm256_storeu_si256(vec, _mm256_shuffle_epi8(original, byte_order));
}
// Reverses, in place, the byte order of each of the 4 64-bit integers in
// `input`, using a single 256-bit byte shuffle. `input` need not be aligned.
inline void swap_4xi64(uint64_t input[4])
{
    // The byte shuffle works on each 128-bit half independently, so the same
    // sixteen source indices are listed once per half.
    const __m256i byte_order = _mm256_setr_epi8(
        7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
        7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);

    __m256i* const vec = reinterpret_cast<__m256i*>(input);
    const __m256i original = _mm256_loadu_si256(vec);
    _mm256_storeu_si256(vec, _mm256_shuffle_epi8(original, byte_order));
}
// Signed overload: forwards to the uint16_t version, viewing the same
// storage as unsigned 16-bit values.
inline void swap_16xi16(int16_t input[16])
{
    swap_16xi16(reinterpret_cast<uint16_t*>(input));
}
// Signed overload: forwards to the uint32_t version, viewing the same
// storage as unsigned 32-bit values.
inline void swap_8xi32(int32_t input[8])
{
    swap_8xi32(reinterpret_cast<uint32_t*>(input));
}
// Signed overload: forwards to the uint64_t version, viewing the same
// storage as unsigned 64-bit values.
inline void swap_4xi64(int64_t input[4])
{
    swap_4xi64(reinterpret_cast<uint64_t*>(input));
}
// Reverses the byte order of each of 8 floats in place by forwarding the
// same storage to the 32-bit integer swap.
inline void swap_8f(float input[8])
{
    swap_8xi32(reinterpret_cast<uint32_t*>(input));
}
// Reverses the byte order of each of 4 doubles in place by forwarding the
// same storage to the 64-bit integer swap.
inline void swap_4d(double input[4])
{
    swap_4xi64(reinterpret_cast<uint64_t*>(input));
}
来源:https://stackoverflow.com/questions/53530497/reading-integers-in-different-endianness-from-binary-file-in-c