问题
#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>
/**
 * Maps the POSIX shared-memory object "test_shm" and asks move_pages() which
 * NUMA node backs each probed address, printing one line per probe.
 *
 * Passing nodes == nullptr turns move_pages() into a pure query: nothing is
 * migrated and the per-address result is written into the status array.
 *
 * @return 0 on success, 1 if the object cannot be opened or mapped, or if the
 *         query itself fails.
 */
int main(int argc, char** argv) {
    constexpr uint64_t size = 16lu * 1024 * 1024;
    constexpr uint64_t stride = 4lu * 1024 * 1024;  // distance between probed addresses
    constexpr uint32_t nPages = size / stride;

    // Pre-fill with a sentinel so entries the kernel never wrote stay recognisable.
    int32_t status[nPages];
    for (int32_t& entry : status) {
        entry = std::numeric_limits<int32_t>::min();
    }
    void* pages[nPages];

    const int fd = shm_open("test_shm", O_RDWR | O_CREAT, 0666);
    if (fd < 0) {
        std::cerr << "failed to open shared memory because " << strerror(errno) << std::endl;
        return 1;
    }

    void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
        std::cerr << "failed to map shared memory because " << strerror(errno) << std::endl;
        close(fd);  // 0 is a valid descriptor, so close whatever shm_open returned
        return 1;
    }

    // Each entry must point at a distinct address inside the mapping; the
    // offset therefore has to scale with the index.
    for (uint32_t i = 0; i < nPages; i++) {
        pages[i] = static_cast<char*>(ptr) + i * stride;
    }

    // NOTE(review): nothing in this program sizes the object (ftruncate) or
    // writes to the mapping before the query. The move_pages(2) man page lists
    // -ENOENT as the status reported for a page that is not present.
    int rc = 0;
    if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
        rc = 1;
    }
    else {
        for (uint32_t i = 0; i < nPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
        }
    }

    munmap(ptr, size);
    close(fd);
    return rc;
}
And it prints:
page # 0 locates at numa node -2
page # 1 locates at numa node -2
page # 2 locates at numa node -2
page # 3 locates at numa node -2
According to the manpage, it states:
nodes is an array of integers that specify the desired location for each page. Each element in the array is a node number. nodes can also be NULL, in which case move_pages() does not move any pages but instead will return the node where each page currently resides, in the status array. Obtaining the status of each page may be necessary to determine pages that need to be moved.
Why does it print negative values although the query returns success? My machine only has 2 NUMA nodes -- 0 and 1.
kernel version: 3.10.0-862.2.3.el7.x86_64
Here is the version for hugepages:
#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>
/**
 * Allocates an anonymous MAP_HUGETLB mapping, reports which NUMA node backs
 * each 2 MiB step of it, migrates the mapping to the node given on the
 * command line, and finally reports the node again at 4 KiB granularity.
 *
 * Usage: <program> <destination-numa-node>
 *
 * @return 0 on success, 1 on bad arguments or mapping failure, -1 when the
 *         migrating move_pages() call fails.
 */
int main(int argc, char** argv) {
    if (argc < 2) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "move_pages")
                  << " <destination-numa-node>" << std::endl;
        return 1;
    }

    // Reject empty, non-numeric, trailing-garbage and out-of-range input
    // instead of silently truncating it into an int32_t.
    char* end = nullptr;
    errno = 0;
    const unsigned long parsed = strtoul(argv[1], &end, 10);
    if (errno != 0 || end == argv[1] || *end != '\0' ||
        parsed > static_cast<unsigned long>(std::numeric_limits<int32_t>::max())) {
        std::cerr << "invalid NUMA node: " << argv[1] << std::endl;
        return 1;
    }
    const int32_t dst_node = static_cast<int32_t>(parsed);

    constexpr uint64_t size = 4lu * 1024 * 1024;
    constexpr uint64_t pageSize = 2lu * 1024 * 1024;
    constexpr uint32_t nPages = size / pageSize;

    // Sentinel-fill so entries the kernel never wrote stay recognisable.
    int32_t status[nPages];
    for (int32_t& entry : status) {
        entry = std::numeric_limits<int32_t>::min();
    }
    void* pages[nPages];
    int32_t dst_nodes[nPages];

    void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB, -1, 0);
    if (ptr == MAP_FAILED) {
        std::cerr << "failed to map hugepages because " << strerror(errno) << std::endl;
        return 1;
    }

    // Write to the whole mapping before it is queried.
    memset(ptr, 0x41, nPages * pageSize);
    for (uint32_t i = 0; i < nPages; i++) {
        pages[i] = static_cast<char*>(ptr) + i * pageSize;
        dst_nodes[i] = dst_node;
    }

    std::cout << "Before moving" << std::endl;
    if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
    }
    else {
        for (uint32_t i = 0; i < nPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
        }
    }

    // real move
    if (0 != move_pages(0, nPages, pages, dst_nodes, status, MPOL_MF_MOVE_ALL)) {
        std::cout << "failed to move pages because " << strerror(errno) << std::endl;
        munmap(ptr, size);
        return -1;
    }
    // The call can succeed as a whole while individual pages fail; those
    // pages carry a negative errno value in their status slot.
    for (uint32_t i = 0; i < nPages; i++) {
        if (status[i] < 0) {
            std::cerr << "page # " << i << " was not moved because "
                      << strerror(-status[i]) << std::endl;
        }
    }

    constexpr uint64_t smallPageSize = 4lu * 1024;
    constexpr uint32_t nSmallPages = size / smallPageSize;
    void* smallPages[nSmallPages];
    // An aggregate initializer would only set element 0 and zero the rest,
    // and 0 is indistinguishable from a real node id; fill every slot.
    int32_t smallStatus[nSmallPages];
    for (uint32_t i = 0; i < nSmallPages; i++) {
        smallStatus[i] = std::numeric_limits<int32_t>::min();
        smallPages[i] = static_cast<char*>(ptr) + i * smallPageSize;
    }

    std::cout << "after moving" << std::endl;
    if (0 != move_pages(0, nSmallPages, smallPages, nullptr, smallStatus, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
    }
    else {
        for (uint32_t i = 0; i < nSmallPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << smallStatus[i] << std::endl;
        }
    }

    munmap(ptr, size);
    return 0;
}
The interesting thing is that move_pages() seems to understand hugepages: after the hugepages are moved, I query using the small page size, and it reports the expected NUMA node IDs.
回答1:
Your usage of shm_open and mmap probably will not get huge pages as you want. The move_pages syscall (and the libnuma wrapper) works on standard pages of 4096 bytes on x86_64.
You also use move_pages in the wrong way, with an incorrect 3rd argument "pages". It should not be a pointer to the memory itself, but a pointer to an array which itself contains nPages pointers:
http://man7.org/linux/man-pages/man2/move_pages.2.html
long move_pages(int pid, unsigned long count, void **pages,
const int *nodes, int *status, int flags);
pages is an array of pointers to the pages that should be moved.
These are pointers that should be aligned to page boundaries.
Addresses are specified as seen by the process specified by pid.
Without correct pointers in "pages" you will get -14, which is EFAULT according to errno 14 (the errno tool from the moreutils package).
//https://stackoverflow.com/questions/54546367/fail-to-query-via-move-pages
//g++ 54546367.move_pages.cc -o 54546367.move_pages -lnuma -lrt
#include <cstdint>
#include <iostream>
#include <numaif.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
#include <string.h>
#include <limits>
/**
 * Maps 256 KiB of anonymous private memory, writes to all of it, and prints
 * the NUMA node reported by move_pages() for every 4 KiB page.
 *
 * move_pages() is called with nodes == nullptr, so it only queries and does
 * not migrate anything.
 *
 * @return 0 on success, 1 if the mapping or the query fails.
 */
int main(int argc, char** argv) {
    constexpr uint64_t size = 256lu * 1024;// * 1024;
    constexpr uint64_t pageSize = 4lu * 1024;
    constexpr uint32_t nPages = size / pageSize;

    void* pages[nPages];
    // Sentinel-fill so entries the kernel never wrote stay recognisable.
    int32_t status[nPages];
    for (int32_t& entry : status) {
        entry = std::numeric_limits<int32_t>::min();
    }

    // An anonymous mapping replaces the shm_open()-backed one:
    // auto fd = shm_open("test_shm", O_RDWR|O_CREAT, 0666);
    // void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    // mmap(2) asks portable programs to pass fd == -1 with MAP_ANONYMOUS.
    void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    std::cout << "Ptr is " << ptr << std::endl;
    if (ptr == MAP_FAILED) {
        std::cerr << "failed to map memory because " << strerror(errno) << std::endl;
        return 1;
    }

    // Write to every byte of the mapping before querying it.
    memset(ptr, 0x41, size);
    for (uint32_t i = 0; i < nPages; i++) {
        pages[i] = static_cast<char*>(ptr) + i * pageSize;
    }

    int rc = 0;
    if (0 != move_pages(0, nPages, pages, nullptr, status, 0)) {
        std::cout << "failed to inquiry pages because " << strerror(errno) << std::endl;
        rc = 1;
    }
    else {
        for (uint32_t i = 0; i < nPages; i++) {
            std::cout << "page # " << i << " locates at numa node " << status[i] << std::endl;
        }
    }

    munmap(ptr, size);
    return rc;
}
On a NUMA machine it reports the same node for every page when started as taskset -c 7 ./54546367.move_pages
and interleaved nodes (0 1 0 1) when started as numactl -i all ./54546367.move_pages
来源:https://stackoverflow.com/questions/54546367/fail-to-query-via-move-pages