src-openEuler/memkind
0002-Support-initializing-HBW-nodes-from-memory_locality.patch
From 448eb95b45b0cf6ecc7cf1a3e24056a2fdae85bd Mon Sep 17 00:00:00 2001
From: Yicong Yang <[email protected]>
Date: Fri, 13 Oct 2023 15:21:11 +0800
Subject: [PATCH] Support initializing HBW nodes from memory_locality
In the current implementation we mainly infer the HBW nodes from the
HMAT/SLIT, which may not describe all cases. For example, HMAT/SLIT
cannot describe the topology below:
[       Node 0       ]
[ CPU 0-3 ][ CPU 4-7 ]
     |          |
[  HBM 0  ][  HBM 1  ]
[  Node 1 ][  Node 2 ]
CPUs 0-7 are in one NUMA node, but CPUs 0-3 are closest to HBM 0 while
CPUs 4-7 are closest to HBM 1. The current HMAT/SLIT cannot express this
case.
To support this, openEuler has merged an HBM device driver that exports
the topology via sysfs[1]. For the topology above, the description will
look like:
$ cat /sys/kernel/hbm_memory/memory_topo/memory_locality
1 0-3
2 4-7
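
For illustration only (not part of this patch), a minimal sketch of
reading the "<node> <cpulist>" pairs shown above; the real parser added
below in memkind_bitmask.c also expands ranges such as "0-3", handles
comma-separated lists and validates the node numbers:

/* Illustrative sketch, not part of this patch. */
#include <stdio.h>

int main(void)
{
    FILE *f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
    int node;
    char cpulist[64];

    if (!f)
        return 0; /* no HBM driver; memkind falls back to HMAT/SLIT */

    while (fscanf(f, "%d %63s", &node, cpulist) == 2)
        printf("HBW node %d is closest to CPUs %s\n", node, cpulist);

    fclose(f);
    return 0;
}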
This patch cooperates with the HBM device driver to support initializing
the HBW nodes from memory_locality in memkind. memkind first tries to
obtain the HBW nodes by parsing memory_locality; on failure, or if
memory_locality does not exist on the system, it falls back to HMAT/SLIT.
The user can also disable this behavior by setting
MEMKIND_DISABLE_MEMORY_LOCALITY=1.
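
As an illustration (not part of this patch), an application allocating
from the HBW nodes resolved by this logic; running it with
MEMKIND_DISABLE_MEMORY_LOCALITY=1 forces the HMAT/SLIT path, and setting
MEMKIND_HBW_NODES bypasses the automatic detection entirely:

/* Illustrative example, not part of this patch. */
#include <memkind.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    /* Backed by the HBW NUMA nodes resolved at kind initialization */
    char *buf = memkind_malloc(MEMKIND_HBW, 4096);

    if (!buf) {
        fprintf(stderr, "no high-bandwidth memory available\n");
        return 1;
    }

    memset(buf, 0, 4096);
    memkind_free(MEMKIND_HBW, buf);
    return 0;
}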
[1] https://gitee.com/openeuler/kernel/pulls/451
Signed-off-by: Yicong Yang <[email protected]>
---
include/memkind/internal/memkind_bitmask.h | 2 +
src/memkind_bitmask.c | 185 +++++++++++++++++++++
src/memkind_hbw.c | 42 +++++
3 files changed, 229 insertions(+)
diff --git a/include/memkind/internal/memkind_bitmask.h b/include/memkind/internal/memkind_bitmask.h
index 5c5b8434..6b0c3f64 100644
--- a/include/memkind/internal/memkind_bitmask.h
+++ b/include/memkind/internal/memkind_bitmask.h
@@ -12,6 +12,8 @@ extern "C" {
typedef int (*get_node_bitmask)(struct bitmask **);
+int set_numanode_from_memory_locality(void **numanode,
+ memkind_node_variant_t node_variant);
int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
memkind_node_variant_t node_variant);
int set_bitmask_for_current_numanode(unsigned long *nodemask,
diff --git a/src/memkind_bitmask.c b/src/memkind_bitmask.c
index 4f6d9f00..84300395 100644
--- a/src/memkind_bitmask.c
+++ b/src/memkind_bitmask.c
@@ -1,9 +1,11 @@
// SPDX-License-Identifier: BSD-2-Clause
/* Copyright (C) 2019 - 2021 Intel Corporation. */
+#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
+#include <stdio.h>
#include <memkind/internal/memkind_bitmask.h>
#include <memkind/internal/memkind_log.h>
@@ -12,6 +14,89 @@
// Vector of CPUs with memory NUMA Node id(s)
VEC(vec_cpu_node, int);
+void init_node_closet_cpu(cpu_set_t **cpunode_mask, int num_cpu, int num_nodes)
+{
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t n;
+ FILE *f;
+
+ /*
+ * The content of /sys/kernel/hbm_memory/memory_topo/memory_locality should
+ * be like:
+ * 2 0-3
+ * 3 4-7
+ * 4 8-11
+ * 5 12-15
+ * 6 16-19
+ * 7 20-23
+ * 8 24-27
+ * 9 28-31
+ *
+ * The 1st column is the HBW node number and the 2nd column is the CPU list
+ * which is closest to the HBW node.
+ */
+ f = fopen("/sys/kernel/hbm_memory/memory_topo/memory_locality", "r");
+ if (!f)
+ return;
+
+ while ((n = getline(&line, &len, f)) != -1) {
+ long int node, begin_cpu, end_cpu;
+ char *begin, *end;
+
+ /* Get the node number first */
+ node = strtol(line, &end, 0);
+
+ /* Either the node number is invalid or the whole line is invalid */
+ if (line == end || node == LONG_MAX || node == LONG_MIN)
+ break;
+
+ if (node >= num_nodes) {
+ log_err("Invalid node number provided by memory_locality.");
+ break;
+ }
+
+ /* Try to find the beginning of the CPU list string */
+ while (*end == ' ' && end != line + len)
+ end++;
+
+ if (end == line + len || !isdigit(*end))
+ break;
+
+ begin = end;
+ do {
+ begin_cpu = strtol(begin, &end, 0);
+ if (begin == end || begin_cpu == LONG_MAX || begin_cpu == LONG_MIN)
+ break;
+
+ /* End of the line */
+ if (*end == '\0' || *end == '\n') {
+ CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+ break;
+ } else if (*end == ',') {
+ CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+ } else if (*end == '-' && isdigit(*(++end))) {
+ begin = end;
+ end_cpu = strtol(begin, &end, 0);
+ if (begin == end || end_cpu == LONG_MAX || end_cpu == LONG_MIN)
+ break;
+
+ while (begin_cpu <= end_cpu) {
+ CPU_SET_S(begin_cpu, CPU_ALLOC_SIZE(num_cpu), cpunode_mask[node]);
+ ++begin_cpu;
+ }
+ } else {
+ break;
+ }
+
+ begin = end + 1;
+ } while (begin < line + len);
+ }
+
+ free(line);
+ fclose(f);
+}
+
int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
{
*bm = numa_parse_nodestring(nodes_env);
@@ -22,6 +107,106 @@ int memkind_env_get_nodemask(char *nodes_env, struct bitmask **bm)
return MEMKIND_SUCCESS;
}
+int set_numanode_from_memory_locality(void **numanode,
+ memkind_node_variant_t node_variant)
+{
+ int num_cpu = numa_num_configured_cpus();
+ int cpuset_size = CPU_ALLOC_SIZE(num_cpu);
+ int max_node_id = numa_max_node();
+ cpu_set_t **cpunode_mask;
+ int init_node, cpu_id;
+ int status;
+
+ cpunode_mask = calloc(max_node_id + 1, sizeof(*cpunode_mask));
+ if (!cpunode_mask) {
+ status = MEMKIND_ERROR_MALLOC;
+ log_err("calloc() failed.");
+ goto out;
+ }
+
+ for (init_node = 0; init_node <= max_node_id; init_node++) {
+ cpunode_mask[init_node] = CPU_ALLOC(num_cpu);
+ if (!cpunode_mask[init_node]) {
+ while (init_node >= 0) {
+ CPU_FREE(cpunode_mask[init_node]);
+ init_node--;
+ }
+
+ status = MEMKIND_ERROR_MALLOC;
+ log_err("CPU_ALLOC_SIZE() failed.");
+ goto free_cpunode_mask;
+ }
+
+ CPU_ZERO_S(cpuset_size, cpunode_mask[init_node]);
+ }
+
+ init_node_closet_cpu(cpunode_mask, num_cpu, max_node_id + 1);
+
+ struct vec_cpu_node *node_arr =
+ (struct vec_cpu_node *)calloc(num_cpu, sizeof(struct vec_cpu_node));
+ if (!node_arr) {
+ status = MEMKIND_ERROR_MALLOC;
+ log_err("calloc() failed.");
+ goto free_cpunode_mask_array;
+ }
+
+ /* Scan CPUs once, assuming the number of CPUs is much larger than the number of NUMA nodes */
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ for (init_node = 0; init_node <= max_node_id; init_node++) {
+ if (CPU_ISSET_S(cpu_id, cpuset_size, cpunode_mask[init_node])) {
+ VEC_PUSH_BACK(&node_arr[cpu_id], init_node);
+
+ /*
+ * A CPU should always have exactly one closest node; log an error
+ * if this is violated.
+ */
+ if (node_variant == NODE_VARIANT_SINGLE &&
+ VEC_SIZE(&node_arr[cpu_id]) > 1) {
+ log_err("CPU%d has more than one closet node.", cpu_id);
+ status = MEMKIND_ERROR_RUNTIME;
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ if (VEC_CAPACITY(&node_arr[cpu_id]))
+ VEC_DELETE(&node_arr[cpu_id]);
+ }
+
+ goto free_node_arr;
+ }
+ }
+ }
+ }
+
+ /* Sanity Check each node_arr */
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ if (VEC_SIZE(&node_arr[cpu_id]) == 0) {
+ log_err("CPU%d's nodemask is not initialized.", cpu_id);
+ status = MEMKIND_ERROR_RUNTIME;
+ for (cpu_id = 0; cpu_id < num_cpu; cpu_id++) {
+ if (VEC_CAPACITY(&node_arr[cpu_id]))
+ VEC_DELETE(&node_arr[cpu_id]);
+ }
+
+ goto free_node_arr;
+ }
+ }
+
+ *numanode = node_arr;
+ status = MEMKIND_SUCCESS;
+ goto free_cpunode_mask_array;
+
+free_node_arr:
+ free(node_arr);
+
+free_cpunode_mask_array:
+ for (init_node = 0; init_node <= max_node_id; init_node++)
+ CPU_FREE(cpunode_mask[init_node]);
+
+free_cpunode_mask:
+ free(cpunode_mask);
+
+out:
+ return status;
+}
+
int set_closest_numanode(get_node_bitmask get_bitmask, void **numanode,
memkind_node_variant_t node_variant)
{
diff --git a/src/memkind_hbw.c b/src/memkind_hbw.c
index 077660ab..e9948593 100644
--- a/src/memkind_hbw.c
+++ b/src/memkind_hbw.c
@@ -363,10 +363,36 @@ static bool is_hmat_supported(void)
return true;
}
+/*
+ * The OS may provide further information about the HBW topology in
+ * /sys/kernel/hbm_memory/memory_topo/memory_locality. Use it unless the user
+ * specified HBW nodes or disabled the use of memory_locality.
+ */
+static bool use_memory_locality(void)
+{
+ char *memory_locality_disable = memkind_get_env("MEMKIND_DISABLE_MEMORY_LOCALITY");
+
+ if (memory_locality_disable && !strncmp(memory_locality_disable, "1", 1))
+ return false;
+
+ if (memkind_get_env("MEMKIND_HBW_NODES"))
+ return false;
+
+ return true;
+}
+
static void memkind_hbw_closest_numanode_init(void)
{
struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_MULTIPLE];
g->numanode = NULL;
+
+ if (use_memory_locality()) {
+ g->init_err = set_numanode_from_memory_locality(&g->numanode,
+ NODE_VARIANT_MULTIPLE);
+ if (!g->init_err)
+ return;
+ }
+
if (!is_hmat_supported()) {
g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
&g->numanode, NODE_VARIANT_MULTIPLE);
@@ -380,6 +406,14 @@ static void memkind_hbw_closest_preferred_numanode_init(void)
{
struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_SINGLE];
g->numanode = NULL;
+
+ if (use_memory_locality()) {
+ g->init_err = set_numanode_from_memory_locality(&g->numanode,
+ NODE_VARIANT_SINGLE);
+ if (!g->init_err)
+ return;
+ }
+
if (!is_hmat_supported()) {
g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
&g->numanode, NODE_VARIANT_SINGLE);
@@ -393,6 +427,14 @@ static void memkind_hbw_all_numanode_init(void)
{
struct hbw_numanode_t *g = &memkind_hbw_numanode_g[NODE_VARIANT_ALL];
g->numanode = NULL;
+
+ if (use_memory_locality()) {
+ g->init_err = set_numanode_from_memory_locality(&g->numanode,
+ NODE_VARIANT_ALL);
+ if (!g->init_err)
+ return;
+ }
+
if (!is_hmat_supported()) {
g->init_err = set_closest_numanode(memkind_hbw_get_nodemask,
&g->numanode, NODE_VARIANT_ALL);
--
2.24.0