代码拉取完成,页面将自动刷新
同步操作将从 src-openEuler/criu 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
From 4c11832330e6c7b924b96c7ea70c14025fe0d970 Mon Sep 17 00:00:00 2001
From: "fu.lin" <[email protected]>
Date: Tue, 13 Apr 2021 14:10:23 +0800
Subject: [PATCH 6/6] criu: add pin memory method
We can use the checkpoint/restore in userspace (CRIU) method to dump
and restore tasks when updating the kernel. Currently, criu needs to
dump all memory data of tasks to files. When the memory size is
very large (larger than 1GiB), dumping the data takes a very long
time (more than 1 min).
Instead, we can pin the memory data of tasks and collect the
corresponding physical page mapping info during checkpoint, and remap
those physical pages into the restored tasks during restore.
Signed-off-by: Jingxian He <[email protected]>
---
criu/config.c | 1 +
criu/cr-restore.c | 5 +++
criu/include/cr_options.h | 1 +
criu/include/restorer.h | 24 ++++++++++++
criu/mem.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++-
criu/pie/restorer.c | 21 ++++++++++-
6 files changed, 146 insertions(+), 2 deletions(-)
diff --git a/criu/config.c b/criu/config.c
index 5a53256..61b81fa 100644
--- a/criu/config.c
+++ b/criu/config.c
@@ -542,6 +542,7 @@ int parse_options(int argc, char **argv, bool *usage_error,
{ "pre-dump-mode", required_argument, 0, 1097},
{ "file-validation", required_argument, 0, 1098 },
BOOL_OPT("with-cpu-affinity", &opts.with_cpu_affinity),
+ BOOL_OPT("pin-memory", &opts.pin_memory),
{ },
};
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index da2e53d..ff41976 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -3866,6 +3866,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
task_args->clone_restore_fn,
task_args->thread_args);
+ if (opts.pin_memory)
+ task_args->pin_memory = true;
+ else
+ task_args->pin_memory = false;
+
/*
* An indirect call to task_restore, note it never returns
* and restoring core is extremely destructive.
diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h
index fda54a4..a4dc5b8 100644
--- a/criu/include/cr_options.h
+++ b/criu/include/cr_options.h
@@ -176,6 +176,7 @@ struct cr_options {
int file_validation_method;
/* restore cpu affinity */
int with_cpu_affinity;
+ int pin_memory;
};
extern struct cr_options opts;
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index bd6ef6a..fc37e6d 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -225,6 +225,7 @@ struct task_restore_args {
int lsm_type;
int child_subreaper;
bool has_clone3_set_tid;
+ bool pin_memory;
} __aligned(64);
/*
@@ -317,4 +318,27 @@ enum {
#define __r_sym(name) restorer_sym ## name
#define restorer_sym(rblob, name) (void*)(rblob + __r_sym(name))
+/*
+ * Userspace view of the pin-memory device interface.
+ * NOTE(review): these values presumably mirror the kernel driver's
+ * header -- keep them in sync with it.
+ */
+
+/* Device node the dump and restore paths open to issue the ioctls below. */
+#define PIN_MEM_FILE "/dev/pinmem"
+
+/* ioctl type ("magic") byte and per-command sequence numbers. */
+#define PIN_MEM_MAGIC 0x59
+#define _SET_PIN_MEM_AREA 1
+#define _CLEAR_PIN_MEM_AREA 2
+#define _REMAP_PIN_MEM_AREA 3
+#define _PIN_MEM_IOC_MAX_NR 4
+
+/* Full ioctl request codes; each one passes a pointer to the named type. */
+#define SET_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _SET_PIN_MEM_AREA, struct pin_mem_area_set)
+#define CLEAR_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _CLEAR_PIN_MEM_AREA, int)
+#define REMAP_PIN_MEM_AREA _IOW(PIN_MEM_MAGIC, _REMAP_PIN_MEM_AREA, int)
+
+/*
+ * Largest span, in bytes, described by a single pin_mem_area entry.
+ * Parenthesized so the expansion stays one operand in any expression
+ * (e.g. "x % ONCE_PIN_MEM_SIZE_LIMIT").
+ */
+#define ONCE_PIN_MEM_SIZE_LIMIT (32 * 1024 * 1024)
+/* Number of pin_mem_area entries carried by one pin_mem_area_set. */
+#define MAX_PIN_MEM_AREA_NUM 16
+struct pin_mem_area { /* one virtual address range handed to the pin-memory device */
+ unsigned long virt_start; /* first address of the range */
+ unsigned long virt_end; /* end of the range; presumably exclusive -- TODO confirm against the driver */
+};
+
+struct pin_mem_area_set { /* argument of the SET_PIN_MEM_AREA ioctl */
+ unsigned int pid; /* task the ranges belong to */
+ unsigned int area_num; /* number of mem_area[] entries filled in */
+ struct pin_mem_area mem_area[MAX_PIN_MEM_AREA_NUM]; /* ranges to pin, at most MAX_PIN_MEM_AREA_NUM per call */
+};
+
#endif /* __CR_RESTORER_H__ */
diff --git a/criu/mem.c b/criu/mem.c
index 167838b..709de4e 100644
--- a/criu/mem.c
+++ b/criu/mem.c
@@ -438,6 +438,88 @@ again:
return ret;
}
+/*
+ * Decide whether the memory of @vmae is to be pinned rather than
+ * dumped page by page.
+ *
+ * Returns true only for private anonymous mappings. vDSO, VVAR and
+ * AIO ring areas are rejected up front, and every other kind of
+ * mapping is left to the regular dump path.
+ */
+bool should_pin_vmae(VmaEntry *vmae)
+{
+	/* Special areas are never pinned, whatever other flags they carry. */
+	if (vma_entry_is(vmae, VMA_AREA_VDSO) ||
+	    vma_entry_is(vmae, VMA_AREA_VVAR) ||
+	    vma_entry_is(vmae, VMA_AREA_AIORING))
+		return false;
+
+	if (!vma_entry_is(vmae, VMA_ANON_PRIVATE))
+		return false;
+
+	pr_debug("find private anon vma: %lx-%lx\n", vmae->start, vmae->end);
+	return true;
+}
+
+/*
+ * Pin one batch of the address range [start, *pend) of task @item.
+ *
+ * The range is cut into chunks of at most ONCE_PIN_MEM_SIZE_LIMIT bytes
+ * and at most MAX_PIN_MEM_AREA_NUM chunks are submitted to the pin-memory
+ * device @fd with a single SET_PIN_MEM_AREA ioctl.
+ *
+ * On return *pend holds the end of the last chunk that was submitted, so
+ * the caller can resume from there when the range did not fit into one
+ * batch.
+ *
+ * Returns the ioctl() result, i.e. a negative value on failure.
+ */
+static int pin_one_pmas(int fd, unsigned long start,
+			unsigned long *pend, struct pstree_item *item)
+{
+	int ret;
+	unsigned int index = 0;
+	unsigned long end = *pend;
+	unsigned long next = start;
+	struct pin_mem_area_set pmas;
+	struct pin_mem_area *pma;
+
+	/*
+	 * The whole structure is passed to the ioctl, so unused mem_area[]
+	 * slots and padding must not carry stack garbage.
+	 */
+	memset(&pmas, 0, sizeof(pmas));
+
+	while (start < end && index < MAX_PIN_MEM_AREA_NUM) {
+		/* Compare sizes, not sums, so start + limit can never wrap. */
+		if (end - start > ONCE_PIN_MEM_SIZE_LIMIT)
+			next = start + ONCE_PIN_MEM_SIZE_LIMIT;
+		else
+			next = end;
+
+		pma = &pmas.mem_area[index];
+		pma->virt_start = start;
+		pma->virt_end = next;
+		pr_info("start pin %lx-%lx\n", start, next);
+
+		index++;
+		start = next;
+	}
+
+	*pend = next;
+	pmas.area_num = index;
+	pmas.pid = vpid(item);
+	pr_info("begin pin memory for pid:%u\n", pmas.pid);
+
+	ret = ioctl(fd, SET_PIN_MEM_AREA, &pmas);
+	if (ret < 0)
+		pr_err("pin mem fail, errno: %s\n", strerror(errno));
+	return ret;
+}
+/*
+ * Pin the whole range covered by @vmae for task @item.
+ *
+ * Opens the pin-memory device and calls pin_one_pmas() repeatedly, each
+ * call consuming the next batch of the range, until the end of the VMA
+ * is reached or a batch fails.
+ *
+ * Returns -1 if the device cannot be opened, otherwise the result of the
+ * last pin_one_pmas() call (0 when the range is empty).
+ */
+static int pin_vmae(VmaEntry *vmae, struct pstree_item *item)
+{
+	unsigned long cur, batch_end;
+	int rc = 0;
+	int dev;
+
+	dev = open(PIN_MEM_FILE, O_RDWR);
+	if (dev < 0) {
+		pr_err("open file: %s fail.\n", PIN_MEM_FILE);
+		return -1;
+	}
+
+	for (cur = vmae->start; cur < vmae->end; cur = batch_end) {
+		/* In: upper bound of the request; out: where this batch stopped. */
+		batch_end = vmae->end;
+		rc = pin_one_pmas(dev, cur, &batch_end, item);
+		if (rc < 0)
+			break;
+	}
+
+	close(dev);
+	return rc;
+}
+
static int __parasite_dump_pages_seized(struct pstree_item *item,
struct parasite_dump_pages_args *args,
struct vm_area_list *vma_area_list,
@@ -513,7 +595,16 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
if (possible_pid_reuse == -1)
goto out_xfer;
}
-
+ if (opts.pin_memory) {
+ /* pin memory before dump pages */
+ list_for_each_entry(vma_area, &vma_area_list->h, list) {
+ if (should_pin_vmae(vma_area->e)) {
+ ret = pin_vmae(vma_area->e, item);
+ if (ret)
+ goto out_xfer;
+ }
+ }
+ }
/*
* Step 1 -- generate the pagemap
@@ -524,6 +615,9 @@ static int __parasite_dump_pages_seized(struct pstree_item *item,
parent_predump_mode = mdc->parent_ie->pre_dump_mode;
list_for_each_entry(vma_area, &vma_area_list->h, list) {
+ if (opts.pin_memory && should_pin_vmae(vma_area->e))
+ continue;
+
ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl,
&pmc, has_parent, mdc->pre_dump,
parent_predump_mode);
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index c63f96b..f3bd541 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -1414,6 +1414,24 @@ int cleanup_current_inotify_events(struct task_restore_args *task_args)
return 0;
}
+/*
+ * Ask the pin-memory device to remap the memory pinned for task @pid.
+ *
+ * Opens PIN_MEM_FILE and issues the REMAP_PIN_MEM_AREA ioctl with a
+ * pointer to @pid as its argument.
+ *
+ * Returns -1 if the device cannot be opened, otherwise the ioctl result
+ * (negative on failure).
+ */
+int remap_vmas(int pid)
+{
+	int fd, ret;
+
+	fd = sys_open(PIN_MEM_FILE, O_RDWR, 0);
+	/*
+	 * The sys_*() helpers are raw syscall wrappers: a failure comes back
+	 * as a negative errno value, which is not necessarily -1.
+	 */
+	if (fd < 0) {
+		pr_err("open file: %s fail.\n", PIN_MEM_FILE);
+		return -1;
+	}
+
+	ret = sys_ioctl(fd, REMAP_PIN_MEM_AREA, (unsigned long)&pid);
+	if (ret < 0)
+		pr_err("remap pin mem fail for pid: %d\n", pid);
+	sys_close(fd);
+	return ret;
+}
+
+
/*
* The main routine to restore task via sigreturn.
* This one is very special, we never return there
@@ -1585,7 +1603,8 @@ long __export_restore_task(struct task_restore_args *args)
goto core_restore_end;
}
}
-
+ if (args->pin_memory)
+ remap_vmas(my_pid);
/*
* Now read the contents (if any)
*/
--
1.8.3.1
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。