lxc
/
0009-cgroup-refact-cgroup-manager-to-...

From 4592fbcbd0be862cf37a3090f58a4491c430e71a Mon Sep 17 00:00:00 2001
From: lifeng68 <[email protected]>
Date: Mon, 2 Nov 2020 16:53:19 +0800
Subject: [PATCH 09/10] cgroup: refactor cgroup manager into a single file

Signed-off-by: lifeng68 <[email protected]>
---
 src/lxc/Makefile.am             |    5 +-
 src/lxc/cgroups/cgfsng.c        | 1030 +---------
 src/lxc/cgroups/isulad_cgfsng.c | 3115 +++++++++++++++++++++++++++++++
 3 files changed, 3147 insertions(+), 1003 deletions(-)
 create mode 100644 src/lxc/cgroups/isulad_cgfsng.c

diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index 0e1ba8da9..dc49c7e22 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -107,7 +107,6 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \
 		    api_extensions.h \
 		    attach.c attach.h \
 		    caps.c caps.h \
-		    cgroups/cgfsng.c \
 		    cgroups/cgroup.c cgroups/cgroup.h \
 		    cgroups/cgroup2_devices.c cgroups/cgroup2_devices.h \
 		    cgroups/cgroup_utils.c cgroups/cgroup_utils.h \
@@ -174,7 +173,11 @@ liblxc_la_SOURCES += isulad_utils.c isulad_utils.h \
 			json/logger_json_file.c json/logger_json_file.h \
 			json/oci_runtime_spec.c json/oci_runtime_spec.h \
 			json/read-file.c json/read-file.h \
+			cgroups/isulad_cgfsng.c \
 			exec_commands.c exec_commands.h
+
+else
+liblxc_la_SOURCES += cgroups/cgfsng.c
 endif

 if IS_BIONIC
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 1ff3d9812..9b9aaf6c3 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -214,7 +214,6 @@ static char *read_file(const char *fnam)
 	return move_ptr(buf);
 }

-#ifndef HAVE_ISULAD
 /* Taken over modified from the kernel sources. */
 #define NBITS 32 /* bits in uint32_t */
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
@@ -477,14 +476,13 @@ static bool copy_parent_file(const char *parent_cgroup,
 				       value, child_cgroup, file);
 	return true;
 }
-#endif
+

 static inline bool is_unified_hierarchy(const struct hierarchy *h)
 {
 	return h->version == CGROUP2_SUPER_MAGIC;
 }

-#ifndef HAVE_ISULAD
 /*
  * Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set
  * cgroup.clone_children so that children inherit settings. Since the
@@ -564,7 +562,6 @@ static int cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h,

 	return fret;
 }
-#endif

 /* Given two null-terminated lists of strings, return true if any string is in
  * both.
@@ -958,107 +955,6 @@ struct generic_userns_exec_data {
 	char *path;
 };

-#ifdef HAVE_ISULAD
-
-static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
-			const char *container_cgroup)
-{
-	if (!container_cgroup || !hierarchies)
-		return 0;
-
-	for (int i = 0; hierarchies[i]; i++) {
-		struct hierarchy *h = hierarchies[i];
-		int ret;
-
-		if (!h->container_full_path) {
-			h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL);
-		}
-
-		ret = lxc_rm_rf(h->container_full_path);
-		if (ret < 0) {
-			SYSERROR("Failed to destroy \"%s\"", h->container_full_path);
-			return -1;
-		}
-
-		free_disarm(h->container_full_path);
-	}
-
-	return 0;
-}
-
-static int isulad_cgroup_tree_remove_wrapper(void *data)
-{
-	struct generic_userns_exec_data *arg = data;
-	uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
-	gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
-	int ret;
-
-	if (!lxc_setgroups(0, NULL) && errno != EPERM)
-		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
-
-	ret = setresgid(nsgid, nsgid, nsgid);
-	if (ret < 0)
-		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
-				       (int)nsgid, (int)nsgid, (int)nsgid);
-
-	ret = setresuid(nsuid, nsuid, nsuid);
-	if (ret < 0)
-		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
-				       (int)nsuid, (int)nsuid, (int)nsuid);
-
-	return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
-}
-
-__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
-						struct lxc_handler *handler)
-{
-	int ret;
-
-	if (!ops) {
-		ERROR("Called with uninitialized cgroup operations");
-		return false;
-	}
-
-	if (!ops->hierarchies) {
-		return false;
-	}
-
-	if (!handler) {
-		ERROR("Called with uninitialized handler");
-		return false;
-	}
-
-	if (!handler->conf) {
-		ERROR("Called with uninitialized conf");
-		return false;
-	}
-
-#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
-	ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
-	if (ret < 0)
-		WARN("Failed to detach bpf program from cgroup");
-#endif
-
-	if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) {
-		struct generic_userns_exec_data wrap = {
-			.conf			= handler->conf,
-			.container_cgroup	= ops->container_cgroup,
-			.hierarchies		= ops->hierarchies,
-			.origuid		= 0,
-		};
-		ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
-				    &wrap, "cgroup_tree_remove_wrapper");
-	} else {
-		ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
-	}
-	if (ret < 0) {
-		SYSWARN("Failed to destroy cgroups");
-		return false;
-	}
-
-	return true;
-}
-#else
 static int cgroup_tree_remove(struct hierarchy **hierarchies,
 			const char *container_cgroup)
 {
@@ -1149,15 +1045,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
 	if (ret < 0)
 		SYSWARN("Failed to destroy cgroups");
 }
-#endif

-#ifdef HAVE_ISULAD
-__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
-						struct lxc_handler *handler)
-{
-	return;
-}
-#else
 __cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
 						struct lxc_handler *handler)
 {
@@ -1230,15 +1118,6 @@ try_lxc_rm_rf:
 			WARN("Failed to destroy \"%s\"", h->monitor_full_path);
 	}
 }
-#endif
-
-#ifdef HAVE_ISULAD
-__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
-						      struct lxc_handler *handler)
-{
-	return true;
-}
-#else

 static int mkdir_eexist_on_last(const char *dir, mode_t mode)
 {
@@ -1398,227 +1277,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
 	ops->monitor_cgroup = move_ptr(monitor_cgroup);
 	return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup);
 }
-#endif
-
-#ifdef HAVE_ISULAD
-
-static bool isulad_copy_parent_file(char *path, char *file)
-{
-	int ret;
-	int len = 0;
-	char *value = NULL;
-	char *current = NULL;
-	char *fpath = NULL;
-	char *lastslash = NULL;
-	char oldv;
-
-	fpath = must_make_path(path, file, NULL);
-	current = read_file(fpath);
-
-	if (current == NULL) {
-		SYSERROR("Failed to read file \"%s\"", fpath);
-		free(fpath);
-		return false;
-	}
-
-	if (strcmp(current, "\n") != 0) {
-		free(fpath);
-		free(current);
-		return true;
-	}
-
-	free(fpath);
-	free(current);
-
-	lastslash = strrchr(path, '/');
-	if (lastslash == NULL) {
-		ERROR("Failed to detect \"/\" in \"%s\"", path);
-		return false;
-	}
-	oldv = *lastslash;
-	*lastslash = '\0';
-	fpath = must_make_path(path, file, NULL);
-	*lastslash = oldv;
-	len = lxc_read_from_file(fpath, NULL, 0);
-	if (len <= 0)
-		goto on_error;
-
-	value = must_realloc(NULL, len + 1);
-	ret = lxc_read_from_file(fpath, value, len);
-	if (ret != len)
-		goto on_error;
-	free(fpath);
-
-	fpath = must_make_path(path, file, NULL);
-	ret = lxc_write_to_file(fpath, value, len, false, 0666);
-	if (ret < 0)
-		SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
-	free(fpath);
-	free(value);
-	return ret >= 0;
-
-on_error:
-	SYSERROR("Failed to read file \"%s\"", fpath);
-	free(fpath);
-	free(value);
-	return false;
-}
-
-static bool build_sub_cpuset_cgroup_dir(char *cgpath)
-{
-	int ret;
-
-	ret = mkdir_p(cgpath, 0755);
-	if (ret < 0) {
-		if (errno != EEXIST) {
-			SYSERROR("Failed to create directory \"%s\"", cgpath);
-			return false;
-		}
-	}
-
-	/* copy parent's settings */
-	if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) {
-		SYSERROR("Failed to copy \"cpuset.cpus\" settings");
-		return false;
-	}
-
-	/* copy parent's settings */
-	if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) {
-		SYSERROR("Failed to copy \"cpuset.mems\" settings");
-		return false;
-	}
-
-	return true;
-}
-
-static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
-{
-	char *cgpath, *slash;
-	bool sub_mk_success = false;
-
-	if (!string_in_list(h->controllers, "cpuset"))
-		return true;
-
-	cgname += strspn(cgname, "/");
-
-	slash = strchr(cgname, '/');
-
-	if (slash != NULL) {
-		while (slash) {
-			*slash = '\0';
-			cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
-			sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath);
-			free(cgpath);
-			*slash = '/';
-			if (!sub_mk_success) {
-				return false;
-			}
-			slash = strchr(slash + 1, '/');
-		}
-	}
-
-	cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
-	sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath);
-	free(cgpath);
-	if (!sub_mk_success) {
-		return false;
-	}
-
-	return true;
-}
-
-static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode)
-{
-	const char *tmp = dir;
-	const char *orig = dir;
-
-	do {
-		int ret;
-		size_t cur_len;
-		char *makeme;
-
-		dir = tmp + strspn(tmp, "/");
-		tmp = dir + strcspn(dir, "/");
-
-		errno = ENOMEM;
-		cur_len = dir - orig;
-		makeme = strndup(orig, cur_len);
-		if (!makeme)
-			return -1;
-
-		ret = mkdir(makeme, mode);
-		if (ret < 0) {
-			if (errno != EEXIST) {
-				SYSERROR("Failed to create directory \"%s\"", makeme);
-				free(makeme);
-				return -1;
-			}
-		}
-		free(makeme);
-
-	} while (tmp != dir);

-	return 0;
-}
-
-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd)
-{
-	int ret;
-	__do_free char *path = NULL;
-
-	path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
-
-	if (file_exists(path)) { // it must not already exist
-		ERROR("Cgroup path \"%s\" already exist.", path);
-		lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.",
-		                        __FILE__, __LINE__, path);
-		return false;
-	}
-
-	if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
-		ERROR("Failed to handle legacy cpuset controller");
-		return false;
-	}
-
-	ret = isulad_mkdir_eexist_on_last(path, 0755);
-	if (ret < 0) {
-		ERROR("Failed to create cgroup \"%s\"", path);
-		return false;
-	}
-
-	h->cgfd_con = lxc_open_dirfd(path);
-	if (h->cgfd_con < 0)
-		return log_error_errno(false, errno, "Failed to open %s", path);
-
-	if (h->container_full_path == NULL) {
-		h->container_full_path = move_ptr(path);
-	}
-
-	return true;
-}
-
-/* isulad: create hierarchies path, if fail, return the error */
-__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
-						      struct lxc_handler *handler)
-{
-	int i;
-	char *container_cgroup = ops->container_cgroup;
-
-	if (!container_cgroup) {
-		ERROR("cgfsng_create container_cgroup is invalid");
-		return false;
-	}
-
-	for (i = 0; ops->hierarchies[i]; i++) {
-		if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) {
-			SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path);
-			return false;
-		}
-	}
-
-	return true;
-}
-#else
 /*
  * Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
  * next cgroup_pattern-1, -2, ..., -999.
@@ -1698,15 +1357,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
 	INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup);
 	return true;
 }
-#endif

-#ifdef HAVE_ISULAD
-__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
-					      struct lxc_handler *handler)
-{
-	return true;
-}
-#else
 __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
 					      struct lxc_handler *handler)
 {
@@ -1758,58 +1409,7 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,

 	return true;
 }
-#endif

-#ifdef HAVE_ISULAD
-__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
-					      struct lxc_handler *handler)
-{
-	int len;
-	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
-
-	if (!ops)
-		return ret_set_errno(false, ENOENT);
-
-	if (!ops->hierarchies)
-		return true;
-
-	if (!ops->container_cgroup)
-		return ret_set_errno(false, ENOENT);
-
-	if (!handler || !handler->conf)
-		return ret_set_errno(false, EINVAL);
-
-	len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
-
-	for (int i = 0; ops->hierarchies[i]; i++) {
-		int ret;
-		char *fullpath;
-		int retry_count = 0;
-		int max_retry = 10;
-
-		fullpath = must_make_path(ops->hierarchies[i]->container_full_path,
-		                          "cgroup.procs", NULL);
-retry:
-		ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
-		if (ret != 0) {
-			if (retry_count < max_retry) {
-				SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count);
-				(void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup);
-				(void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755);
-				usleep(100 * 1000); /* 100 millisecond */
-				retry_count++;
-				goto retry;
-			}
-			SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
-			free(fullpath);
-			return false;
-		}
-		free(fullpath);
-	}
-
-	return true;
-}
-#else
 __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
 					      struct lxc_handler *handler)
 {
@@ -1841,7 +1441,6 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,

 	return true;
 }
-#endif

 static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
 		      gid_t chown_gid, mode_t chmod_mode)
@@ -2056,234 +1655,39 @@ static int __cg_mount_direct(int type, struct hierarchy *h,
 	 flags |= MS_RELATIME;

 	 if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
-		 flags |= MS_RDONLY;
-
-	 if (h->version != CGROUP2_SUPER_MAGIC) {
-		 controllers = lxc_string_join(",", (const char **)h->controllers, false);
-		 if (!controllers)
-			 return -ENOMEM;
-		 fstype = "cgroup";
-	}
-
-	ret = mount("cgroup", controllerpath, fstype, flags, controllers);
-	if (ret < 0)
-		return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
-				       controllerpath, fstype);
-
-	DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
-	return 0;
-}
-
-static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
-					       const char *controllerpath)
-{
-	return __cg_mount_direct(type, h, controllerpath);
-}
-
-static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
-				       const char *controllerpath)
-{
-	if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
-		return 0;
-
-	return __cg_mount_direct(type, h, controllerpath);
-}
-
-#ifdef HAVE_ISULAD
-__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
-                                      struct lxc_handler *handler,
-                                      const char *root, int type)
-{
-	int i, ret;
-	char *tmpfspath = NULL;
-	char *systemdpath = NULL;
-	char *unifiedpath = NULL;
-	bool has_cgns = false, retval = false, wants_force_mount = false;
-	char **merged = NULL;
-
-	if ((type & LXC_AUTO_CGROUP_MASK) == 0)
-		return true;
-
-	if (type & LXC_AUTO_CGROUP_FORCE) {
-		type &= ~LXC_AUTO_CGROUP_FORCE;
-		wants_force_mount = true;
-	}
-
-	if (!wants_force_mount) {
-		if (!lxc_list_empty(&handler->conf->keepcaps))
-			wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
-		else
-			wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
-	}
-
-	has_cgns = cgns_supported();
-	if (has_cgns && !wants_force_mount)
-		return true;
-
-	if (type == LXC_AUTO_CGROUP_NOSPEC)
-		type = LXC_AUTO_CGROUP_MIXED;
-	else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
-		type = LXC_AUTO_CGROUP_FULL_MIXED;
-
-	/* Mount tmpfs */
-	tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
-	if (mkdir_p(tmpfspath, 0755) < 0) {
-		ERROR("Failed to create directory: %s", tmpfspath);
-		goto on_error;
-	}
-	ret = safe_mount(NULL, tmpfspath, "tmpfs",
-	                 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
-	                 "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context);
-	if (ret < 0)
-		goto on_error;
-
-	for (i = 0; ops->hierarchies[i]; i++) {
-		char *controllerpath = NULL;
-		char *path2 = NULL;
-		struct hierarchy *h = ops->hierarchies[i];
-		char *controller = strrchr(h->mountpoint, '/');
-
-		if (!controller)
-			continue;
-		controller++;
-
-		// isulad: symlink subcgroup
-		if (strchr(controller, ',') != NULL) {
-			int pret;
-			pret = lxc_append_string(&merged, controller);
-			if (pret < 0)
-				goto on_error;
-		}
-
-		controllerpath = must_make_path(tmpfspath, controller, NULL);
-		if (dir_exists(controllerpath)) {
-			free(controllerpath);
-			continue;
-		}
-
-		ret = mkdir(controllerpath, 0755);
-		if (ret < 0) {
-			SYSERROR("Error creating cgroup path: %s", controllerpath);
-			free(controllerpath);
-			goto on_error;
-		}
-
-		if (has_cgns && wants_force_mount) {
-			/* If cgroup namespaces are supported but the container
-			 * will not have CAP_SYS_ADMIN after it has started we
-			 * need to mount the cgroups manually.
-			 */
-			ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
-			free(controllerpath);
-			if (ret < 0)
-				goto on_error;
-
-			continue;
-		}
-
-		ret = cg_mount_cgroup_full(type, h, controllerpath);
-		if (ret < 0) {
-			free(controllerpath);
-			goto on_error;
-		}
-
-		if (!cg_mount_needs_subdirs(type)) {
-			free(controllerpath);
-			continue;
-		}
-
-		// isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container,
-		// isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
-		path2 = must_make_path(controllerpath, NULL);
-		ret = mkdir_p(path2, 0755);
-		if (ret < 0) {
-			free(controllerpath);
-			free(path2);
-			goto on_error;
-		}
-
-		ret = cg_legacy_mount_controllers(type, h, controllerpath,
-		                                  path2, ops->container_cgroup);
-		free(controllerpath);
-		free(path2);
-		if (ret < 0)
-			goto on_error;
-	}
-
-	// isulad: symlink subcgroup
-	if (merged) {
-		char **mc = NULL;
-		for (mc = merged; *mc; mc++) {
-			char *token = NULL;
-			char *copy = must_copy_string(*mc);
-			lxc_iterate_parts(token, copy, ",") {
-				int mret;
-				char *link;
-				link = must_make_path(tmpfspath, token, NULL);
-				mret = symlink(*mc, link);
-				if (mret < 0 && errno != EEXIST) {
-					SYSERROR("Failed to create link %s for target %s", link, *mc);
-					free(copy);
-					free(link);
-					goto on_error;
-				}
-				free(link);
-			}
-			free(copy);
-		}
-	}
-
+		 flags |= MS_RDONLY;

-	// isulad: remount /sys/fs/cgroup to readonly
-	if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
-		ret = mount(tmpfspath, tmpfspath, "bind",
-		            MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
-		if (ret < 0) {
-			SYSERROR("Failed to remount /sys/fs/cgroup.");
-			goto on_error;
-		}
+	 if (h->version != CGROUP2_SUPER_MAGIC) {
+		 controllers = lxc_string_join(",", (const char **)h->controllers, false);
+		 if (!controllers)
+			 return -ENOMEM;
+		 fstype = "cgroup";
 	}

-	// isulad: remount /sys/fs/cgroup/systemd to readwrite for system container
-	if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0)
-	{
-		unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL);
-		if (dir_exists(unifiedpath))
-		{
-			ret = umount2(unifiedpath, MNT_DETACH);
-			if (ret < 0)
-			{
-				SYSERROR("Failed to umount /sys/fs/cgroup/unified.");
-				goto on_error;
-			}
-		}
+	ret = mount("cgroup", controllerpath, fstype, flags, controllers);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
+				       controllerpath, fstype);

-		systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL);
-		ret = mount(systemdpath, systemdpath, "bind",
-					MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL);
-		if (ret < 0)
-		{
-			SYSERROR("Failed to remount /sys/fs/cgroup/systemd.");
-			goto on_error;
-		}
-	}
+	DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
+	return 0;
+}

-	retval = true;
+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
+					       const char *controllerpath)
+{
+	return __cg_mount_direct(type, h, controllerpath);
+}

-on_error:
-	free(tmpfspath);
-	if (systemdpath != NULL)
-	{
-		free(systemdpath);
-	}
-	if (unifiedpath != NULL)
-	{
-		free(unifiedpath);
-	}
-	lxc_free_array((void **)merged, free);
-	return retval;
+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+				       const char *controllerpath)
+{
+	if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
+		return 0;
+
+	return __cg_mount_direct(type, h, controllerpath);
 }
-#else
+
 __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
 				      struct lxc_handler *handler,
 				      const char *root, int type)
@@ -2396,7 +1800,6 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,

 	return true;
 }
-#endif

 /* Only root needs to escape to the cgroup of its init. */
 __cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
@@ -2647,34 +2050,11 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
 		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
 				      controller ? controller : "(null)");

-#ifdef HAVE_ISULAD
-	if (!h->container_full_path)
-		h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
-#endif
-
 	return h->container_full_path
 		   ? h->container_full_path + strlen(h->mountpoint)
 		   : NULL;
 }

-#ifdef HAVE_ISULAD
-__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops,
-						  const char *controller)
-{
-	struct hierarchy *h;
-
-	h = get_hierarchy(ops, controller);
-	if (!h)
-		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
-				      controller ? controller : "(null)");
-
-	if (!h->container_full_path)
-		h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
-
-	return h->container_full_path;
-}
-#endif
-
 /* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
  * which must be freed by the caller.
  */
@@ -2981,44 +2361,6 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops,
 	return true;
 }

-#ifdef HAVE_ISULAD
-__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
-                                   char *value, size_t len, const char *name,
-                                   const char *lxcpath)
-{
-	int ret = -1;
-	size_t controller_len;
-	char *controller, *p, *path;
-	struct hierarchy *h;
-
-	controller_len = strlen(filename);
-	controller = alloca(controller_len + 1);
-	(void)strlcpy(controller, filename, controller_len + 1);
-
-	p = strchr(controller, '.');
-	if (p)
-		*p = '\0';
-
-	const char *ori_path = ops->get_cgroup(ops, controller);
-	if (ori_path == NULL) {
-		ERROR("Failed to get cgroup path:%s", controller);
-		return -1;
-	}
-	path = safe_strdup(ori_path);
-
-	h = get_hierarchy(ops, controller);
-	if (h) {
-		char *fullpath;
-
-		fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
-		ret = lxc_read_from_file(fullpath, value, len);
-		free(fullpath);
-	}
-	free(path);
-
-	return ret;
-}
-#else
 /* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits.  Here we
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
@@ -3056,7 +2398,6 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,

 	return ret;
 }
-#endif

 static int device_cgroup_parse_access(struct device_item *device, const char *val)
 {
@@ -3170,44 +2511,6 @@ int device_cgroup_rule_parse(struct device_item *device, const char *key,
 	return device_cgroup_parse_access(device, ++val);
 }

-#ifdef HAVE_ISULAD
-__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
-                                   const char *filename, const char *value,
-                                   const char *name, const char *lxcpath)
-{
-	int ret = -1;
-	size_t controller_len;
-	char *controller, *p, *path;
-	struct hierarchy *h;
-
-	controller_len = strlen(filename);
-	controller = alloca(controller_len + 1);
-	(void)strlcpy(controller, filename, controller_len + 1);
-
-	p = strchr(controller, '.');
-	if (p)
-		*p = '\0';
-
-	const char *ori_path = ops->get_cgroup(ops, controller);
-	if (ori_path == NULL) {
-		ERROR("Failed to get cgroup path:%s", controller);
-		return -1;
-	}
-	path = safe_strdup(ori_path);
-
-	h = get_hierarchy(ops, controller);
-	if (h) {
-		char *fullpath;
-
-		fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
-		ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
-		free(fullpath);
-	}
-	free(path);
-
-	return ret;
-}
-#else
 /* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits.  Here we
  * don't have a cgroup_data set up, so we ask the running container through the
  * commands API for the cgroup path.
@@ -3260,7 +2563,6 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,

 	return ret;
 }
-#endif

 /* take devices cgroup line
  *    /dev/foo rwx
@@ -3352,7 +2654,6 @@ static int convert_devpath(const char *invalue, char *dest)
 	return 0;
 }

-#ifndef HAVE_ISULAD
 /* Called from setup_limits - here we have the container's cgroup_data because
  * we created the cgroups.
  */
@@ -3385,212 +2686,7 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,

 	return lxc_write_openat(h->container_full_path, filename, value, strlen(value));
 }
-#endif
-
-#ifdef HAVE_ISULAD
-/* Called from setup_limits - here we have the container's cgroup_data because
- * we created the cgroups.
- */
-static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename,
-                              char *value, size_t len)
-{
-	char *fullpath = NULL;
-	char *p = NULL;
-	struct hierarchy *h = NULL;
-	int ret = 0;
-	char *controller = NULL;
-
-	len = strlen(filename);
-	if (SIZE_MAX - 1 < len) {
-		errno = EINVAL;
-		return -1;
-	}
-	controller = calloc(1, len + 1);
-	if (controller == NULL) {
-		errno = ENOMEM;
-		return -1;
-	}
-	(void)strlcpy(controller, filename, len + 1);
-
-	p = strchr(controller, '.');
-	if (p)
-		*p = '\0';
-
-
-	h = get_hierarchy(ops, controller);
-	if (!h) {
-		ERROR("Failed to setup limits for the \"%s\" controller. "
-		      "The controller seems to be unused by \"cgfsng\" cgroup "
-		      "driver or not enabled on the cgroup hierarchy",
-		      controller);
-		errno = ENOENT;
-		free(controller);
-		return -ENOENT;
-	}
-
-	fullpath = must_make_path(h->container_full_path, filename, NULL);
-	ret = lxc_read_from_file(fullpath, value, len);
-	free(fullpath);
-	free(controller);
-	return ret;
-}
-
-static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
-                              const char *value)
-{
-	size_t len;
-	char *fullpath, *p;
-	/* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
-	char converted_value[50];
-	struct hierarchy *h;
-	int ret = 0;
-	char *controller = NULL;
-	int retry_count = 0;
-	int max_retry = 10;
-	char *container_cgroup = ops->container_cgroup;
-
-	len = strlen(filename);
-	controller = alloca(len + 1);
-	(void)strlcpy(controller, filename, len + 1);
-
-	p = strchr(controller, '.');
-	if (p)
-		*p = '\0';
-
-	if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
-		ret = convert_devpath(value, converted_value);
-		if (ret < 0)
-			return ret;
-		value = converted_value;
-	}
-
-	h = get_hierarchy(ops, controller);
-	if (!h) {
-		ERROR("Failed to setup limits for the \"%s\" controller. "
-		      "The controller seems to be unused by \"cgfsng\" cgroup "
-		      "driver or not enabled on the cgroup hierarchy",
-		      controller);
-		errno = ENOENT;
-		return -ENOENT;
-	}
-
-	fullpath = must_make_path(h->container_full_path, filename, NULL);
-
-retry:
-	ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
-	if (ret != 0) {
-		if (retry_count < max_retry) {
-			SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath);
-			(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup);
-			(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
-			usleep(100 * 1000); /* 100 millisecond */
-			retry_count++;
-			goto retry;
-		}
-		lxc_write_error_message(ops->errfd,
-		                        "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".",
-		                        __FILE__, __LINE__, value, fullpath, strerror(errno));
-	}
-	free(fullpath);
-	return ret;
-}
-
-__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
-						    struct lxc_conf *conf,
-						    bool do_devices)
-{
-	__do_free struct lxc_list *sorted_cgroup_settings = NULL;
-	struct lxc_list *cgroup_settings = &conf->cgroup;
-	struct lxc_list *iterator, *next;
-	struct lxc_cgroup *cg;
-	bool ret = false;
-	char value[21 + 1] = { 0 };
-	long long int readvalue, setvalue;
-
-	if (!ops)
-		return ret_set_errno(false, ENOENT);
-
-	if (!conf)
-		return ret_set_errno(false, EINVAL);
-
-	cgroup_settings = &conf->cgroup;
-	if (lxc_list_empty(cgroup_settings))
-		return true;
-
-	if (!ops->hierarchies)
-		return ret_set_errno(false, EINVAL);
-
-	sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
-	if (!sorted_cgroup_settings)
-		return false;
-
-	lxc_list_for_each(iterator, sorted_cgroup_settings) {
-		cg = iterator->elem;
-
-		if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
-			const char *cgvalue = cg->value;
-			if (strcmp(cg->subsystem, "files.limit") == 0) {
-				if (lxc_safe_long_long(cgvalue, &setvalue) != 0) {
-					SYSERROR("Invalid integer value %s", cgvalue);
-					goto out;
-				}
-				if (setvalue <= 0) {
-					cgvalue = "max";
-				}
-			}
-			if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) {
-				if (do_devices && (errno == EACCES || errno == EPERM)) {
-					SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
-					continue;
-				}
-				SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
-				goto out;
-			}
-			DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue);
-		}
-
-		// isulad: check cpu shares
-		if (strcmp(cg->subsystem, "cpu.shares") == 0) {
-			if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
-				SYSERROR("Error get %s", cg->subsystem);
-				goto out;
-			}
-			trim(value);
-			if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
-				SYSERROR("Invalid value %s", cg->value);
-				goto out;
-			}
-			if (lxc_safe_long_long(value, &readvalue) != 0) {
-				SYSERROR("Invalid value %s", value);
-				goto out;
-			}
-			if (setvalue > readvalue) {
-				ERROR("The maximum allowed cpu-shares is %s", value);
-				lxc_write_error_message(ops->errfd,
-				                        "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
-				                        __FILE__, __LINE__, value);
-				goto out;
-			} else if (setvalue < readvalue) {
-				ERROR("The minimum allowed cpu-shares is %s", value);
-				lxc_write_error_message(ops->errfd,
-				                        "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
-				                        __FILE__, __LINE__, value);
-				goto out;
-			}
-		}
-	}
-
-	ret = true;
-	INFO("Limits for the legacy cgroup hierarchies have been setup");
-out:
-	lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
-		lxc_list_del(iterator);
-		free(iterator);
-	}

-	return ret;
-}
-#else
 __cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
 						    struct lxc_conf *conf,
 						    bool do_devices)
@@ -3644,7 +2740,6 @@ out:

 	return ret;
 }
-#endif

 /*
  * Some of the parsing logic comes from the original cgroup device v1
@@ -3856,12 +2951,6 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
 	return true;
 }

-#ifdef HAVE_ISULAD
-__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
-{
-	return true;
-}
-#else
 __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
 {
 	if (!ops)
@@ -3869,7 +2958,6 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)

 	return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
 }
-#endif

 __cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
 {
@@ -4019,22 +3107,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg

 		trim(base_cgroup);
 		prune_init_scope(base_cgroup);
-#ifdef HAVE_ISULAD
-		/* isulad: do not test writeable, if we run isulad in docker without cgroup namespace.
-		 * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */
-
-		/*
-		 * reason:base cgroup may be started with /system.slice when cg_hybrid_init
-		 *	read /proc/1/cgroup on host, and cgroup init will set all containers
-		 *	cgroup path under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc
-		 *	directory, this is not consistent with docker. The default cgroup path
-		 *	should be under /sys/fs/cgroup/<controller>/lxc directory.
-		 */

-		if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
-			base_cgroup[1] = '\0';
-		}
-#else
 		bool writeable;
 		if (type == CGROUP2_SUPER_MAGIC)
 			writeable = test_writeable_v2(mountpoint, base_cgroup);
@@ -4044,7 +3117,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
 			TRACE("The %s group is not writeable", base_cgroup);
 			continue;
 		}
-#endif
+
 		if (type == CGROUP2_SUPER_MAGIC) {
 			char *cgv2_ctrl_path;

@@ -4197,44 +3270,6 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
 	return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
 }

-#ifdef HAVE_ISULAD
-__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
-{
-	const char *cgroup_pattern;
-	const char *cgroup_tree;
-	__do_free char *container_cgroup = NULL, *__cgroup_tree = NULL;
-	size_t len;
-
-	if (!ops)
-		return ret_set_errno(-1, ENOENT);
-
-	/* copy system-wide cgroup information */
-	cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
-	if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0)
-		ops->cgroup_pattern = must_copy_string(cgroup_pattern);
-
-	if (conf->cgroup_meta.dir) {
-		cgroup_tree = conf->cgroup_meta.dir;
-		container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL);
-	} else if (ops->cgroup_pattern) {
-		__cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern);
-		if (!__cgroup_tree)
-			return ret_set_errno(-1, ENOMEM);
-
-		cgroup_tree = __cgroup_tree;
-		container_cgroup = must_concat(&len, cgroup_tree, NULL);
-	} else {
-		cgroup_tree = NULL;
-		container_cgroup = must_concat(&len, conf->name, NULL);
-	}
-	if (!container_cgroup)
-		return ret_set_errno(-1, ENOMEM);
-
-	ops->container_cgroup = move_ptr(container_cgroup);
-
-	return 0;
-}
-#else
 __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
 {
 	const char *cgroup_pattern;
@@ -4249,7 +3284,6 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf

 	return 0;
 }
-#endif

 struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 {
@@ -4266,12 +3300,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 		return NULL;

 	cgfsng_ops->data_init = cgfsng_data_init;
-#ifdef HAVE_ISULAD
-	cgfsng_ops->errfd = conf ? conf->errpipe[1] : -1;
-	cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy;
-#else
 	cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
-#endif
 	cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
 	cgfsng_ops->monitor_create = cgfsng_monitor_create;
 	cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
@@ -4284,9 +3313,6 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
 	cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
 	cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
 	cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
-#ifdef HAVE_ISULAD
-	cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path;
-#endif
 	cgfsng_ops->get = cgfsng_get;
 	cgfsng_ops->set = cgfsng_set;
 	cgfsng_ops->freeze = cgfsng_freeze;
diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c
new file mode 100644
index 000000000..82a4333f3
--- /dev/null
+++ b/src/lxc/cgroups/isulad_cgfsng.c
@@ -0,0 +1,3115 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved.
+ * Author: lifeng
+ * Create: 2020-11-02
+ * Description: provide the iSulad cgroup manager (cgfsng) implementation
+ * lxc: linux Container library
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  ******************************************************************************/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <grp.h>
+#include <linux/kdev_t.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "af_unix.h"
+#include "caps.h"
+#include "cgroup.h"
+#include "cgroup2_devices.h"
+#include "cgroup_utils.h"
+#include "commands.h"
+#include "conf.h"
+#include "config.h"
+#include "log.h"
+#include "macro.h"
+#include "mainloop.h"
+#include "memory_utils.h"
+#include "storage/storage.h"
+#include "utils.h"
+
+#ifndef HAVE_STRLCPY
+#include "include/strlcpy.h"
+#endif
+
+#ifndef HAVE_STRLCAT
+#include "include/strlcat.h"
+#endif
+
+lxc_log_define(isulad_cgfsng, cgroup);
+
+/* Given a pointer to a null-terminated array of pointers, realloc to add one
+ * entry, and point the new entry to NULL. Do not fail. Return the index to the
+ * second-to-last entry - that is, the one which is now available for use
+ * (keeping the list null-terminated).
+ */
+static int append_null_to_list(void ***list)
+{
+	int newentry = 0;
+
+	if (*list)
+		for (; (*list)[newentry]; newentry++)
+			;
+
+	*list = must_realloc(*list, (newentry + 2) * sizeof(void **));
+	(*list)[newentry + 1] = NULL;
+	return newentry;
+}
+
+/* Given a null-terminated array of strings, check whether @entry is one of the
+ * strings.
+ */
+static bool string_in_list(char **list, const char *entry)
+{
+	if (!list)
+		return false;
+
+	for (int i = 0; list[i]; i++)
+		if (strcmp(list[i], entry) == 0)
+			return true;
+
+	return false;
+}
+
+/* Return a newly allocated copy of @entry with "name=" prepended, i.e. turn
+ * "systemd" into "name=systemd". Do not fail.
+ */
+static char *cg_legacy_must_prefix_named(char *entry)
+{
+	size_t len;
+	char *prefixed;
+
+	len = strlen(entry);
+	prefixed = must_realloc(NULL, len + 6);
+
+	memcpy(prefixed, "name=", STRLITERALLEN("name="));
+	memcpy(prefixed + STRLITERALLEN("name="), entry, len);
+	prefixed[len + 5] = '\0';
+
+	return prefixed;
+}
+
+/* Append an entry to the clist. Do not fail. @clist must be NULL the first time
+ * we are called.
+ *
+ * We also handle named subsystems here. Any controller which is not a kernel
+ * subsystem, we prefix "name=". Any which is both a kernel and named subsystem,
+ * we refuse to use because we're not sure which we have here.
+ * (TODO: We could work around this in some cases by just remounting to be
+ * unambiguous, or by comparing mountpoint contents with current cgroup.)
+ *
+ * The last entry will always be NULL.
+ */
+static void must_append_controller(char **klist, char **nlist, char ***clist,
+				   char *entry)
+{
+	int newentry;
+	char *copy;
+
+	if (string_in_list(klist, entry) && string_in_list(nlist, entry)) {
+		ERROR("Refusing to use ambiguous controller \"%s\"", entry);
+		ERROR("It is both a named and kernel subsystem");
+		return;
+	}
+
+	newentry = append_null_to_list((void ***)clist);
+
+	if (strncmp(entry, "name=", 5) == 0)
+		copy = must_copy_string(entry);
+	else if (string_in_list(klist, entry))
+		copy = must_copy_string(entry);
+	else
+		copy = cg_legacy_must_prefix_named(entry);
+
+	(*clist)[newentry] = copy;
+}
+
+/* Given a handler's cgroup data, return the struct hierarchy for @controller
+ * (the empty unified hierarchy if @controller is NULL), or NULL if none exists.
+ */
+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
+{
+	if (!ops->hierarchies)
+		return log_trace_errno(NULL, errno, "There are no useable cgroup controllers");
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		if (!controller) {
+			/* This is the empty unified hierarchy. */
+			if (ops->hierarchies[i]->controllers &&
+			    !ops->hierarchies[i]->controllers[0])
+				return ops->hierarchies[i];
+			continue;
+		} else if (pure_unified_layout(ops) &&
+			   strcmp(controller, "devices") == 0) {
+			if (ops->unified->bpf_device_controller)
+				return ops->unified;
+			break;
+		}
+
+		if (string_in_list(ops->hierarchies[i]->controllers, controller))
+			return ops->hierarchies[i];
+	}
+
+	if (controller)
+		WARN("There is no useable %s controller", controller);
+	else
+		WARN("There is no empty unified cgroup hierarchy");
+
+	return ret_set_errno(NULL, ENOENT);
+}
+
+#define BATCH_SIZE 50
+static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
+{
+	int newbatches = (newlen / BATCH_SIZE) + 1;
+	int oldbatches = (oldlen / BATCH_SIZE) + 1;
+
+	if (!*mem || newbatches > oldbatches)
+		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
+}
+
+static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
+{
+	size_t full = oldlen + newlen;
+
+	batch_realloc(dest, oldlen, full + 1);
+
+	memcpy(*dest + oldlen, new, newlen + 1);
+}
+
+/* Slurp in a whole file */
+static char *read_file(const char *fnam)
+{
+	__do_free char *buf = NULL, *line = NULL;
+	__do_fclose FILE *f = NULL;
+	size_t len = 0, fulllen = 0;
+	int linelen;
+
+	f = fopen(fnam, "re");
+	if (!f)
+		return NULL;
+
+	while ((linelen = getline(&line, &len, f)) != -1) {
+		append_line(&buf, fulllen, line, linelen);
+		fulllen += linelen;
+	}
+
+	return move_ptr(buf);
+}
+
+static inline bool is_unified_hierarchy(const struct hierarchy *h)
+{
+	return h->version == CGROUP2_SUPER_MAGIC;
+}
+
+/* Given two null-terminated lists of strings, return true if any string is in
+ * both.
+ */
+static bool controller_lists_intersect(char **l1, char **l2)
+{
+	if (!l1 || !l2)
+		return false;
+
+	for (int i = 0; l1[i]; i++)
+		if (string_in_list(l2, l1[i]))
+			return true;
+
+	return false;
+}
+
+/* For a null-terminated list of controllers @clist, return true if any of those
+ * controllers is already listed in the null-terminated hierarchy list @hlist.
+ * Realistically, if one is present, all must be present.
+ */
+static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
+{
+	if (!hlist)
+		return false;
+
+	for (int i = 0; hlist[i]; i++)
+		if (controller_lists_intersect(hlist[i]->controllers, clist))
+			return true;
+
+	return false;
+}
+
+/* Return true if the controller @entry is found in the null-terminated list of
+ * hierarchies @hlist.
+ */
+static bool controller_found(struct hierarchy **hlist, char *entry)
+{
+	if (!hlist)
+		return false;
+
+	for (int i = 0; hlist[i]; i++)
+		if (string_in_list(hlist[i]->controllers, entry))
+			return true;
+
+	return false;
+}
+
+/* Return true if all of the controllers which we require have been found. The
+ * required list is ops->cgroup_use; if that is unset, nothing is required.
+ */
+static bool all_controllers_found(struct cgroup_ops *ops)
+{
+	struct hierarchy **hlist;
+
+	if (!ops->cgroup_use)
+		return true;
+
+	hlist = ops->hierarchies;
+	for (char **cur = ops->cgroup_use; cur && *cur; cur++)
+		if (!controller_found(hlist, *cur))
+			return log_error(false, "No %s controller mountpoint found", *cur);
+
+	return true;
+}
+
+/* Get the controllers from a mountinfo line. There are other ways we could get
+ * this info. For lxcfs, field 3 is /cgroup/controller-list. For cgroupfs, we
+ * could parse the mount options. But we simply assume that the mountpoint must
+ * be /sys/fs/cgroup/controller-list
+ */
+static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
+					int type)
+{
+	/* The fourth field is /sys/fs/cgroup/comma-delimited-controller-list
+	 * for legacy hierarchies.
+	 */
+	__do_free_string_list char **aret = NULL;
+	int i;
+	char *p2, *tok;
+	char *p = line, *sep = ",";
+
+	for (i = 0; i < 4; i++) {
+		p = strchr(p, ' ');
+		if (!p)
+			return NULL;
+		p++;
+	}
+
+	/* Note, if we change how mountinfo works, then our caller will need to
+	 * verify /sys/fs/cgroup/ in this field.
+	 */
+	if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
+		return log_error(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p);
+
+	p += 15;
+	p2 = strchr(p, ' ');
+	if (!p2)
+		return log_error(NULL, "Corrupt mountinfo");
+	*p2 = '\0';
+
+	if (type == CGROUP_SUPER_MAGIC) {
+		__do_free char *dup = NULL;
+
+		/* strdup() here for v1 hierarchies. Otherwise
+		 * lxc_iterate_parts() will destroy mountpoints such as
+		 * "/sys/fs/cgroup/cpu,cpuacct".
+		 */
+		dup = must_copy_string(p);
+		if (!dup)
+			return NULL;
+
+		lxc_iterate_parts (tok, dup, sep)
+			must_append_controller(klist, nlist, &aret, tok);
+	}
+	*p2 = ' ';
+
+	return move_ptr(aret);
+}
+
+static char **cg_unified_make_empty_controller(void)
+{
+	__do_free_string_list char **aret = NULL;
+	int newentry;
+
+	newentry = append_null_to_list((void ***)&aret);
+	aret[newentry] = NULL;
+	return move_ptr(aret);
+}
+
+static char **cg_unified_get_controllers(const char *file)
+{
+	__do_free char *buf = NULL;
+	__do_free_string_list char **aret = NULL;
+	char *sep = " \t\n";
+	char *tok;
+
+	buf = read_file(file);
+	if (!buf)
+		return NULL;
+
+	lxc_iterate_parts(tok, buf, sep) {
+		int newentry;
+		char *copy;
+
+		newentry = append_null_to_list((void ***)&aret);
+		copy = must_copy_string(tok);
+		aret[newentry] = copy;
+	}
+
+	return move_ptr(aret);
+}
+
+static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
+				       char *container_base_path, int type)
+{
+	struct hierarchy *new;
+	int newentry;
+
+	new = zalloc(sizeof(*new));
+	new->controllers = clist;
+	new->mountpoint = mountpoint;
+	new->container_base_path = container_base_path;
+	new->version = type;
+	new->cgfd_con = -EBADF;
+	new->cgfd_mon = -EBADF;
+
+	newentry = append_null_to_list((void ***)h);
+	(*h)[newentry] = new;
+	return new;
+}
+
+/* Get a copy of the mountpoint from @line, which is a line from
+ * /proc/self/mountinfo.
+ */
+static char *cg_hybrid_get_mountpoint(char *line)
+{
+	char *p = line, *sret = NULL;
+	size_t len;
+	char *p2;
+
+	for (int i = 0; i < 4; i++) {
+		p = strchr(p, ' ');
+		if (!p)
+			return NULL;
+		p++;
+	}
+
+	if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
+		return NULL;
+
+	p2 = strchr(p + 15, ' ');
+	if (!p2)
+		return NULL;
+	*p2 = '\0';
+
+	len = strlen(p);
+	sret = must_realloc(NULL, len + 1);
+	memcpy(sret, p, len);
+	sret[len] = '\0';
+
+	return sret;
+}
+
+/* Given a multi-line string, return a null-terminated copy of the current line. */
+static char *copy_to_eol(char *p)
+{
+	char *p2, *sret;
+	size_t len;
+
+	p2 = strchr(p, '\n');
+	if (!p2)
+		return NULL;
+
+	len = p2 - p;
+	sret = must_realloc(NULL, len + 1);
+	memcpy(sret, p, len);
+	sret[len] = '\0';
+
+	return sret;
+}
+
+/* cgline: pointer to character after the first ':' in a line in a \n-terminated
+ * /proc/self/cgroup file. Check whether controller c is present.
+ */
+static bool controller_in_clist(char *cgline, char *c)
+{
+	__do_free char *tmp = NULL;
+	char *tok, *eol;
+	size_t len;
+
+	eol = strchr(cgline, ':');
+	if (!eol)
+		return false;
+
+	len = eol - cgline;
+	tmp = must_realloc(NULL, len + 1);
+	memcpy(tmp, cgline, len);
+	tmp[len] = '\0';
+
+	lxc_iterate_parts(tok, tmp, ",")
+		if (strcmp(tok, c) == 0)
+			return true;
+
+	return false;
+}
+
+/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
+ * @controller.
+ */
+static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller,
+					  int type)
+{
+	char *p = basecginfo;
+
+	for (;;) {
+		bool is_cgv2_base_cgroup = false;
+
+		/* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
+		if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
+			is_cgv2_base_cgroup = true;
+
+		p = strchr(p, ':');
+		if (!p)
+			return NULL;
+		p++;
+
+		if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
+			p = strchr(p, ':');
+			if (!p)
+				return NULL;
+			p++;
+			return copy_to_eol(p);
+		}
+
+		p = strchr(p, '\n');
+		if (!p)
+			return NULL;
+		p++;
+	}
+}
+
+static void must_append_string(char ***list, char *entry)
+{
+	int newentry;
+	char *copy;
+
+	newentry = append_null_to_list((void ***)list);
+	copy = must_copy_string(entry);
+	(*list)[newentry] = copy;
+}
+
+static int get_existing_subsystems(char ***klist, char ***nlist)
+{
+	__do_free char *line = NULL;
+	__do_fclose FILE *f = NULL;
+	size_t len = 0;
+
+	f = fopen("/proc/self/cgroup", "re");
+	if (!f)
+		return -1;
+
+	while (getline(&line, &len, f) != -1) {
+		char *p, *p2, *tok;
+		p = strchr(line, ':');
+		if (!p)
+			continue;
+		p++;
+		p2 = strchr(p, ':');
+		if (!p2)
+			continue;
+		*p2 = '\0';
+
+		/* If the kernel has cgroup v2 support, then /proc/self/cgroup
+		 * contains an entry of the form:
+		 *
+		 *	0::/some/path
+		 *
+		 * In this case we use "cgroup2" as controller name.
+		 */
+		if ((p2 - p) == 0) {
+			must_append_string(klist, "cgroup2");
+			continue;
+		}
+
+		lxc_iterate_parts(tok, p, ",") {
+			if (strncmp(tok, "name=", 5) == 0)
+				must_append_string(nlist, tok);
+			else
+				must_append_string(klist, tok);
+		}
+	}
+
+	return 0;
+}
+
+static char *trim(char *s)
+{
+	size_t len;
+
+	len = strlen(s);
+	while ((len > 1) && (s[len - 1] == '\n'))
+		s[--len] = '\0';
+
+	return s;
+}
+
+static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
+{
+	int i;
+	struct hierarchy **it;
+
+	if (!ops->hierarchies) {
+		TRACE("  No hierarchies found");
+		return;
+	}
+
+	TRACE("  Hierarchies:");
+	for (i = 0, it = ops->hierarchies; it && *it; it++, i++) {
+		int j;
+		char **cit;
+
+		TRACE("  %d: base_cgroup: %s", i, (*it)->container_base_path ? (*it)->container_base_path : "(null)");
+		TRACE("      mountpoint:  %s", (*it)->mountpoint ? (*it)->mountpoint : "(null)");
+		TRACE("      controllers:");
+		for (j = 0, cit = (*it)->controllers; cit && *cit; cit++, j++)
+			TRACE("      %d: %s", j, *cit);
+	}
+}
+
+static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
+					      char **nlist)
+{
+	int k;
+	char **it;
+
+	TRACE("basecginfo is:");
+	TRACE("%s", basecginfo);
+
+	for (k = 0, it = klist; it && *it; it++, k++)
+		TRACE("kernel subsystem %d: %s", k, *it);
+
+	for (k = 0, it = nlist; it && *it; it++, k++)
+		TRACE("named subsystem %d: %s", k, *it);
+}
+
+struct generic_userns_exec_data {
+	struct hierarchy **hierarchies;
+	const char *container_cgroup;
+	struct lxc_conf *conf;
+	uid_t origuid; /* target uid in parent namespace */
+	char *path;
+};
+
+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
+			const char *container_cgroup)
+{
+	if (!container_cgroup || !hierarchies)
+		return 0;
+
+	for (int i = 0; hierarchies[i]; i++) {
+		struct hierarchy *h = hierarchies[i];
+		int ret;
+
+		if (!h->container_full_path) {
+			h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL);
+		}
+
+		ret = lxc_rm_rf(h->container_full_path);
+		if (ret < 0) {
+			SYSERROR("Failed to destroy \"%s\"", h->container_full_path);
+			return -1;
+		}
+
+		free_disarm(h->container_full_path);
+	}
+
+	return 0;
+}
+
+static int isulad_cgroup_tree_remove_wrapper(void *data)
+{
+	struct generic_userns_exec_data *arg = data;
+	uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+	gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+	int ret;
+
+	if (!lxc_setgroups(0, NULL) && errno != EPERM)
+		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+	ret = setresgid(nsgid, nsgid, nsgid);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+				       (int)nsgid, (int)nsgid, (int)nsgid);
+
+	ret = setresuid(nsuid, nsuid, nsuid);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+				       (int)nsuid, (int)nsuid, (int)nsuid);
+
+	return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
+}
+
+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+						struct lxc_handler *handler)
+{
+	int ret;
+
+	if (!ops) {
+		ERROR("Called with uninitialized cgroup operations");
+		return false;
+	}
+
+	if (!ops->hierarchies) {
+		return false;
+	}
+
+	if (!handler) {
+		ERROR("Called with uninitialized handler");
+		return false;
+	}
+
+	if (!handler->conf) {
+		ERROR("Called with uninitialized conf");
+		return false;
+	}
+
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
+	if (ret < 0)
+		WARN("Failed to detach bpf program from cgroup");
+#endif
+
+	if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) {
+		struct generic_userns_exec_data wrap = {
+			.conf			= handler->conf,
+			.container_cgroup	= ops->container_cgroup,
+			.hierarchies		= ops->hierarchies,
+			.origuid		= 0,
+		};
+		ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
+				    &wrap, "cgroup_tree_remove_wrapper");
+	} else {
+		ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
+	}
+	if (ret < 0) {
+		SYSWARN("Failed to destroy cgroups");
+		return false;
+	}
+
+	return true;
+}
+
+__cgfsng_ops static void isulad_cgfsng_monitor_destroy(struct cgroup_ops *ops,
+						struct lxc_handler *handler)
+{
+	return;
+}
+
+__cgfsng_ops static inline bool isulad_cgfsng_monitor_create(struct cgroup_ops *ops,
+						      struct lxc_handler *handler)
+{
+	return true;
+}
+
+static bool isulad_copy_parent_file(char *path, char *file)
+{
+	int ret;
+	int len = 0;
+	char *value = NULL;
+	char *current = NULL;
+	char *fpath = NULL;
+	char *lastslash = NULL;
+	char oldv;
+
+	fpath = must_make_path(path, file, NULL);
+	current = read_file(fpath);
+
+	if (current == NULL) {
+		SYSERROR("Failed to read file \"%s\"", fpath);
+		free(fpath);
+		return false;
+	}
+
+	if (strcmp(current, "\n") != 0) {
+		free(fpath);
+		free(current);
+		return true;
+	}
+
+	free(fpath);
+	free(current);
+
+	lastslash = strrchr(path, '/');
+	if (lastslash == NULL) {
+		ERROR("Failed to detect \"/\" in \"%s\"", path);
+		return false;
+	}
+	oldv = *lastslash;
+	*lastslash = '\0';
+	fpath = must_make_path(path, file, NULL);
+	*lastslash = oldv;
+	len = lxc_read_from_file(fpath, NULL, 0);
+	if (len <= 0)
+		goto on_error;
+
+	value = must_realloc(NULL, len + 1);
+	ret = lxc_read_from_file(fpath, value, len);
+	if (ret != len)
+		goto on_error;
+	free(fpath);
+
+	fpath = must_make_path(path, file, NULL);
+	ret = lxc_write_to_file(fpath, value, len, false, 0666);
+	if (ret < 0)
+		SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
+	free(fpath);
+	free(value);
+	return ret >= 0;
+
+on_error:
+	SYSERROR("Failed to read file \"%s\"", fpath);
+	free(fpath);
+	free(value);
+	return false;
+}
+
+static bool build_sub_cpuset_cgroup_dir(char *cgpath)
+{
+	int ret;
+
+	ret = mkdir_p(cgpath, 0755);
+	if (ret < 0) {
+		if (errno != EEXIST) {
+			SYSERROR("Failed to create directory \"%s\"", cgpath);
+			return false;
+		}
+	}
+
+	/* copy parent's settings */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) {
+		SYSERROR("Failed to copy \"cpuset.cpus\" settings");
+		return false;
+	}
+
+	/* copy parent's settings */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) {
+		SYSERROR("Failed to copy \"cpuset.mems\" settings");
+		return false;
+	}
+
+	return true;
+}
+
+static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
+{
+	char *cgpath, *slash;
+	bool sub_mk_success = false;
+
+	if (!string_in_list(h->controllers, "cpuset"))
+		return true;
+
+	cgname += strspn(cgname, "/");
+
+	slash = strchr(cgname, '/');
+
+	if (slash != NULL) {
+		while (slash) {
+			*slash = '\0';
+			cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+			sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath);
+			free(cgpath);
+			*slash = '/';
+			if (!sub_mk_success) {
+				return false;
+			}
+			slash = strchr(slash + 1, '/');
+		}
+	}
+
+	cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+	sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath);
+	free(cgpath);
+	if (!sub_mk_success) {
+		return false;
+	}
+
+	return true;
+}
+
+static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode)
+{
+	const char *tmp = dir;
+	const char *orig = dir;
+
+	do {
+		int ret;
+		size_t cur_len;
+		char *makeme;
+
+		dir = tmp + strspn(tmp, "/");
+		tmp = dir + strcspn(dir, "/");
+
+		errno = ENOMEM;
+		cur_len = dir - orig;
+		makeme = strndup(orig, cur_len);
+		if (!makeme)
+			return -1;
+
+		ret = mkdir(makeme, mode);
+		if (ret < 0) {
+			if (errno != EEXIST) {
+				SYSERROR("Failed to create directory \"%s\"", makeme);
+				free(makeme);
+				return -1;
+			}
+		}
+		free(makeme);
+
+	} while (tmp != dir);
+
+	return 0;
+}
+
+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd)
+{
+	int ret;
+	__do_free char *path = NULL;
+
+	path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+
+	if (file_exists(path)) { // it must not already exist
+		ERROR("Cgroup path \"%s\" already exist.", path);
+		lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.",
+		                        __FILE__, __LINE__, path);
+		return false;
+	}
+
+	if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+		ERROR("Failed to handle legacy cpuset controller");
+		return false;
+	}
+
+	ret = isulad_mkdir_eexist_on_last(path, 0755);
+	if (ret < 0) {
+		ERROR("Failed to create cgroup \"%s\"", path);
+		return false;
+	}
+
+	h->cgfd_con = lxc_open_dirfd(path);
+	if (h->cgfd_con < 0)
+		return log_error_errno(false, errno, "Failed to open %s", path);
+
+	if (h->container_full_path == NULL) {
+		h->container_full_path = move_ptr(path);
+	}
+
+	return true;
+}
+
+/* isulad: create the cgroup path in every hierarchy; return false on the first failure */
+__cgfsng_ops static inline bool isulad_cgfsng_payload_create(struct cgroup_ops *ops,
+						      struct lxc_handler *handler)
+{
+	int i;
+	char *container_cgroup = ops->container_cgroup;
+
+	if (!container_cgroup) {
+		ERROR("cgfsng_create container_cgroup is invalid");
+		return false;
+	}
+
+	for (i = 0; ops->hierarchies[i]; i++) {
+		if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) {
+			SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+__cgfsng_ops static bool isulad_cgfsng_monitor_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	return true;
+}
+
+__cgfsng_ops static bool isulad_cgfsng_payload_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	int len;
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, ENOENT);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+
+	len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		int ret;
+		char *fullpath;
+		int retry_count = 0;
+		int max_retry = 10;
+
+		fullpath = must_make_path(ops->hierarchies[i]->container_full_path,
+		                          "cgroup.procs", NULL);
+retry:
+		ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
+		if (ret != 0) {
+			if (retry_count < max_retry) {
+				SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count);
+				(void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup);
+				(void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755);
+				usleep(100 * 1000); /* 100 millisecond */
+				retry_count++;
+				goto retry;
+			}
+			SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
+			free(fullpath);
+			return false;
+		}
+		free(fullpath);
+	}
+
+	return true;
+}
+
+static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
+		      gid_t chown_gid, mode_t chmod_mode)
+{
+	int ret;
+
+	ret = fchownat(dirfd, path, chown_uid, chown_gid,
+		       AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+	if (ret < 0)
+		return log_warn_errno(-1,
+				      errno, "Failed to fchownat(%d, %s, %d, %d, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW )",
+				      dirfd, path, (int)chown_uid,
+				      (int)chown_gid);
+
+	ret = fchmodat(dirfd, (*path != '\0') ? path : ".", chmod_mode, 0);
+	if (ret < 0)
+		return log_warn_errno(-1, errno, "Failed to fchmodat(%d, %s, %d, AT_SYMLINK_NOFOLLOW)",
+				      dirfd, path, (int)chmod_mode);
+
+	return 0;
+}
+
+/* chgrp the container cgroups to container group.  We leave
+ * the container owner as cgroup owner.  So we must make the
+ * directories 775 so that the container can create sub-cgroups.
+ *
+ * Also chown the tasks and cgroup.procs files.  Those may not
+ * exist depending on kernel version.
+ */
+static int chown_cgroup_wrapper(void *data)
+{
+	int ret;
+	uid_t destuid;
+	struct generic_userns_exec_data *arg = data;
+	uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+	gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+
+	if (!lxc_setgroups(0, NULL) && errno != EPERM)
+		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+	ret = setresgid(nsgid, nsgid, nsgid);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+				       (int)nsgid, (int)nsgid, (int)nsgid);
+
+	ret = setresuid(nsuid, nsuid, nsuid);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+				       (int)nsuid, (int)nsuid, (int)nsuid);
+
+	destuid = get_ns_uid(arg->origuid);
+	if (destuid == LXC_INVALID_UID)
+		destuid = 0;
+
+	for (int i = 0; arg->hierarchies[i]; i++) {
+		int dirfd = arg->hierarchies[i]->cgfd_con;
+
+		(void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
+
+		/*
+		 * Failures to chown() these are inconvenient but not
+		 * detrimental. We leave these owned by the container launcher,
+		 * so that container root can write to the files to attach.  We
+		 * chmod() them 664 so that container systemd can write to the
+		 * files (which systemd in wily insists on doing).
+		 */
+
+		if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC)
+			(void)fchowmodat(dirfd, "tasks", destuid, nsgid, 0664);
+
+		(void)fchowmodat(dirfd, "cgroup.procs", destuid, nsgid, 0664);
+
+		if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
+			continue;
+
+		for (char **p = arg->hierarchies[i]->cgroup2_chown; p && *p; p++)
+			(void)fchowmodat(dirfd, *p, destuid, nsgid, 0664);
+	}
+
+	return 0;
+}
+
+/* Chown the container's cgroups for its user namespace; a no-op (success)
+ * when there are no hierarchies or no id mapping. */
+__cgfsng_ops static bool isulad_cgfsng_chown(struct cgroup_ops *ops,
+				      struct lxc_conf *conf)
+{
+	struct generic_userns_exec_data wrap;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+	if (!ops->hierarchies)
+		return true;
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, ENOENT);
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	/* Without an id mapping there is nothing to chown. */
+	if (lxc_list_empty(&conf->id_map))
+		return true;
+
+	wrap.conf = conf;
+	wrap.hierarchies = ops->hierarchies;
+	wrap.path = NULL;
+	wrap.origuid = geteuid();
+
+	if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, "chown_cgroup_wrapper") < 0)
+		return log_error_errno(false, errno, "Error requesting cgroup chown in new user namespace");
+
+	return true;
+}
+
+/*
+ * Close the per-hierarchy container cgroup fds that are no longer needed.
+ *
+ * We don't keep the fds for non-unified hierarchies around mainly because
+ * we don't make use of them anymore after the core cgroup setup is done
+ * but also because there are quite a lot of them.
+ */
+__cgfsng_ops void isulad_cgfsng_payload_finalize(struct cgroup_ops *ops)
+{
+	if (!ops || !ops->hierarchies)
+		return;
+
+	for (struct hierarchy **it = ops->hierarchies; *it; it++) {
+		struct hierarchy *h = *it;
+
+		if (is_unified_hierarchy(h))
+			continue;
+		close_prot_errno_disarm(h->cgfd_con);
+	}
+}
+
+/* Subdirs are only needed below the cgroup-full:* types, which are done. */
+static inline bool cg_mount_needs_subdirs(int type)
+{
+	return type < LXC_AUTO_CGROUP_FULL_RO;
+}
+
+/* After $rootfs/sys/fs/cgroup/<controller>/the/cg/path has been created,
+ * remount @controllerpath read-only if needed (RO and MIXED types) and
+ * bind-mount the container's cgroup from the host hierarchy onto @cgpath.
+ */
+static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
+				       char *controllerpath, char *cgpath,
+				       const char *container_cgroup)
+{
+	__do_free char *sourcepath = NULL;
+	int ret, remount_flags;
+	int flags = MS_BIND;
+
+	if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
+		ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL); /* bind onto itself first, then remount ro below */
+		if (ret < 0)
+			return log_error_errno(-1, errno, "Failed to bind mount \"%s\" onto \"%s\"",
+					       controllerpath, controllerpath);
+
+		remount_flags = add_required_remount_flags(controllerpath,
+							   controllerpath,
+							   flags | MS_REMOUNT);
+		ret = mount(controllerpath, controllerpath, "cgroup",
+			    remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY,
+			    NULL);
+		if (ret < 0)
+			return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", controllerpath);
+
+		INFO("Remounted %s read-only", controllerpath);
+	}
+
+	sourcepath = must_make_path(h->mountpoint, h->container_base_path,
+				    container_cgroup, NULL); /* container cgroup inside the host hierarchy */
+	if (type == LXC_AUTO_CGROUP_RO)
+		flags |= MS_RDONLY; /* only the RO type makes the container's own cgroup read-only */
+
+	ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to mount \"%s\" onto \"%s\"",
+				       h->controllers[0], cgpath); /* note: logs the first controller name, not sourcepath */
+	INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
+
+	if (flags & MS_RDONLY) {
+		remount_flags = add_required_remount_flags(sourcepath, cgpath,
+							   flags | MS_REMOUNT);
+		ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL); /* second pass: remount with the required flags */
+		if (ret < 0)
+			return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", cgpath);
+		INFO("Remounted %s read-only", cgpath);
+	}
+
+	INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
+	return 0;
+}
+
+/* __cg_mount_direct
+ *
+ * Mount a cgroup hierarchy at @controllerpath directly, i.e. without any
+ * bind-mounts. The main use-cases are mounting cgroup hierarchies in
+ * cgroup namespaces and mounting cgroups for the LXC_AUTO_CGROUP_FULL
+ * option.
+ *
+ * Returns 0 on success, -ENOMEM if the controller list cannot be built
+ * and -1 if the mount itself fails.
+ */
+static int __cg_mount_direct(int type, struct hierarchy *h,
+			     const char *controllerpath)
+{
+	__do_free char *controllers = NULL;
+	unsigned long flags = MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_RELATIME;
+	bool is_unified = (h->version == CGROUP2_SUPER_MAGIC);
+	char *fstype = is_unified ? "cgroup2" : "cgroup";
+
+	/* The read-only types get a read-only mount. */
+	if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO)
+		flags |= MS_RDONLY;
+
+	/* Legacy hierarchies pass their comma-separated controllers as mount data. */
+	if (!is_unified) {
+		controllers = lxc_string_join(",", (const char **)h->controllers, false);
+		if (!controllers)
+			return -ENOMEM;
+	}
+
+	if (mount("cgroup", controllerpath, fstype, flags, controllers) < 0)
+		return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
+				       controllerpath, fstype);
+
+	DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
+
+	return 0;
+}
+
+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
+					       const char *controllerpath)
+{
+	return __cg_mount_direct(type, h, controllerpath); /* plain direct mount, no type filtering */
+}
+
+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+				       const char *controllerpath)
+{
+	/* Only the cgroup-full:* types are mounted directly; others are a no-op. */
+	if (type >= LXC_AUTO_CGROUP_FULL_RO && type <= LXC_AUTO_CGROUP_FULL_MIXED)
+		return __cg_mount_direct(type, h, controllerpath);
+	return 0;
+}
+
+__cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops,
+                                      struct lxc_handler *handler,
+                                      const char *root, int type)
+{
+	int i, ret;
+	char *tmpfspath = NULL;
+	char *systemdpath = NULL;
+	char *unifiedpath = NULL;
+	bool has_cgns = false, retval = false, wants_force_mount = false;
+	char **merged = NULL; /* mountpoint names containing ',' that get per-controller symlinks later */
+
+	if ((type & LXC_AUTO_CGROUP_MASK) == 0)
+		return true; /* no cgroup automount requested */
+
+	if (type & LXC_AUTO_CGROUP_FORCE) {
+		type &= ~LXC_AUTO_CGROUP_FORCE;
+		wants_force_mount = true;
+	}
+
+	if (!wants_force_mount) { /* otherwise derive it from the CAP_SYS_ADMIN entries in the capability lists */
+		if (!lxc_list_empty(&handler->conf->keepcaps))
+			wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+		else
+			wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+	}
+
+	has_cgns = cgns_supported();
+	if (has_cgns && !wants_force_mount)
+		return true; /* with cgroup namespaces and no forced mount nothing is mounted here */
+
+	if (type == LXC_AUTO_CGROUP_NOSPEC)
+		type = LXC_AUTO_CGROUP_MIXED;
+	else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
+		type = LXC_AUTO_CGROUP_FULL_MIXED;
+
+	/* Mount a tmpfs on $root/sys/fs/cgroup to hold the per-controller mountpoints. */
+	tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+	if (mkdir_p(tmpfspath, 0755) < 0) {
+		ERROR("Failed to create directory: %s", tmpfspath);
+		goto on_error;
+	}
+	ret = safe_mount(NULL, tmpfspath, "tmpfs",
+	                 MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+	                 "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context);
+	if (ret < 0)
+		goto on_error;
+
+	for (i = 0; ops->hierarchies[i]; i++) { /* NOTE(review): ops, ops->hierarchies and handler are not NULL-checked in this function - confirm callers guarantee them */
+		char *controllerpath = NULL;
+		char *path2 = NULL;
+		struct hierarchy *h = ops->hierarchies[i];
+		char *controller = strrchr(h->mountpoint, '/');
+
+		if (!controller)
+			continue;
+		controller++; /* last path component of the host mountpoint */
+
+		// isulad: remember co-mounted names ("a,b") so each part can be symlinked to it below
+		if (strchr(controller, ',') != NULL) {
+			int pret;
+			pret = lxc_append_string(&merged, controller);
+			if (pret < 0)
+				goto on_error;
+		}
+
+		controllerpath = must_make_path(tmpfspath, controller, NULL);
+		if (dir_exists(controllerpath)) {
+			free(controllerpath);
+			continue;
+		}
+
+		ret = mkdir(controllerpath, 0755);
+		if (ret < 0) {
+			SYSERROR("Error creating cgroup path: %s", controllerpath);
+			free(controllerpath);
+			goto on_error;
+		}
+
+		if (has_cgns && wants_force_mount) {
+			/* If cgroup namespaces are supported but the container
+			 * will not have CAP_SYS_ADMIN after it has started we
+			 * need to mount the cgroups manually.
+			 */
+			ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+			free(controllerpath);
+			if (ret < 0)
+				goto on_error;
+
+			continue;
+		}
+
+		ret = cg_mount_cgroup_full(type, h, controllerpath); /* no-op unless type is one of the cgroup-full types */
+		if (ret < 0) {
+			free(controllerpath);
+			goto on_error;
+		}
+
+		if (!cg_mount_needs_subdirs(type)) {
+			free(controllerpath);
+			continue;
+		}
+
+		// isulad: ops->container_cgroup is left out of the target so no "lxc" directory shows up below /sys/fs/cgroup/xxx in the container,
+		// isulad: and h->container_base_path is left out so no /sys/fs/cgroup/xxx/subgroup level shows up either
+		path2 = must_make_path(controllerpath, NULL);
+		ret = mkdir_p(path2, 0755);
+		if (ret < 0) {
+			free(controllerpath);
+			free(path2);
+			goto on_error;
+		}
+
+		ret = cg_legacy_mount_controllers(type, h, controllerpath,
+		                                  path2, ops->container_cgroup);
+		free(controllerpath);
+		free(path2);
+		if (ret < 0)
+			goto on_error;
+	}
+
+	// isulad: for every co-mounted name "a,b" create the symlinks a -> "a,b" and b -> "a,b" in the tmpfs
+	if (merged) {
+		char **mc = NULL;
+		for (mc = merged; *mc; mc++) {
+			char *token = NULL;
+			char *copy = must_copy_string(*mc);
+			lxc_iterate_parts(token, copy, ",") {
+				int mret;
+				char *link;
+				link = must_make_path(tmpfspath, token, NULL);
+				mret = symlink(*mc, link);
+				if (mret < 0 && errno != EEXIST) {
+					SYSERROR("Failed to create link %s for target %s", link, *mc);
+					free(copy);
+					free(link);
+					goto on_error;
+				}
+				free(link);
+			}
+			free(copy);
+		}
+	}
+
+
+	// isulad: remount the /sys/fs/cgroup tmpfs read-only for the RO types
+	if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
+		ret = mount(tmpfspath, tmpfspath, "bind",
+		            MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
+		if (ret < 0) {
+			SYSERROR("Failed to remount /sys/fs/cgroup.");
+			goto on_error;
+		}
+	}
+
+	// isulad: system containers (conf->systemd == "true") get /sys/fs/cgroup/systemd remounted without MS_RDONLY
+	if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0)
+	{
+		unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL);
+		if (dir_exists(unifiedpath))
+		{
+			ret = umount2(unifiedpath, MNT_DETACH); /* detach the "unified" mount if that directory exists */
+			if (ret < 0)
+			{
+				SYSERROR("Failed to umount /sys/fs/cgroup/unified.");
+				goto on_error;
+			}
+		}
+
+		systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL);
+		ret = mount(systemdpath, systemdpath, "bind",
+					MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL);
+		if (ret < 0)
+		{
+			SYSERROR("Failed to remount /sys/fs/cgroup/systemd.");
+			goto on_error;
+		}
+	}
+
+	retval = true;
+
+on_error: /* shared exit path: also reached on success, with retval == true */
+	free(tmpfspath);
+	if (systemdpath != NULL)
+	{
+		free(systemdpath);
+	}
+	if (unifiedpath != NULL)
+	{
+		free(unifiedpath);
+	}
+	lxc_free_array((void **)merged, free);
+	return retval;
+}
+
+/* Only root needs to escape to the cgroup of its init.
+ *
+ * For every hierarchy "0" is written to the cgroup.procs file below the
+ * hierarchy's container_base_path. */
+__cgfsng_ops static bool isulad_cgfsng_escape(const struct cgroup_ops *ops,
+				       struct lxc_conf *conf)
+{
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+	if (!ops->hierarchies)
+		return true;
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	/* Relative cgroup setups and non-root callers stay where they are. */
+	if (conf->cgroup_meta.relative || geteuid() != 0)
+		return true;
+
+	for (struct hierarchy **it = ops->hierarchies; *it; it++) {
+		__do_free char *procs = NULL;
+
+		procs = must_make_path((*it)->mountpoint, (*it)->container_base_path,
+				       "cgroup.procs", NULL);
+
+		/* The length of 2 includes the terminating NUL byte of "0". */
+		if (lxc_write_to_file(procs, "0", 2, false, 0666) != 0)
+			return log_error_errno(false, errno, "Failed to escape to cgroup \"%s\"", procs);
+	}
+
+	return true;
+}
+
+/* Count the entries of the NULL-terminated ops->hierarchies array. */
+__cgfsng_ops static int isulad_cgfsng_num_hierarchies(struct cgroup_ops *ops)
+{
+	int count = 0;
+
+	if (!ops)
+		return ret_set_errno(-1, ENOENT);
+	if (!ops->hierarchies)
+		return 0;
+
+	while (ops->hierarchies[count])
+		count++;
+
+	return count;
+}
+
+/* Store the controller list of hierarchy @n in @out.  Fails with ENOENT if
+ * fewer than @n + 1 hierarchies exist and with EINVAL for a negative @n.
+ */
+__cgfsng_ops static bool isulad_cgfsng_get_hierarchies(struct cgroup_ops *ops, int n,
+						char ***out)
+{
+	if (!ops || !ops->hierarchies)
+		return ret_set_errno(false, ENOENT);
+
+	if (n < 0)
+		return ret_set_errno(false, EINVAL);
+
+	/* Entry @n itself must exist too: the array is NULL-terminated. */
+	for (int i = 0; i <= n; i++)
+		if (!ops->hierarchies[i])
+			return ret_set_errno(false, ENOENT);
+
+	*out = ops->hierarchies[n]->controllers;
+	return true;
+}
+
+/* Freeze via the legacy freezer controller by writing "FROZEN" to freezer.state. */
+static int cg_legacy_freeze(struct cgroup_ops *ops)
+{
+	struct hierarchy *h = get_hierarchy(ops, "freezer");
+
+	if (!h)
+		return ret_set_errno(-1, ENOENT);
+	/* Must be int, not bool: as bool, failure (-1) became 1 and success 0. */
+	return lxc_write_openat(h->container_full_path, "freezer.state",
+				"FROZEN", STRLITERALLEN("FROZEN"));
+}
+
+/* Mainloop callback for cgroup.events: returns LXC_MAINLOOP_CLOSE once the
+ * file reports the "frozen" state passed via @cbdata (1 or 0). */
+static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
+				    struct lxc_epoll_descr *descr)
+{
+	__do_close int duped_fd = -EBADF;
+	__do_free char *line = NULL;
+	__do_fclose FILE *f = NULL;
+	int state = PTR_TO_INT(cbdata);
+	size_t len = 0;
+	const char *state_string;
+
+	duped_fd = dup(fd);
+	if (duped_fd < 0)
+		return LXC_MAINLOOP_ERROR;
+
+	/* lseek() reports failure as (off_t)-1; "< -1" could never trigger. */
+	if (lseek(duped_fd, 0, SEEK_SET) == (off_t)-1)
+		return LXC_MAINLOOP_ERROR;
+
+	f = fdopen(duped_fd, "re");
+	if (!f)
+		return LXC_MAINLOOP_ERROR;
+	move_fd(duped_fd); /* the stream owns the descriptor from here on */
+
+	state_string = (state == 1) ? "frozen 1" : "frozen 0";
+
+	while (getline(&line, &len, f) != -1)
+		if (strncmp(line, state_string, STRLITERALLEN("frozen") + 2) == 0)
+			return LXC_MAINLOOP_CLOSE;
+
+	return LXC_MAINLOOP_CONTINUE;
+}
+
+static int cg_unified_freeze(struct cgroup_ops *ops, int timeout)
+{
+	__do_close int fd = -EBADF;
+	call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+	int ret;
+	struct lxc_epoll_descr descr;
+	struct hierarchy *h;
+
+	h = ops->unified; /* freezing is done through the unified hierarchy */
+	if (!h)
+		return ret_set_errno(-1, ENOENT);
+
+	if (!h->container_full_path)
+		return ret_set_errno(-1, EEXIST);
+
+	if (timeout != 0) { /* a watcher on cgroup.events is only set up when the caller wants to wait */
+		__do_free char *events_file = NULL;
+
+		events_file = must_make_path(h->container_full_path, "cgroup.events", NULL);
+		fd = open(events_file, O_RDONLY | O_CLOEXEC);
+		if (fd < 0)
+			return log_error_errno(-1, errno, "Failed to open cgroup.events file");
+
+		ret = lxc_mainloop_open(&descr);
+		if (ret)
+			return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container freeze");
+
+		/* arm the call_cleaner(lxc_mainloop_close) cleanup now that descr is open */
+		descr_ptr = &descr;
+
+		ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){1})); /* cbdata 1: wait for "frozen 1" */
+		if (ret < 0)
+			return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
+	}
+
+	ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "1", 1); /* request the freeze */
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to open cgroup.freeze file");
+
+	if (timeout != 0 && lxc_mainloop(&descr, timeout)) /* run the mainloop until the callback closes it or it fails */
+		return log_error_errno(-1, errno, "Failed to wait for container to be frozen");
+
+	return 0;
+}
+
+__cgfsng_ops static int isulad_cgfsng_freeze(struct cgroup_ops *ops, int timeout)
+{
+	if (!ops->hierarchies)
+		return ret_set_errno(-1, ENOENT);
+
+	/* Only the unified layout waits; the legacy freezer ignores @timeout. */
+	if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+		return cg_unified_freeze(ops, timeout);
+	return cg_legacy_freeze(ops);
+}
+
+/* Thaw via the legacy freezer controller by writing "THAWED" to freezer.state. */
+static int cg_legacy_unfreeze(struct cgroup_ops *ops)
+{
+	struct hierarchy *freezer = get_hierarchy(ops, "freezer");
+
+	if (freezer == NULL)
+		return ret_set_errno(-1, ENOENT);
+
+	return lxc_write_openat(freezer->container_full_path, "freezer.state",
+				"THAWED", STRLITERALLEN("THAWED"));
+}
+
+static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout)
+{
+	__do_close int fd = -EBADF;
+	call_cleaner(lxc_mainloop_close)struct lxc_epoll_descr *descr_ptr = NULL;
+	int ret;
+	struct lxc_epoll_descr descr;
+	struct hierarchy *h;
+
+	h = ops->unified; /* thawing is done through the unified hierarchy */
+	if (!h)
+		return ret_set_errno(-1, ENOENT);
+
+	if (!h->container_full_path)
+		return ret_set_errno(-1, EEXIST);
+
+	if (timeout != 0) { /* a watcher on cgroup.events is only set up when the caller wants to wait */
+		__do_free char *events_file = NULL;
+
+		events_file = must_make_path(h->container_full_path, "cgroup.events", NULL);
+		fd = open(events_file, O_RDONLY | O_CLOEXEC);
+		if (fd < 0)
+			return log_error_errno(-1, errno, "Failed to open cgroup.events file");
+
+		ret = lxc_mainloop_open(&descr);
+		if (ret)
+			return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container unfreeze");
+
+		/* arm the call_cleaner(lxc_mainloop_close) cleanup now that descr is open */
+		descr_ptr = &descr;
+
+		ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){0})); /* cbdata 0: wait for "frozen 0" */
+		if (ret < 0)
+			return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
+	}
+
+	ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "0", 1); /* request the thaw */
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to open cgroup.freeze file");
+
+	if (timeout != 0 && lxc_mainloop(&descr, timeout)) /* run the mainloop until the callback closes it or it fails */
+		return log_error_errno(-1, errno, "Failed to wait for container to be unfrozen");
+
+	return 0;
+}
+
+__cgfsng_ops static int isulad_cgfsng_unfreeze(struct cgroup_ops *ops, int timeout)
+{
+	if (!ops->hierarchies)
+		return ret_set_errno(-1, ENOENT);
+
+	/* Only the unified layout waits; the legacy freezer ignores @timeout. */
+	if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+		return cg_unified_unfreeze(ops, timeout);
+	return cg_legacy_unfreeze(ops);
+}
+
+/* Return the container's cgroup path for @controller relative to the
+ * hierarchy's mountpoint, building and caching the full path on first use. */
+__cgfsng_ops static const char *isulad_cgfsng_get_cgroup(struct cgroup_ops *ops,
+						  const char *controller)
+{
+	struct hierarchy *h = get_hierarchy(ops, controller);
+
+	if (!h)
+		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
+				      controller ? controller : "(null)");
+
+	if (!h->container_full_path)
+		h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
+	if (!h->container_full_path)
+		return NULL;
+	return h->container_full_path + strlen(h->mountpoint);
+}
+
+/* Return the container's full cgroup path for @controller (cached in the hierarchy). */
+__cgfsng_ops static const char *isulad_cgfsng_get_cgroup_full_path(struct cgroup_ops *ops,
+						  const char *controller)
+{
+	struct hierarchy *hier = get_hierarchy(ops, controller);
+
+	if (hier == NULL)
+		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
+				      controller ? controller : "(null)");
+
+	if (hier->container_full_path == NULL)
+		hier->container_full_path = must_make_path(hier->mountpoint, hier->container_base_path, ops->container_cgroup, NULL);
+
+	return hier->container_full_path;
+}
+
+/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build the full
+ * path to @filename below @h's mountpoint; the caller must free the result.
+ */
+static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
+						       const char *inpath,
+						       const char *filename)
+{
+	return must_make_path(h->mountpoint, inpath, filename, NULL);
+}
+
+/* Attach @pid below the unified cgroup @unified_fd: try ".lxc", then the
+ * cgroup itself, then ".lxc-<idx>" for idx 1..999 while writes yield EBUSY. */
+static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t pid)
+{
+	int idx = 1;
+	int ret, saved_errno;
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
+
+	/* Create leaf cgroup. */
+	ret = mkdirat(unified_fd, ".lxc", 0755);
+	if (ret < 0 && errno != EEXIST)
+		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
+
+	pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+	ret = lxc_writeat(unified_fd, ".lxc/cgroup.procs", pidstr, pidstr_len);
+	if (ret < 0)
+		ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len);
+	if (ret == 0)
+		return 0;
+
+	/* this is a non-leaf node */
+	if (errno != EBUSY)
+		return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
+
+	do {
+		bool rm = false;
+		char attach_cgroup[STRLITERALLEN(".lxc-1000/cgroup.procs") + 1];
+		char *slash;
+
+		ret = snprintf(attach_cgroup, sizeof(attach_cgroup), ".lxc-%d/cgroup.procs", idx);
+		if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup))
+			return ret_errno(EIO);
+
+		slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs");
+		*slash = '\0';
+
+		ret = mkdirat(unified_fd, attach_cgroup, 0755);
+		if (ret < 0 && errno != EEXIST)
+			return log_error_errno(-1, errno, "Failed to create cgroup %s", attach_cgroup);
+		rm = (ret == 0);
+		*slash = '/';
+		ret = lxc_writeat(unified_fd, attach_cgroup, pidstr, pidstr_len);
+		if (ret == 0)
+			return 0;
+		saved_errno = errno;
+
+		/* Remove the directory we created (not its cgroup.procs file). */
+		*slash = '\0';
+		if (rm && unlinkat(unified_fd, attach_cgroup, AT_REMOVEDIR))
+			SYSERROR("Failed to remove cgroup \"%d(%s)\"", unified_fd, attach_cgroup);
+		/* this is a non-leaf node; judge by the errno of the failed write */
+		errno = saved_errno;
+		if (errno != EBUSY)
+			return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
+	} while (++idx < 1000);
+
+	return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
+}
+
+/* Create the ".lxc" leaf below @unified_fd and send write fds for both
+ * ".lxc/cgroup.procs" and "cgroup.procs" over the socket in @sk_fd. */
+static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
+				     int unified_fd, int *sk_fd)
+{
+	__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
+	int target_fds[2];
+	ssize_t ret;
+
+	ret = mkdirat(unified_fd, ".lxc", 0755);
+	if (ret < 0 && errno != EEXIST)
+		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
+
+	target_fd0 = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (target_fd0 < 0)
+		return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");
+	target_fds[0] = target_fd0;
+
+	target_fd1 = openat(unified_fd, "cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (target_fd1 < 0)
+		return log_error_errno(-errno, errno, "Failed to open \"cgroup.procs\"");
+	target_fds[1] = target_fd1;
+
+	ret = lxc_abstract_unix_send_fds(sk, target_fds, 2, NULL, 0);
+	if (ret <= 0)
+		return log_error_errno(-errno, errno, "Failed to send \".lxc/cgroup.procs\" fds %d and %d",
+				       target_fd0, target_fd1);
+	return log_debug(0, "Sent target cgroup fds %d and %d", target_fd0, target_fd1);
+}
+
+/* Receive the two target cgroup.procs fds over the socket in @sk_fd and
+ * write @pid to the first one that accepts it. */
+static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
+					int *sk_fd, pid_t pid)
+{
+	__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
+	int target_fds[2];
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
+	ssize_t ret;
+
+	ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0);
+	if (ret <= 0)
+		return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
+	target_fd0 = target_fds[0];
+	target_fd1 = target_fds[1];
+
+	pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+
+	for (size_t i = 0; i < 2; i++) {
+		ret = lxc_write_nointr(target_fds[i], pidstr, pidstr_len);
+		if (ret > 0 && ret == pidstr_len)
+			return log_debug(0, "Moved process into target cgroup via fd %d", target_fds[i]);
+	}
+
+	return log_debug_errno(-1, errno, "Failed to move process into target cgroup via fd %d and %d",
+			       target_fd0, target_fd1);
+}
+
+struct userns_exec_unified_attach_data {
+	const struct lxc_conf *conf; /* container configuration, forwarded to the attach helpers */
+	int unified_fd; /* directory fd of the unified cgroup to attach below */
+	int sk_pair[2]; /* socketpair: [0] is used by the parent wrapper, [1] by the child wrapper */
+	pid_t pid; /* process to move into the cgroup */
+};
+
+/* Closes sk_pair[0] and sends the target cgroup fds through sk_pair[1]. */
+static int cgroup_unified_attach_child_wrapper(void *data)
+{
+	struct userns_exec_unified_attach_data *attach = data;
+	bool valid = attach->conf && attach->unified_fd >= 0 && attach->pid > 0 &&
+		     attach->sk_pair[0] >= 0 && attach->sk_pair[1] >= 0;
+
+	if (!valid)
+		return ret_errno(EINVAL);
+	close_prot_errno_disarm(attach->sk_pair[0]);
+	return cgroup_attach_create_leaf(attach->conf, attach->unified_fd, &attach->sk_pair[1]);
+}
+
+/* Closes sk_pair[1] and moves the pid using the fds received on sk_pair[0]. */
+static int cgroup_unified_attach_parent_wrapper(void *data)
+{
+	struct userns_exec_unified_attach_data *attach = data;
+	bool valid = attach->conf && attach->unified_fd >= 0 && attach->pid > 0 &&
+		     attach->sk_pair[0] >= 0 && attach->sk_pair[1] >= 0;
+
+	if (!valid)
+		return ret_errno(EINVAL);
+	close_prot_errno_disarm(attach->sk_pair[1]);
+	return cgroup_attach_move_into_leaf(attach->conf, &attach->sk_pair[0], attach->pid);
+}
+
+/* Attach @pid to the unified cgroup of container @name through the cgroup2
+ * fd handed out by lxc_cmd_get_cgroup2_fd(); yields EBADF via ret_errno()
+ * if no such fd is available.
+ */
+int cgroup_attach(const struct lxc_conf *conf, const char *name,
+		  const char *lxcpath, pid_t pid)
+{
+	__do_close int unified_fd = -EBADF;
+
+	if (!conf || !name || !lxcpath || pid <= 0)
+		return ret_errno(EINVAL);
+
+	unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+	if (unified_fd < 0)
+		return ret_errno(EBADF);
+
+	/* Without an id mapping the pid is attached directly. */
+	if (lxc_list_empty(&conf->id_map))
+		return cgroup_attach_leaf(conf, unified_fd, pid);
+
+	struct userns_exec_unified_attach_data args = {
+		.conf		= conf,
+		.unified_fd	= unified_fd,
+		.pid		= pid,
+	};
+
+	if (socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair) < 0)
+		return -errno;
+
+	return userns_exec_minimal(conf,
+				   cgroup_unified_attach_parent_wrapper,
+				   &args,
+				   cgroup_unified_attach_child_wrapper,
+				   &args);
+}
+
+/* Technically, we're always at a delegation boundary here (this is
+ * especially true when cgroup namespaces are available). The reasoning is
+ * that in order for us to have been able to start a container in the first
+ * place the root cgroup must have been a leaf node. Now, either the
+ * container's init system has populated the cgroup and kept it as a leaf
+ * node or it has created subtrees. In the former case we will simply attach
+ * to the leaf node we created when we started the container; in the latter
+ * case we create our own cgroup for the attaching process.
+ */
+static int __cg_unified_attach(const struct hierarchy *h,
+			       const struct lxc_conf *conf, const char *name,
+			       const char *lxcpath, pid_t pid,
+			       const char *controller)
+{
+	__do_close int unified_fd = -EBADF;
+	__do_free char *path = NULL, *cgroup = NULL;
+	int ret;
+
+	if (!conf || !name || !lxcpath || pid <= 0)
+		return ret_errno(EINVAL);
+
+	/* Preferred route: cgroup_attach() works on the command handler's fd. */
+	ret = cgroup_attach(conf, name, lxcpath, pid);
+	if (ret == 0)
+		return log_trace(0, "Attached to unified cgroup via command handler");
+	if (ret != -EBADF)
+		return log_error_errno(ret, errno, "Failed to attach to unified cgroup");
+
+	/* Fall back to retrieving the path for the unified cgroup. */
+	cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+	if (!cgroup)
+		return 0; /* not running */
+
+	/* Open the cgroup directory below the hierarchy's mountpoint. */
+	path = must_make_path(h->mountpoint, cgroup, NULL);
+
+	unified_fd = open(path, O_PATH | O_DIRECTORY | O_CLOEXEC);
+	if (unified_fd < 0)
+		return ret_errno(EBADF);
+
+	/* Without an id mapping the pid is attached directly; otherwise the
+	 * attach runs through the two userns wrappers and a socketpair. */
+	if (lxc_list_empty(&conf->id_map))
+		return cgroup_attach_leaf(conf, unified_fd, pid);
+
+	struct userns_exec_unified_attach_data args = {
+		.conf		= conf,
+		.unified_fd	= unified_fd,
+		.pid		= pid,
+	};
+
+	ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair);
+	if (ret < 0)
+		return -errno;
+
+	return userns_exec_minimal(conf,
+				   cgroup_unified_attach_parent_wrapper,
+				   &args,
+				   cgroup_unified_attach_child_wrapper,
+				   &args);
+}
+
+/* Attach @pid to the container's cgroup in every hierarchy: the unified
+ * hierarchy goes through __cg_unified_attach(), legacy ones get the pid
+ * written to the cgroup.procs file of the path reported by the monitor.
+ */
+__cgfsng_ops static bool isulad_cgfsng_attach(struct cgroup_ops *ops,
+				       const struct lxc_conf *conf,
+				       const char *name, const char *lxcpath,
+				       pid_t pid)
+{
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+	int pidlen;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+	if (!ops->hierarchies)
+		return true;
+
+	pidlen = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+	if (pidlen < 0 || (size_t)pidlen >= sizeof(pidstr))
+		return false;
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		__do_free char *fullpath = NULL, *path = NULL;
+		struct hierarchy *hier = ops->hierarchies[i];
+		const char *ctrl = hier->controllers[0];
+
+		if (hier->version == CGROUP2_SUPER_MAGIC) {
+			if (__cg_unified_attach(hier, conf, name, lxcpath, pid, ctrl) < 0)
+				return false;
+
+			continue;
+		}
+
+		path = lxc_cmd_get_cgroup_path(name, lxcpath, ctrl);
+		if (!path)
+			return false; /* not running */
+
+		fullpath = build_full_cgpath_from_monitorpath(hier, path, "cgroup.procs");
+		if (lxc_write_to_file(fullpath, pidstr, pidlen, false, 0666) < 0)
+			return log_error_errno(false, errno, "Failed to attach %d to %s",
+					       (int)pid, fullpath);
+	}
+
+	return true;
+}
+
+/* Read cgroup file @filename of this container into @value, passing @len on
+ * to lxc_read_from_file().  The controller is the part of @filename before
+ * the first '.'.  Returns that read's result, or -1 if no cgroup is found. */
+__cgfsng_ops static int isulad_cgfsng_get(struct cgroup_ops *ops, const char *filename,
+                                   char *value, size_t len, const char *name,
+                                   const char *lxcpath)
+{
+	size_t controller_len = strlen(filename);
+	char *controller = alloca(controller_len + 1);
+	char *dot, *path, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+	int ret = -1;
+
+	(void)strlcpy(controller, filename, controller_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	path = safe_strdup(ori_path);
+
+	h = get_hierarchy(ops, controller);
+	if (h) {
+		fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+		ret = lxc_read_from_file(fullpath, value, len);
+		free(fullpath);
+	}
+	free(path);
+
+	return ret;
+}
+
+/* Copy up to three access characters ('r', 'w', 'm') from @val into
+ * device->access, stopping at a newline or the end of the string.
+ * Any other character is rejected through ret_errno(EINVAL). */
+static int device_cgroup_parse_access(struct device_item *device, const char *val)
+{
+	for (int count = 0; count < 3; count++) {
+		char c = val[count];
+
+		switch (c) {
+		case 'r':
+		case 'w':
+		case 'm':
+			device->access[count] = c;
+			break;
+		case '\n':
+		case '\0':
+			return 0;
+		default:
+			return ret_errno(EINVAL);
+		}
+	}
+
+	return 0;
+}
+
+/* Parse a devices cgroup rule @val ("a", or "<type> <major>:<minor> <access>"
+ * with '*' as wildcard) for @key into @device.  Returns 0 on success, -1 on
+ * malformed input, or the result of device_cgroup_parse_access(). */
+int device_cgroup_rule_parse(struct device_item *device, const char *key,
+				    const char *val)
+{
+	int count, ret;
+	char temp[50];
+
+	device->allow = (strcmp("devices.allow", key) == 0) ? 1 : 0;
+
+	if (strcmp(val, "a") == 0) {
+		/* global rule */
+		device->type = 'a';
+		device->major = -1;
+		device->minor = -1;
+		device->global_rule = device->allow
+					  ? LXC_BPF_DEVICE_CGROUP_BLACKLIST
+					  : LXC_BPF_DEVICE_CGROUP_WHITELIST;
+		device->allow = -1;
+		return 0;
+	}
+
+	/* local rule */
+	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
+
+	switch (*val) {
+	case 'a':
+		__fallthrough;
+	case 'b':
+		__fallthrough;
+	case 'c':
+		device->type = *val;
+		break;
+	default:
+		return -1;
+	}
+
+	val++;
+	if (!isspace((unsigned char)*val)) /* <ctype.h> functions need unsigned char values */
+		return -1;
+	val++;
+	if (*val == '*') {
+		device->major = -1;
+		val++;
+	} else if (isdigit((unsigned char)*val)) {
+		memset(temp, 0, sizeof(temp));
+		for (count = 0; count < sizeof(temp) - 1; count++) {
+			temp[count] = *val;
+			val++;
+			if (!isdigit((unsigned char)*val))
+				break;
+		}
+		ret = lxc_safe_int(temp, &device->major);
+		if (ret)
+			return -1;
+	} else {
+		return -1;
+	}
+	if (*val != ':')
+		return -1;
+	val++;
+
+	/* read minor */
+	if (*val == '*') {
+		device->minor = -1;
+		val++;
+	} else if (isdigit((unsigned char)*val)) {
+		memset(temp, 0, sizeof(temp));
+		for (count = 0; count < sizeof(temp) - 1; count++) {
+			temp[count] = *val;
+			val++;
+			if (!isdigit((unsigned char)*val))
+				break;
+		}
+		ret = lxc_safe_int(temp, &device->minor);
+		if (ret)
+			return -1;
+	} else {
+		return -1;
+	}
+	if (!isspace((unsigned char)*val))
+		return -1;
+
+	return device_cgroup_parse_access(device, ++val);
+}
+
+/* Write @value to cgroup file @filename of this container.  The controller
+ * is the part of @filename before the first '.'.  Returns the result of
+ * lxc_write_to_file(), or -1 if no cgroup or hierarchy is found. */
+__cgfsng_ops static int isulad_cgfsng_set(struct cgroup_ops *ops,
+                                   const char *filename, const char *value,
+                                   const char *name, const char *lxcpath)
+{
+	size_t controller_len = strlen(filename);
+	char *controller = alloca(controller_len + 1);
+	char *dot, *path, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+	int ret = -1;
+
+	(void)strlcpy(controller, filename, controller_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	path = safe_strdup(ori_path);
+
+	h = get_hierarchy(ops, controller);
+	if (h) {
+		fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+		ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+		free(fullpath);
+	}
+	free(path);
+
+	return ret;
+}
+
+/* Take a devices cgroup line
+ *    /dev/foo rwm
+ * and fill @device with the matching
+ *    type major:minor mode
+ * rule by stat()ing the device node. Returns <0 on error, in particular
+ * EINVAL when the line carries no access mode after the path.
+ */
+static int device_cgroup_rule_parse_devpath(struct device_item *device,
+					    const char *devpath)
+{
+	__do_free char *path = NULL;
+	char *mode = NULL;
+	int n_parts, ret;
+	char *p;
+	struct stat sb;
+
+	path = must_copy_string(devpath);
+
+	/*
+	 * Read path followed by mode. Ignore any trailing text.
+	 * A '    # comment' would be legal. Technically other text is not
+	 * legal, we could check for that if we cared to.
+	 */
+	for (n_parts = 1, p = path; *p; p++) {
+		if (*p != ' ')
+			continue;
+		*p = '\0';
+
+		if (n_parts != 1)
+			break;
+		p++;
+		n_parts++;
+
+		while (*p == ' ')
+			p++;
+
+		mode = p;
+
+		if (*p == '\0')
+			return ret_set_errno(-1, EINVAL);
+	}
+
+	/* No mode at all: reject before @mode (still NULL) gets dereferenced. */
+	if (n_parts == 1 || !mode)
+		return ret_set_errno(-1, EINVAL);
+	if (device_cgroup_parse_access(device, mode) < 0)
+		return -1;
+
+	ret = stat(path, &sb);
+	if (ret < 0)
+		return ret_set_errno(-1, errno);
+
+	mode_t m = sb.st_mode & S_IFMT;
+	switch (m) {
+	case S_IFBLK:
+		device->type = 'b';
+		break;
+	case S_IFCHR:
+		device->type = 'c';
+		break;
+	default:
+		return log_error_errno(-1, EINVAL, "Unsupported device type %i for \"%s\"", (int)m, path);
+	}
+
+	device->major = MAJOR(sb.st_rdev);
+	device->minor = MINOR(sb.st_rdev);
+	device->allow = 1;
+	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
+
+	return 0;
+}
+
+/* Rewrite a "/dev/foo rwx" devices rule as "type major:minor access".
+ *
+ * @invalue: rule given as a device path followed by the access mode
+ * @dest:    output buffer; at most 50 bytes including the terminator are
+ *           written to it
+ *
+ * Returns 0 on success and -1 when the rule cannot be parsed or the converted
+ * rule does not fit into 50 bytes.
+ */
+static int convert_devpath(const char *invalue, char *dest)
+{
+	struct device_item dev = {0};
+	int written;
+
+	if (device_cgroup_rule_parse_devpath(&dev, invalue) < 0)
+		return -1;
+
+	written = snprintf(dest, 50, "%c %d:%d %s", dev.type, dev.major,
+			   dev.minor, dev.access);
+	if (written >= 0 && written < 50)
+		return 0;
+
+	return log_error_errno(-1, ENAMETOOLONG, "Error on configuration value \"%c %d:%d %s\" (max 50 chars)",
+			       dev.type, dev.major, dev.minor, dev.access);
+}
+
+/* Read the content of the cgroup file @filename of the container's cgroup.
+ *
+ * Called from setup_limits - here we have the container's cgroup_data because
+ * we created the cgroups.
+ *
+ * @ops:      cgroup driver state used to look up the hierarchy
+ * @filename: cgroup file, e.g. "cpu.shares"; the part before the first '.' is
+ *            used as the controller name
+ * @value:    buffer receiving the file content
+ * @len:      maximum number of bytes to read into @value
+ *
+ * Returns the result of lxc_read_from_file(), -1 if the controller name cannot
+ * be allocated, or -ENOENT when no hierarchy provides the controller.
+ */
+static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename,
+                              char *value, size_t len)
+{
+	char *fullpath = NULL;
+	char *p = NULL;
+	struct hierarchy *h = NULL;
+	int ret = 0;
+	char *controller = NULL;
+	size_t name_len;
+
+	/*
+	 * Keep the length of the file name separate from @len: @len is the
+	 * capacity of @value and must reach lxc_read_from_file() unchanged.
+	 */
+	name_len = strlen(filename);
+	if (SIZE_MAX - 1 < name_len) {
+		errno = EINVAL;
+		return -1;
+	}
+	controller = calloc(1, name_len + 1);
+	if (controller == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+	(void)strlcpy(controller, filename, name_len + 1);
+
+	/* The controller is everything before the first '.'. */
+	p = strchr(controller, '.');
+	if (p)
+		*p = '\0';
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		free(controller);
+		return -ENOENT;
+	}
+
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+	ret = lxc_read_from_file(fullpath, value, len);
+	free(fullpath);
+	free(controller);
+	return ret;
+}
+
+/* Write @value to the cgroup file @filename of the container's cgroup.
+ *
+ * A "devices.allow" value that starts with '/' is first converted from the
+ * "/dev/foo rwx" form into a "type major:minor access" rule.
+ *
+ * A failed write is retried up to 10 times, 100 ms apart. Before each retry
+ * the cpuset hierarchy is handled again and the container cgroup directory is
+ * re-created. When the last attempt fails as well, a message is written to
+ * ops->errfd.
+ *
+ * Returns 0 on success. On failure a non-zero value is returned and errno
+ * holds the error of the last failed write, so that callers can test it.
+ */
+static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+                              const char *value)
+{
+	size_t len;
+	char *fullpath, *p;
+	/* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
+	char converted_value[50];
+	struct hierarchy *h;
+	int ret = 0;
+	int saved_errno = 0;
+	char *controller = NULL;
+	int retry_count = 0;
+	int max_retry = 10;
+	char *container_cgroup = ops->container_cgroup;
+
+	len = strlen(filename);
+	controller = alloca(len + 1);
+	(void)strlcpy(controller, filename, len + 1);
+
+	/* The controller is everything before the first '.'. */
+	p = strchr(controller, '.');
+	if (p)
+		*p = '\0';
+
+	if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
+		ret = convert_devpath(value, converted_value);
+		if (ret < 0)
+			return ret;
+		value = converted_value;
+	}
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		return -ENOENT;
+	}
+
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+
+	for (;;) {
+		ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+		if (ret == 0)
+			break;
+
+		/* Remember why the write failed before anything else runs. */
+		saved_errno = errno;
+
+		if (retry_count >= max_retry) {
+			lxc_write_error_message(ops->errfd,
+			                        "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".",
+			                        __FILE__, __LINE__, value, fullpath, strerror(saved_errno));
+			break;
+		}
+
+		SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath);
+		(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup);
+		(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+		usleep(100 * 1000); /* 100 millisecond */
+		retry_count++;
+	}
+
+	free(fullpath);
+
+	/* Reporting and cleanup above may have changed errno; restore it. */
+	if (ret != 0)
+		errno = saved_errno;
+	return ret;
+}
+
+/* Apply the legacy cgroup limits listed in @conf->cgroup.
+ *
+ * @ops:        cgroup driver state
+ * @conf:       container configuration holding the cgroup settings
+ * @do_devices: when true only settings whose subsystem starts with "devices"
+ *              are written, when false only all other settings are written
+ *
+ * A "files.limit" value that is not positive is written as "max". For device
+ * settings a write that fails with EACCES or EPERM only produces a warning.
+ * Independently of @do_devices every "cpu.shares" setting is read back and
+ * compared with the configured value; a mismatch is reported on ops->errfd and
+ * treated as an error.
+ *
+ * Returns true when there is nothing to do or everything was applied, false
+ * otherwise.
+ */
+__cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+						    struct lxc_conf *conf,
+						    bool do_devices)
+{
+	__do_free struct lxc_list *sorted_cgroup_settings = NULL;
+	/* Assigned only after @conf has been validated below. */
+	struct lxc_list *cgroup_settings = NULL;
+	struct lxc_list *iterator, *next;
+	struct lxc_cgroup *cg;
+	bool ret = false;
+	char value[21 + 1] = { 0 };
+	long long int readvalue, setvalue;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	cgroup_settings = &conf->cgroup;
+	if (lxc_list_empty(cgroup_settings))
+		return true;
+
+	if (!ops->hierarchies)
+		return ret_set_errno(false, EINVAL);
+
+	sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
+	if (!sorted_cgroup_settings)
+		return false;
+
+	lxc_list_for_each(iterator, sorted_cgroup_settings) {
+		cg = iterator->elem;
+
+		/* Handle either only the device settings or only the others. */
+		if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
+			const char *cgvalue = cg->value;
+			if (strcmp(cg->subsystem, "files.limit") == 0) {
+				if (lxc_safe_long_long(cgvalue, &setvalue) != 0) {
+					SYSERROR("Invalid integer value %s", cgvalue);
+					goto out;
+				}
+				if (setvalue <= 0) {
+					cgvalue = "max";
+				}
+			}
+			if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) {
+				if (do_devices && (errno == EACCES || errno == EPERM)) {
+					SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
+					continue;
+				}
+				SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
+				goto out;
+			}
+			DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue);
+		}
+
+		// isulad: check cpu shares
+		if (strcmp(cg->subsystem, "cpu.shares") == 0) {
+			/*
+			 * Start from an empty buffer so that a shorter read
+			 * does not keep bytes of a previous iteration.
+			 */
+			memset(value, 0, sizeof(value));
+			if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
+				SYSERROR("Error get %s", cg->subsystem);
+				goto out;
+			}
+			trim(value);
+			if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
+				SYSERROR("Invalid value %s", cg->value);
+				goto out;
+			}
+			if (lxc_safe_long_long(value, &readvalue) != 0) {
+				SYSERROR("Invalid value %s", value);
+				goto out;
+			}
+			/* The value read back differs from the one requested. */
+			if (setvalue > readvalue) {
+				ERROR("The maximum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+				                        "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
+				                        __FILE__, __LINE__, value);
+				goto out;
+			} else if (setvalue < readvalue) {
+				ERROR("The minimum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+				                        "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
+				                        __FILE__, __LINE__, value);
+				goto out;
+			}
+		}
+	}
+
+	ret = true;
+	INFO("Limits for the legacy cgroup hierarchies have been setup");
+out:
+	/* Free the list nodes; the list head is released by __do_free. */
+	lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
+		lxc_list_del(iterator);
+		free(iterator);
+	}
+
+	return ret;
+}
+
+/*
+ * Parse one devices setting (@key = @val) and queue it on @conf for the bpf
+ * device program. A "devices.allow" value starting with '/' is parsed as a
+ * device path rule, everything else as a regular device rule.
+ *
+ * Some of the parsing logic comes from the original cgroup device v1
+ * implementation in the kernel.
+ *
+ * Returns 0 on success and -1 on error. Without
+ * HAVE_STRUCT_BPF_CGROUP_DEV_CTX nothing is done and 0 is returned.
+ */
+static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
+				     struct lxc_conf *conf, const char *key,
+				     const char *val)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	struct device_item rule = {0};
+	bool is_devpath = (strcmp("devices.allow", key) == 0) && (*val == '/');
+	int rc;
+
+	rc = is_devpath ? device_cgroup_rule_parse_devpath(&rule, val)
+			: device_cgroup_rule_parse(&rule, key, val);
+	if (rc < 0)
+		return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", key, val);
+
+	if (bpf_list_add_device(conf, &rule) < 0)
+		return -1;
+#endif
+	return 0;
+}
+
+/* Apply the cgroup2 limits listed in @handler->conf->cgroup2.
+ *
+ * Settings whose subsystem starts with "devices" are parsed and queued for the
+ * bpf device program; every other setting is written to the file of the same
+ * name below the container's directory in the unified hierarchy.
+ *
+ * Returns true when there is nothing to do or every setting was applied.
+ * Returns false on invalid arguments, when cgroup2 settings exist without a
+ * unified hierarchy, or as soon as one setting cannot be applied.
+ */
+__cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops,
+					     struct lxc_handler *handler)
+{
+	struct lxc_list *cgroup_settings, *iterator;
+	struct hierarchy *h;
+	struct lxc_conf *conf;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, EINVAL);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+	conf = handler->conf;
+
+	if (lxc_list_empty(&conf->cgroup2))
+		return true;
+	cgroup_settings = &conf->cgroup2;
+
+	if (!ops->unified)
+		return false;
+	h = ops->unified;
+
+	lxc_list_for_each (iterator, cgroup_settings) {
+		struct lxc_cgroup *cg = iterator->elem;
+		int ret;
+
+		if (strncmp("devices", cg->subsystem, 7) == 0)
+			ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem,
+							cg->value);
+		else
+			ret = lxc_write_openat(h->container_full_path,
+					       cg->subsystem, cg->value,
+					       strlen(cg->value));
+		/* A rejected device rule is as fatal as a failed write. */
+		if (ret < 0)
+			return log_error_errno(false, errno, "Failed to set \"%s\" to \"%s\"",
+					       cg->subsystem, cg->value);
+
+		TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+	}
+
+	return log_info(true, "Limits for the unified cgroup hierarchy have been setup");
+}
+
+/* Build a bpf device program from @handler->conf->devices and attach it to
+ * the container's cgroup in the unified hierarchy.
+ *
+ * Returns true without doing anything when there are no hierarchies, when
+ * there is no unified hierarchy with a bpf device controller and a container
+ * path, or when the device list is empty. Returns false on invalid arguments
+ * or when the program cannot be created, filled, finalized or attached.
+ * Without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the function only returns true.
+ */
+__cgfsng_ops bool isulad_cgfsng_devices_activate(struct cgroup_ops *ops,
+					  struct lxc_handler *handler)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	__do_bpf_program_free struct bpf_program *devices = NULL;
+	int ret;
+	struct lxc_conf *conf;
+	struct hierarchy *unified;
+	struct lxc_list *it;
+	struct bpf_program *devices_old;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, EEXIST);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+	conf = handler->conf;
+
+	/* Nothing to attach to, or no device rules were configured. */
+	unified = ops->unified;
+	if (!unified || !unified->bpf_device_controller ||
+	    !unified->container_full_path || lxc_list_empty(&conf->devices))
+		return true;
+
+	devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+	if (!devices)
+		return log_error_errno(false, ENOMEM, "Failed to create new bpf program");
+
+	ret = bpf_program_init(devices);
+	if (ret)
+		return log_error_errno(false, ENOMEM, "Failed to initialize bpf program");
+
+	/* Translate every configured device rule into the program. */
+	lxc_list_for_each(it, &conf->devices) {
+		struct device_item *cur = it->elem;
+
+		ret = bpf_program_append_device(devices, cur);
+		if (ret)
+			return log_error_errno(false, ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+					       cur->type,
+					       cur->major,
+					       cur->minor,
+					       cur->access,
+					       cur->allow,
+					       cur->global_rule);
+		TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+		      cur->type,
+		      cur->major,
+		      cur->minor,
+		      cur->access,
+		      cur->allow,
+		      cur->global_rule);
+	}
+
+	ret = bpf_program_finalize(devices);
+	if (ret)
+		return log_error_errno(false, ENOMEM, "Failed to finalize bpf program");
+
+	ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+					unified->container_full_path,
+					BPF_F_ALLOW_MULTI);
+	if (ret)
+		return log_error_errno(false, ENOMEM, "Failed to attach bpf program");
+
+	/*
+	 * Replace old bpf program: the new program is stored in @conf and the
+	 * previous one ends up in @devices, which carries the
+	 * __do_bpf_program_free cleanup attribute (presumably released when the
+	 * function returns).
+	 */
+	devices_old = move_ptr(conf->cgroup2_devices);
+	conf->cgroup2_devices = move_ptr(devices);
+	devices = move_ptr(devices_old);
+#endif
+	return true;
+}
+
+/* Enable all controllers of the unified hierarchy along the path to @cgroup.
+ *
+ * A string of the form "+ctrl1 +ctrl2 ..." is written to the
+ * "cgroup.subtree_control" file of the container base path and of every
+ * component of @cgroup except the last one.
+ *
+ * Returns true when there is nothing to do (no hierarchies, not a pure unified
+ * layout, or no controllers) or when all writes succeeded, false otherwise.
+ */
+bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
+{
+	__do_free char *add_controllers = NULL, *base_path = NULL;
+	__do_free_string_list char **parts = NULL;
+	struct hierarchy *unified = ops->unified;
+	ssize_t parts_len;
+	char **it;
+	size_t full_len = 0;
+
+	if (!ops->hierarchies || !pure_unified_layout(ops) ||
+	    !unified->controllers[0])
+		return true;
+
+	/* For now we simply enable all controllers that we have detected by
+	 * creating a string like "+memory +pids +cpu +io".
+	 * TODO: In the near future we might want to support "-<controller>"
+	 * etc. but whether supporting semantics like this make sense will need
+	 * some thinking.
+	 */
+	for (it = unified->controllers; it && *it; it++) {
+		/* Room for the name plus the leading '+' and a separator. */
+		full_len += strlen(*it) + 2;
+		add_controllers = must_realloc(add_controllers, full_len + 1);
+
+		/* Terminate the freshly allocated buffer on the first round. */
+		if (unified->controllers[0] == *it)
+			add_controllers[0] = '\0';
+
+		(void)strlcat(add_controllers, "+", full_len + 1);
+		(void)strlcat(add_controllers, *it, full_len + 1);
+
+		/* Only separate when another controller follows. */
+		if ((it + 1) && *(it + 1))
+			(void)strlcat(add_controllers, " ", full_len + 1);
+	}
+
+	parts = lxc_string_split(cgroup, '/');
+	if (!parts)
+		return false;
+
+	/* The last component of @cgroup is not written to. */
+	parts_len = lxc_array_len((void **)parts);
+	if (parts_len > 0)
+		parts_len--;
+
+	base_path = must_make_path(unified->mountpoint, unified->container_base_path, NULL);
+	/* i == -1 handles the base path itself before descending. */
+	for (ssize_t i = -1; i < parts_len; i++) {
+		int ret;
+		__do_free char *target = NULL;
+
+		if (i >= 0)
+			base_path = must_append_path(base_path, parts[i], NULL);
+		target = must_make_path(base_path, "cgroup.subtree_control", NULL);
+		/*
+		 * NOTE(review): full_len also counts a separator for the last
+		 * controller, so it is one larger than strlen(add_controllers)
+		 * and the write appears to include the terminating NUL byte -
+		 * confirm that this is intended.
+		 */
+		ret = lxc_writeat(-1, target, add_controllers, full_len);
+		if (ret < 0)
+			return log_error_errno(false, errno, "Could not enable \"%s\" controllers in the unified cgroup \"%s\"",
+					       add_controllers, target);
+		TRACE("Enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target);
+	}
+
+	return true;
+}
+
+/* Controller delegation hook for the monitor cgroup. This implementation does
+ * nothing and always reports success; @ops is not used.
+ */
+__cgfsng_ops bool isulad_cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+{
+	return true;
+}
+
+/* Controller delegation hook for the payload: enables the controllers along
+ * the path of the container cgroup stored in @ops.
+ *
+ * Returns false with ENOENT when @ops is NULL, otherwise the result of
+ * __cgfsng_delegate_controllers().
+ */
+__cgfsng_ops bool isulad_cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
+{
+	if (ops)
+		return __cgfsng_delegate_controllers(ops, ops->container_cgroup);
+
+	return ret_set_errno(false, ENOENT);
+}
+
+/* Check @controllers against the controllers requested in ops->cgroup_use.
+ *
+ * Returns true when ops->cgroup_use is not set or when every entry of
+ * @controllers is also listed in it, false as soon as one entry is missing.
+ */
+static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
+				       char **controllers)
+{
+	char **wanted = ops->cgroup_use;
+
+	if (!wanted)
+		return true;
+
+	for (char **ctrl = controllers; ctrl && *ctrl; ctrl++) {
+		char **use = wanted;
+
+		/* Advance until a match or the end of the requested list. */
+		while (*use && strcmp(*use, *ctrl) != 0)
+			use++;
+
+		/* Hit the terminator: this controller was not requested. */
+		if (!*use)
+			return false;
+	}
+
+	return true;
+}
+
+/* Append the names of the delegatable cgroup2 files to the list @delegate.
+ *
+ * The names are read from /sys/kernel/cgroup/delegate. "cgroup.procs" is left
+ * out of the list. When the file cannot be read, "cgroup.subtree_control" and
+ * "cgroup.threads" are appended instead.
+ */
+static void cg_unified_delegate(char ***delegate)
+{
+	__do_free char *buf = NULL;
+	char *standard[] = {"cgroup.subtree_control", "cgroup.threads", NULL};
+	char *token;
+	int idx;
+
+	buf = read_file("/sys/kernel/cgroup/delegate");
+	if (!buf) {
+		/*
+		 * Warn right away: the allocations done for the fallback list
+		 * may overwrite the errno left behind by the failed read.
+		 */
+		SYSWARN("Failed to read /sys/kernel/cgroup/delegate");
+		for (char **p = standard; p && *p; p++) {
+			idx = append_null_to_list((void ***)delegate);
+			(*delegate)[idx] = must_copy_string(*p);
+		}
+		return;
+	}
+
+	lxc_iterate_parts (token, buf, " \t\n") {
+		/*
+		 * We always need to chown this for both cgroup and
+		 * cgroup2.
+		 */
+		if (strcmp(token, "cgroup.procs") == 0)
+			continue;
+
+		idx = append_null_to_list((void ***)delegate);
+		(*delegate)[idx] = must_copy_string(token);
+	}
+}
+
+/* Discover the mounted cgroup hierarchies by scanning /proc/self/mountinfo
+ * and register every usable one in @ops->hierarchies.
+ *
+ * @ops:          cgroup driver state to fill in
+ * @relative:     when false and running as root, the current cgroups are taken
+ *                from /proc/1/cgroup instead of /proc/self/cgroup
+ * @unprivileged: when true, the list of delegatable files is recorded for the
+ *                unified hierarchy
+ *
+ * ops->cgroup_layout is set to legacy, unified or hybrid depending on which
+ * kinds of cgroup mounts are seen. Returns 0 on success and -1 on error.
+ */
+static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileged)
+{
+	__do_free char *basecginfo = NULL, *line = NULL;
+	__do_free_string_list char **klist = NULL, **nlist = NULL;
+	__do_fclose FILE *f = NULL;
+	int ret;
+	size_t len = 0;
+
+	/* Root spawned containers escape the current cgroup, so use init's
+	 * cgroups as our base in that case.
+	 */
+	if (!relative && (geteuid() == 0))
+		basecginfo = read_file("/proc/1/cgroup");
+	else
+		basecginfo = read_file("/proc/self/cgroup");
+	if (!basecginfo)
+		return ret_set_errno(-1, ENOMEM);
+
+	ret = get_existing_subsystems(&klist, &nlist);
+	if (ret < 0)
+		return log_error_errno(-1, errno, "Failed to retrieve available legacy cgroup controllers");
+
+	f = fopen("/proc/self/mountinfo", "re");
+	if (!f)
+		return log_error_errno(-1, errno, "Failed to open \"/proc/self/mountinfo\"");
+
+	lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+
+	/* Every line of mountinfo describes one mount. */
+	while (getline(&line, &len, f) != -1) {
+		__do_free char *base_cgroup = NULL, *mountpoint = NULL;
+		__do_free_string_list char **controller_list = NULL;
+		int type;
+		struct hierarchy *new;
+
+		/* A type of 0 means the line is not a cgroup mount. */
+		type = get_cgroup_version(line);
+		if (type == 0)
+			continue;
+
+		/* Only the first cgroup2 mount is used. */
+		if (type == CGROUP2_SUPER_MAGIC && ops->unified)
+			continue;
+
+		/* Seeing both kinds of mounts turns the layout into hybrid. */
+		if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) {
+			if (type == CGROUP2_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+			else if (type == CGROUP_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
+		} else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) {
+			if (type == CGROUP_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+		} else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) {
+			if (type == CGROUP2_SUPER_MAGIC)
+				ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+		}
+
+		/* A legacy mount without a controller list is of no use. */
+		controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
+		if (!controller_list && type == CGROUP_SUPER_MAGIC)
+			continue;
+
+		if (type == CGROUP_SUPER_MAGIC)
+			if (controller_list_is_dup(ops->hierarchies, controller_list)) {
+				TRACE("Skipping duplicating controller");
+				continue;
+			}
+
+		mountpoint = cg_hybrid_get_mountpoint(line);
+		if (!mountpoint) {
+			ERROR("Failed parsing mountpoint from \"%s\"", line);
+			continue;
+		}
+
+		/* Legacy hierarchies are looked up by their first controller. */
+		if (type == CGROUP_SUPER_MAGIC)
+			base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+		else
+			base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
+		if (!base_cgroup) {
+			ERROR("Failed to find current cgroup");
+			continue;
+		}
+
+		trim(base_cgroup);
+		prune_init_scope(base_cgroup);
+
+		/* isulad: do not test whether the cgroup is writable. If isulad
+		 * runs in docker without a cgroup namespace, base_cgroup will be
+		 * docker/XXX.. and mountpoint + base_cgroup may not exist. */
+
+		/*
+		 * reason: the base cgroup may start with /system.slice when
+		 *	cg_hybrid_init reads /proc/1/cgroup on the host, and cgroup
+		 *	init would then place all container cgroups under
+		 *	/sys/fs/cgroup/<controller>/system.slice/xxx/lxc, which is
+		 *	not consistent with docker. The default cgroup path should
+		 *	be under the /sys/fs/cgroup/<controller>/lxc directory.
+		 */
+
+		/* Hence cut an absolute base cgroup down to "/". */
+		if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
+			base_cgroup[1] = '\0';
+		}
+
+		/* For cgroup2 the controllers come from cgroup.controllers. */
+		if (type == CGROUP2_SUPER_MAGIC) {
+			char *cgv2_ctrl_path;
+
+			cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+							"cgroup.controllers",
+							NULL);
+
+			controller_list = cg_unified_get_controllers(cgv2_ctrl_path);
+			free(cgv2_ctrl_path);
+			if (!controller_list) {
+				controller_list = cg_unified_make_empty_controller();
+				TRACE("No controllers are enabled for "
+				      "delegation in the unified hierarchy");
+			}
+		}
+
+		/* Exclude all controllers that cgroup use does not want. */
+		if (!cgroup_use_wants_controllers(ops, controller_list)) {
+			TRACE("Skipping controller");
+			continue;
+		}
+
+		/* The strings are handed over to add_hierarchy() via move_ptr(). */
+		new = add_hierarchy(&ops->hierarchies, move_ptr(controller_list), move_ptr(mountpoint), move_ptr(base_cgroup), type);
+		if (type == CGROUP2_SUPER_MAGIC && !ops->unified) {
+			if (unprivileged)
+				cg_unified_delegate(&new->cgroup2_chown);
+			ops->unified = new;
+		}
+	}
+
+	TRACE("Writable cgroup hierarchies:");
+	lxc_cgfsng_print_hierarchies(ops);
+
+	/* verify that all controllers in cgroup.use and all crucial
+	 * controllers are accounted for
+	 */
+	if (!all_controllers_found(ops))
+		return log_error_errno(-1, ENOENT, "Failed to find all required controllers");
+
+	return 0;
+}
+
+/* Return the current cgroup in the cgroup2 hierarchy.
+ *
+ * The "0::/..." entry is looked up in /proc/1/cgroup when @relative is false
+ * and the caller is root, and in /proc/self/cgroup otherwise. The result is a
+ * newly allocated, trimmed copy of the path starting at its leading '/'.
+ * Returns NULL when the file cannot be read or holds no such entry.
+ */
+static char *cg_unified_get_current_cgroup(bool relative)
+{
+	__do_free char *cginfo = NULL;
+	const char *source;
+	char *entry, *path;
+
+	source = (!relative && (geteuid() == 0)) ? "/proc/1/cgroup"
+						  : "/proc/self/cgroup";
+	cginfo = read_file(source);
+	if (!cginfo)
+		return NULL;
+
+	entry = strstr(cginfo, "0::/");
+	if (!entry)
+		return NULL;
+
+	/* Skip "0::" so that the copy starts at the leading '/'. */
+	path = copy_to_eol(entry + 3);
+
+	return path ? trim(path) : NULL;
+}
+
+/* Initialize @ops for a system that only uses the unified cgroup hierarchy.
+ *
+ * @relative:     passed on to the lookup of the current cgroup; when false the
+ *                init scope is pruned from the path
+ * @unprivileged: when true, the list of delegatable files is recorded for the
+ *                new hierarchy
+ *
+ * Returns CGROUP2_SUPER_MAGIC once the unified hierarchy has been registered,
+ * 0 when unified_cgroup_hierarchy() reports anything but a cgroup2 setup, and
+ * an error otherwise.
+ */
+static int cg_unified_init(struct cgroup_ops *ops, bool relative,
+			   bool unprivileged)
+{
+	__do_free char *ctrl_file = NULL;
+	char *cur_cgroup = NULL;
+	char *mnt;
+	char **ctrls;
+	struct hierarchy *hier;
+	int magic;
+
+	magic = unified_cgroup_hierarchy();
+	if (magic == -ENOMEDIUM)
+		return ret_errno(ENOMEDIUM);
+
+	if (magic != CGROUP2_SUPER_MAGIC)
+		return 0;
+
+	cur_cgroup = cg_unified_get_current_cgroup(relative);
+	if (!cur_cgroup)
+		return ret_errno(EINVAL);
+	if (!relative)
+		prune_init_scope(cur_cgroup);
+
+	/*
+	 * We assume that the cgroup we're currently in has been delegated to
+	 * us and we are free to further delegate all of the controllers listed
+	 * in cgroup.controllers further down the hierarchy.
+	 */
+	mnt = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
+	ctrl_file = must_make_path(mnt, cur_cgroup, "cgroup.controllers", NULL);
+	ctrls = cg_unified_get_controllers(ctrl_file);
+	if (!ctrls)
+		ctrls = cg_unified_make_empty_controller();
+	if (!ctrls[0])
+		TRACE("No controllers are enabled for delegation");
+
+	/* TODO: If the user requested specific controllers via lxc.cgroup.use
+	 * we should verify here. The reason I'm not doing it right is that I'm
+	 * not convinced that lxc.cgroup.use will be the future since it is a
+	 * global property. I much rather have an option that lets you request
+	 * controllers per container.
+	 */
+
+	hier = add_hierarchy(&ops->hierarchies, ctrls, mnt, cur_cgroup, CGROUP2_SUPER_MAGIC);
+	if (unprivileged)
+		cg_unified_delegate(&hier->cgroup2_chown);
+
+	if (bpf_devices_cgroup_supported())
+		hier->bpf_device_controller = 1;
+
+	ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+	ops->unified = hier;
+
+	return CGROUP2_SUPER_MAGIC;
+}
+
+/* Detect the cgroup setup of the host and fill in @ops.
+ *
+ * The comma separated "lxc.cgroup.use" global setting, if present, is stored
+ * in ops->cgroup_use. The unified initialization is tried first; the hybrid
+ * one only runs when the former did not register a cgroup2 hierarchy. A
+ * container with an id mapping is treated as unprivileged.
+ *
+ * Returns 0 on success and a negative value on error.
+ */
+static int isulad_cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	const char *use = lxc_global_config_value("lxc.cgroup.use");
+	bool relative = conf->cgroup_meta.relative;
+	bool unprivileged;
+	int ret;
+
+	if (use) {
+		/* Tokenizing modifies the string, so work on a private copy. */
+		__do_free char *pin = must_copy_string(use);
+		char *cur;
+
+		lxc_iterate_parts(cur, pin, ",")
+			must_append_string(&ops->cgroup_use, cur);
+	}
+
+	unprivileged = !lxc_list_empty(&conf->id_map);
+
+	ret = cg_unified_init(ops, relative, unprivileged);
+	if (ret < 0)
+		return -1;
+
+	if (ret != CGROUP2_SUPER_MAGIC)
+		return cg_hybrid_init(ops, relative, unprivileged);
+
+	return 0;
+}
+
+/* Store the cgroup pattern and the cgroup name of the container in @ops.
+ *
+ * A non-empty "lxc.cgroup.pattern" global setting is copied to
+ * ops->cgroup_pattern. ops->container_cgroup is then chosen as follows:
+ *   - "<cgroup_meta.dir>/<name>" when @conf->cgroup_meta.dir is set,
+ *   - otherwise the result of lxc_string_replace("%n", name, pattern) when a
+ *     pattern is stored in @ops,
+ *   - otherwise just the container name.
+ *
+ * Returns 0 on success and -1 on error.
+ */
+__cgfsng_ops static int isulad_cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	__do_free char *cgroup_name = NULL, *expanded = NULL;
+	const char *pattern;
+	size_t len;
+
+	if (!ops)
+		return ret_set_errno(-1, ENOENT);
+
+	/* copy system-wide cgroup information */
+	pattern = lxc_global_config_value("lxc.cgroup.pattern");
+	if (pattern && pattern[0] != '\0')
+		ops->cgroup_pattern = must_copy_string(pattern);
+
+	if (conf->cgroup_meta.dir) {
+		cgroup_name = must_concat(&len, conf->cgroup_meta.dir, "/", conf->name, NULL);
+	} else if (ops->cgroup_pattern) {
+		expanded = lxc_string_replace("%n", conf->name, ops->cgroup_pattern);
+		if (!expanded)
+			return ret_set_errno(-1, ENOMEM);
+
+		cgroup_name = must_concat(&len, expanded, NULL);
+	} else {
+		cgroup_name = must_concat(&len, conf->name, NULL);
+	}
+
+	if (!cgroup_name)
+		return ret_set_errno(-1, ENOMEM);
+
+	ops->container_cgroup = move_ptr(cgroup_name);
+
+	return 0;
+}
+
+/* Allocate and initialize the "isulad_cgfsng" cgroup driver.
+ *
+ * The cgroup setup of the host is detected through isulad_cg_init() and all
+ * driver callbacks are installed. errfd is taken from conf->errpipe[1].
+ *
+ * Returns the new driver, whose ownership passes to the caller, or NULL when
+ * the allocation or the detection fails.
+ */
+struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
+{
+	__do_free struct cgroup_ops *new_ops = NULL;
+
+	new_ops = calloc(1, sizeof(*new_ops));
+	if (!new_ops)
+		return ret_set_errno(NULL, ENOMEM);
+
+	new_ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+
+	if (isulad_cg_init(new_ops, conf) != 0)
+		return NULL;
+
+	/* Driver identification and error reporting channel. */
+	new_ops->driver = "isulad_cgfsng";
+	new_ops->version = "1.0.0";
+	new_ops->errfd = conf ? conf->errpipe[1] : -1;
+
+	/* Monitor cgroup handling. */
+	new_ops->monitor_create = isulad_cgfsng_monitor_create;
+	new_ops->monitor_enter = isulad_cgfsng_monitor_enter;
+	new_ops->monitor_delegate_controllers = isulad_cgfsng_monitor_delegate_controllers;
+	new_ops->monitor_destroy = isulad_cgfsng_monitor_destroy;
+
+	/* Payload (container) cgroup handling. */
+	new_ops->payload_create = isulad_cgfsng_payload_create;
+	new_ops->payload_enter = isulad_cgfsng_payload_enter;
+	new_ops->payload_delegate_controllers = isulad_cgfsng_payload_delegate_controllers;
+	new_ops->payload_finalize = isulad_cgfsng_payload_finalize;
+	new_ops->payload_destroy = isulad_cgfsng_payload_destroy;
+
+	/* Limits and device rules. */
+	new_ops->setup_limits_legacy = isulad_cgfsng_setup_limits_legacy;
+	new_ops->setup_limits = isulad_cgfsng_setup_limits;
+	new_ops->devices_activate = isulad_cgfsng_devices_activate;
+
+	/* Queries and runtime operations. */
+	new_ops->data_init = isulad_cgfsng_data_init;
+	new_ops->get_cgroup_full_path = isulad_cgfsng_get_cgroup_full_path;
+	new_ops->num_hierarchies = isulad_cgfsng_num_hierarchies;
+	new_ops->get_hierarchies = isulad_cgfsng_get_hierarchies;
+	new_ops->get_cgroup = isulad_cgfsng_get_cgroup;
+	new_ops->get = isulad_cgfsng_get;
+	new_ops->set = isulad_cgfsng_set;
+	new_ops->freeze = isulad_cgfsng_freeze;
+	new_ops->unfreeze = isulad_cgfsng_unfreeze;
+	new_ops->escape = isulad_cgfsng_escape;
+	new_ops->attach = isulad_cgfsng_attach;
+	new_ops->chown = isulad_cgfsng_chown;
+	new_ops->mount = isulad_cgfsng_mount;
+
+	return move_ptr(new_ops);
+}
--
2.25.1