1 Star 0 Fork 44

hongkeyang/lxc

forked from src-openEuler/lxc 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0009-cgroup-refact-cgroup-manager-to-single-file.patch 120.05 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416
From 4592fbcbd0be862cf37a3090f58a4491c430e71a Mon Sep 17 00:00:00 2001
From: lifeng68 <[email protected]>
Date: Mon, 2 Nov 2020 16:53:19 +0800
Subject: [PATCH 09/10] cgroup: refact cgroup manager to single file
Signed-off-by: lifeng68 <[email protected]>
---
src/lxc/Makefile.am | 5 +-
src/lxc/cgroups/cgfsng.c | 1030 +---------
src/lxc/cgroups/isulad_cgfsng.c | 3115 +++++++++++++++++++++++++++++++
3 files changed, 3147 insertions(+), 1003 deletions(-)
create mode 100644 src/lxc/cgroups/isulad_cgfsng.c
diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am
index 0e1ba8da9..dc49c7e22 100644
--- a/src/lxc/Makefile.am
+++ b/src/lxc/Makefile.am
@@ -107,7 +107,6 @@ liblxc_la_SOURCES = af_unix.c af_unix.h \
api_extensions.h \
attach.c attach.h \
caps.c caps.h \
- cgroups/cgfsng.c \
cgroups/cgroup.c cgroups/cgroup.h \
cgroups/cgroup2_devices.c cgroups/cgroup2_devices.h \
cgroups/cgroup_utils.c cgroups/cgroup_utils.h \
@@ -174,7 +173,11 @@ liblxc_la_SOURCES += isulad_utils.c isulad_utils.h \
json/logger_json_file.c json/logger_json_file.h \
json/oci_runtime_spec.c json/oci_runtime_spec.h \
json/read-file.c json/read-file.h \
+ cgroups/isulad_cgfsng.c \
exec_commands.c exec_commands.h
+
+else
+liblxc_la_SOURCES += cgroups/cgfsng.c
endif
if IS_BIONIC
diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
index 1ff3d9812..9b9aaf6c3 100644
--- a/src/lxc/cgroups/cgfsng.c
+++ b/src/lxc/cgroups/cgfsng.c
@@ -214,7 +214,6 @@ static char *read_file(const char *fnam)
return move_ptr(buf);
}
-#ifndef HAVE_ISULAD
/* Taken over modified from the kernel sources. */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
@@ -477,14 +476,13 @@ static bool copy_parent_file(const char *parent_cgroup,
value, child_cgroup, file);
return true;
}
-#endif
+
static inline bool is_unified_hierarchy(const struct hierarchy *h)
{
return h->version == CGROUP2_SUPER_MAGIC;
}
-#ifndef HAVE_ISULAD
/*
* Initialize the cpuset hierarchy in first directory of @cgroup_leaf and set
* cgroup.clone_children so that children inherit settings. Since the
@@ -564,7 +562,6 @@ static int cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h,
return fret;
}
-#endif
/* Given two null-terminated lists of strings, return true if any string is in
* both.
@@ -958,107 +955,6 @@ struct generic_userns_exec_data {
char *path;
};
-#ifdef HAVE_ISULAD
-
-static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
- const char *container_cgroup)
-{
- if (!container_cgroup || !hierarchies)
- return 0;
-
- for (int i = 0; hierarchies[i]; i++) {
- struct hierarchy *h = hierarchies[i];
- int ret;
-
- if (!h->container_full_path) {
- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL);
- }
-
- ret = lxc_rm_rf(h->container_full_path);
- if (ret < 0) {
- SYSERROR("Failed to destroy \"%s\"", h->container_full_path);
- return -1;
- }
-
- free_disarm(h->container_full_path);
- }
-
- return 0;
-}
-
-static int isulad_cgroup_tree_remove_wrapper(void *data)
-{
- struct generic_userns_exec_data *arg = data;
- uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
- gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
- int ret;
-
- if (!lxc_setgroups(0, NULL) && errno != EPERM)
- return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
-
- ret = setresgid(nsgid, nsgid, nsgid);
- if (ret < 0)
- return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
- (int)nsgid, (int)nsgid, (int)nsgid);
-
- ret = setresuid(nsuid, nsuid, nsuid);
- if (ret < 0)
- return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
- (int)nsuid, (int)nsuid, (int)nsuid);
-
- return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
-}
-
-__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
- struct lxc_handler *handler)
-{
- int ret;
-
- if (!ops) {
- ERROR("Called with uninitialized cgroup operations");
- return false;
- }
-
- if (!ops->hierarchies) {
- return false;
- }
-
- if (!handler) {
- ERROR("Called with uninitialized handler");
- return false;
- }
-
- if (!handler->conf) {
- ERROR("Called with uninitialized conf");
- return false;
- }
-
-#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
- ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
- if (ret < 0)
- WARN("Failed to detach bpf program from cgroup");
-#endif
-
- if (handler->conf && !lxc_list_empty(&handler->conf->id_map)) {
- struct generic_userns_exec_data wrap = {
- .conf = handler->conf,
- .container_cgroup = ops->container_cgroup,
- .hierarchies = ops->hierarchies,
- .origuid = 0,
- };
- ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
- &wrap, "cgroup_tree_remove_wrapper");
- } else {
- ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
- }
- if (ret < 0) {
- SYSWARN("Failed to destroy cgroups");
- return false;
- }
-
- return true;
-}
-#else
static int cgroup_tree_remove(struct hierarchy **hierarchies,
const char *container_cgroup)
{
@@ -1149,15 +1045,7 @@ __cgfsng_ops static void cgfsng_payload_destroy(struct cgroup_ops *ops,
if (ret < 0)
SYSWARN("Failed to destroy cgroups");
}
-#endif
-#ifdef HAVE_ISULAD
-__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
- struct lxc_handler *handler)
-{
- return;
-}
-#else
__cgfsng_ops static void cgfsng_monitor_destroy(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1230,15 +1118,6 @@ try_lxc_rm_rf:
WARN("Failed to destroy \"%s\"", h->monitor_full_path);
}
}
-#endif
-
-#ifdef HAVE_ISULAD
-__cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
- struct lxc_handler *handler)
-{
- return true;
-}
-#else
static int mkdir_eexist_on_last(const char *dir, mode_t mode)
{
@@ -1398,227 +1277,7 @@ __cgfsng_ops static inline bool cgfsng_monitor_create(struct cgroup_ops *ops,
ops->monitor_cgroup = move_ptr(monitor_cgroup);
return log_info(true, "The monitor process uses \"%s\" as cgroup", ops->monitor_cgroup);
}
-#endif
-
-#ifdef HAVE_ISULAD
-
-static bool isulad_copy_parent_file(char *path, char *file)
-{
- int ret;
- int len = 0;
- char *value = NULL;
- char *current = NULL;
- char *fpath = NULL;
- char *lastslash = NULL;
- char oldv;
-
- fpath = must_make_path(path, file, NULL);
- current = read_file(fpath);
-
- if (current == NULL) {
- SYSERROR("Failed to read file \"%s\"", fpath);
- free(fpath);
- return false;
- }
-
- if (strcmp(current, "\n") != 0) {
- free(fpath);
- free(current);
- return true;
- }
-
- free(fpath);
- free(current);
-
- lastslash = strrchr(path, '/');
- if (lastslash == NULL) {
- ERROR("Failed to detect \"/\" in \"%s\"", path);
- return false;
- }
- oldv = *lastslash;
- *lastslash = '\0';
- fpath = must_make_path(path, file, NULL);
- *lastslash = oldv;
- len = lxc_read_from_file(fpath, NULL, 0);
- if (len <= 0)
- goto on_error;
-
- value = must_realloc(NULL, len + 1);
- ret = lxc_read_from_file(fpath, value, len);
- if (ret != len)
- goto on_error;
- free(fpath);
-
- fpath = must_make_path(path, file, NULL);
- ret = lxc_write_to_file(fpath, value, len, false, 0666);
- if (ret < 0)
- SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
- free(fpath);
- free(value);
- return ret >= 0;
-
-on_error:
- SYSERROR("Failed to read file \"%s\"", fpath);
- free(fpath);
- free(value);
- return false;
-}
-
-static bool build_sub_cpuset_cgroup_dir(char *cgpath)
-{
- int ret;
-
- ret = mkdir_p(cgpath, 0755);
- if (ret < 0) {
- if (errno != EEXIST) {
- SYSERROR("Failed to create directory \"%s\"", cgpath);
- return false;
- }
- }
-
- /* copy parent's settings */
- if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) {
- SYSERROR("Failed to copy \"cpuset.cpus\" settings");
- return false;
- }
-
- /* copy parent's settings */
- if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) {
- SYSERROR("Failed to copy \"cpuset.mems\" settings");
- return false;
- }
-
- return true;
-}
-
-static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
-{
- char *cgpath, *slash;
- bool sub_mk_success = false;
-
- if (!string_in_list(h->controllers, "cpuset"))
- return true;
-
- cgname += strspn(cgname, "/");
-
- slash = strchr(cgname, '/');
-
- if (slash != NULL) {
- while (slash) {
- *slash = '\0';
- cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
- sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath);
- free(cgpath);
- *slash = '/';
- if (!sub_mk_success) {
- return false;
- }
- slash = strchr(slash + 1, '/');
- }
- }
-
- cgpath = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
- sub_mk_success = build_sub_cpuset_cgroup_dir(cgpath);
- free(cgpath);
- if (!sub_mk_success) {
- return false;
- }
-
- return true;
-}
-
-static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode)
-{
- const char *tmp = dir;
- const char *orig = dir;
-
- do {
- int ret;
- size_t cur_len;
- char *makeme;
-
- dir = tmp + strspn(tmp, "/");
- tmp = dir + strcspn(dir, "/");
-
- errno = ENOMEM;
- cur_len = dir - orig;
- makeme = strndup(orig, cur_len);
- if (!makeme)
- return -1;
-
- ret = mkdir(makeme, mode);
- if (ret < 0) {
- if (errno != EEXIST) {
- SYSERROR("Failed to create directory \"%s\"", makeme);
- free(makeme);
- return -1;
- }
- }
- free(makeme);
-
- } while (tmp != dir);
- return 0;
-}
-
-static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd)
-{
- int ret;
- __do_free char *path = NULL;
-
- path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
-
- if (file_exists(path)) { // it must not already exist
- ERROR("Cgroup path \"%s\" already exist.", path);
- lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.",
- __FILE__, __LINE__, path);
- return false;
- }
-
- if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
- ERROR("Failed to handle legacy cpuset controller");
- return false;
- }
-
- ret = isulad_mkdir_eexist_on_last(path, 0755);
- if (ret < 0) {
- ERROR("Failed to create cgroup \"%s\"", path);
- return false;
- }
-
- h->cgfd_con = lxc_open_dirfd(path);
- if (h->cgfd_con < 0)
- return log_error_errno(false, errno, "Failed to open %s", path);
-
- if (h->container_full_path == NULL) {
- h->container_full_path = move_ptr(path);
- }
-
- return true;
-}
-
-/* isulad: create hierarchies path, if fail, return the error */
-__cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
- struct lxc_handler *handler)
-{
- int i;
- char *container_cgroup = ops->container_cgroup;
-
- if (!container_cgroup) {
- ERROR("cgfsng_create container_cgroup is invalid");
- return false;
- }
-
- for (i = 0; ops->hierarchies[i]; i++) {
- if (!create_path_for_hierarchy(ops->hierarchies[i], container_cgroup, ops->errfd)) {
- SYSERROR("Failed to create %s", ops->hierarchies[i]->container_full_path);
- return false;
- }
- }
-
- return true;
-}
-#else
/*
* Try to create the same cgroup in all hierarchies. Start with cgroup_pattern;
* next cgroup_pattern-1, -2, ..., -999.
@@ -1698,15 +1357,7 @@ __cgfsng_ops static inline bool cgfsng_payload_create(struct cgroup_ops *ops,
INFO("The container process uses \"%s\" as cgroup", ops->container_cgroup);
return true;
}
-#endif
-#ifdef HAVE_ISULAD
-__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
- struct lxc_handler *handler)
-{
- return true;
-}
-#else
__cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1758,58 +1409,7 @@ __cgfsng_ops static bool cgfsng_monitor_enter(struct cgroup_ops *ops,
return true;
}
-#endif
-#ifdef HAVE_ISULAD
-__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
- struct lxc_handler *handler)
-{
- int len;
- char pidstr[INTTYPE_TO_STRLEN(pid_t)];
-
- if (!ops)
- return ret_set_errno(false, ENOENT);
-
- if (!ops->hierarchies)
- return true;
-
- if (!ops->container_cgroup)
- return ret_set_errno(false, ENOENT);
-
- if (!handler || !handler->conf)
- return ret_set_errno(false, EINVAL);
-
- len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
-
- for (int i = 0; ops->hierarchies[i]; i++) {
- int ret;
- char *fullpath;
- int retry_count = 0;
- int max_retry = 10;
-
- fullpath = must_make_path(ops->hierarchies[i]->container_full_path,
- "cgroup.procs", NULL);
-retry:
- ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
- if (ret != 0) {
- if (retry_count < max_retry) {
- SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count);
- (void)isulad_cg_legacy_handle_cpuset_hierarchy(ops->hierarchies[i], ops->container_cgroup);
- (void)isulad_mkdir_eexist_on_last(ops->hierarchies[i]->container_full_path, 0755);
- usleep(100 * 1000); /* 100 millisecond */
- retry_count++;
- goto retry;
- }
- SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
- free(fullpath);
- return false;
- }
- free(fullpath);
- }
-
- return true;
-}
-#else
__cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
struct lxc_handler *handler)
{
@@ -1841,7 +1441,6 @@ __cgfsng_ops static bool cgfsng_payload_enter(struct cgroup_ops *ops,
return true;
}
-#endif
static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
gid_t chown_gid, mode_t chmod_mode)
@@ -2056,234 +1655,39 @@ static int __cg_mount_direct(int type, struct hierarchy *h,
flags |= MS_RELATIME;
if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
- flags |= MS_RDONLY;
-
- if (h->version != CGROUP2_SUPER_MAGIC) {
- controllers = lxc_string_join(",", (const char **)h->controllers, false);
- if (!controllers)
- return -ENOMEM;
- fstype = "cgroup";
- }
-
- ret = mount("cgroup", controllerpath, fstype, flags, controllers);
- if (ret < 0)
- return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
- controllerpath, fstype);
-
- DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
- return 0;
-}
-
-static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
- const char *controllerpath)
-{
- return __cg_mount_direct(type, h, controllerpath);
-}
-
-static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
- const char *controllerpath)
-{
- if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
- return 0;
-
- return __cg_mount_direct(type, h, controllerpath);
-}
-
-#ifdef HAVE_ISULAD
-__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
- struct lxc_handler *handler,
- const char *root, int type)
-{
- int i, ret;
- char *tmpfspath = NULL;
- char *systemdpath = NULL;
- char *unifiedpath = NULL;
- bool has_cgns = false, retval = false, wants_force_mount = false;
- char **merged = NULL;
-
- if ((type & LXC_AUTO_CGROUP_MASK) == 0)
- return true;
-
- if (type & LXC_AUTO_CGROUP_FORCE) {
- type &= ~LXC_AUTO_CGROUP_FORCE;
- wants_force_mount = true;
- }
-
- if (!wants_force_mount) {
- if (!lxc_list_empty(&handler->conf->keepcaps))
- wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
- else
- wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
- }
-
- has_cgns = cgns_supported();
- if (has_cgns && !wants_force_mount)
- return true;
-
- if (type == LXC_AUTO_CGROUP_NOSPEC)
- type = LXC_AUTO_CGROUP_MIXED;
- else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
- type = LXC_AUTO_CGROUP_FULL_MIXED;
-
- /* Mount tmpfs */
- tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
- if (mkdir_p(tmpfspath, 0755) < 0) {
- ERROR("Failed to create directory: %s", tmpfspath);
- goto on_error;
- }
- ret = safe_mount(NULL, tmpfspath, "tmpfs",
- MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
- "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context);
- if (ret < 0)
- goto on_error;
-
- for (i = 0; ops->hierarchies[i]; i++) {
- char *controllerpath = NULL;
- char *path2 = NULL;
- struct hierarchy *h = ops->hierarchies[i];
- char *controller = strrchr(h->mountpoint, '/');
-
- if (!controller)
- continue;
- controller++;
-
- // isulad: symlink subcgroup
- if (strchr(controller, ',') != NULL) {
- int pret;
- pret = lxc_append_string(&merged, controller);
- if (pret < 0)
- goto on_error;
- }
-
- controllerpath = must_make_path(tmpfspath, controller, NULL);
- if (dir_exists(controllerpath)) {
- free(controllerpath);
- continue;
- }
-
- ret = mkdir(controllerpath, 0755);
- if (ret < 0) {
- SYSERROR("Error creating cgroup path: %s", controllerpath);
- free(controllerpath);
- goto on_error;
- }
-
- if (has_cgns && wants_force_mount) {
- /* If cgroup namespaces are supported but the container
- * will not have CAP_SYS_ADMIN after it has started we
- * need to mount the cgroups manually.
- */
- ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
- free(controllerpath);
- if (ret < 0)
- goto on_error;
-
- continue;
- }
-
- ret = cg_mount_cgroup_full(type, h, controllerpath);
- if (ret < 0) {
- free(controllerpath);
- goto on_error;
- }
-
- if (!cg_mount_needs_subdirs(type)) {
- free(controllerpath);
- continue;
- }
-
- // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container,
- // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
- path2 = must_make_path(controllerpath, NULL);
- ret = mkdir_p(path2, 0755);
- if (ret < 0) {
- free(controllerpath);
- free(path2);
- goto on_error;
- }
-
- ret = cg_legacy_mount_controllers(type, h, controllerpath,
- path2, ops->container_cgroup);
- free(controllerpath);
- free(path2);
- if (ret < 0)
- goto on_error;
- }
-
- // isulad: symlink subcgroup
- if (merged) {
- char **mc = NULL;
- for (mc = merged; *mc; mc++) {
- char *token = NULL;
- char *copy = must_copy_string(*mc);
- lxc_iterate_parts(token, copy, ",") {
- int mret;
- char *link;
- link = must_make_path(tmpfspath, token, NULL);
- mret = symlink(*mc, link);
- if (mret < 0 && errno != EEXIST) {
- SYSERROR("Failed to create link %s for target %s", link, *mc);
- free(copy);
- free(link);
- goto on_error;
- }
- free(link);
- }
- free(copy);
- }
- }
-
+ flags |= MS_RDONLY;
- // isulad: remount /sys/fs/cgroup to readonly
- if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
- ret = mount(tmpfspath, tmpfspath, "bind",
- MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
- if (ret < 0) {
- SYSERROR("Failed to remount /sys/fs/cgroup.");
- goto on_error;
- }
+ if (h->version != CGROUP2_SUPER_MAGIC) {
+ controllers = lxc_string_join(",", (const char **)h->controllers, false);
+ if (!controllers)
+ return -ENOMEM;
+ fstype = "cgroup";
}
- // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container
- if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0)
- {
- unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL);
- if (dir_exists(unifiedpath))
- {
- ret = umount2(unifiedpath, MNT_DETACH);
- if (ret < 0)
- {
- SYSERROR("Failed to umount /sys/fs/cgroup/unified.");
- goto on_error;
- }
- }
+ ret = mount("cgroup", controllerpath, fstype, flags, controllers);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
+ controllerpath, fstype);
- systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL);
- ret = mount(systemdpath, systemdpath, "bind",
- MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL);
- if (ret < 0)
- {
- SYSERROR("Failed to remount /sys/fs/cgroup/systemd.");
- goto on_error;
- }
- }
+ DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
+ return 0;
+}
- retval = true;
+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
+ const char *controllerpath)
+{
+ return __cg_mount_direct(type, h, controllerpath);
+}
-on_error:
- free(tmpfspath);
- if (systemdpath != NULL)
- {
- free(systemdpath);
- }
- if (unifiedpath != NULL)
- {
- free(unifiedpath);
- }
- lxc_free_array((void **)merged, free);
- return retval;
+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+ const char *controllerpath)
+{
+ if (type < LXC_AUTO_CGROUP_FULL_RO || type > LXC_AUTO_CGROUP_FULL_MIXED)
+ return 0;
+
+ return __cg_mount_direct(type, h, controllerpath);
}
-#else
+
__cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
struct lxc_handler *handler,
const char *root, int type)
@@ -2396,7 +1800,6 @@ __cgfsng_ops static bool cgfsng_mount(struct cgroup_ops *ops,
return true;
}
-#endif
/* Only root needs to escape to the cgroup of its init. */
__cgfsng_ops static bool cgfsng_escape(const struct cgroup_ops *ops,
@@ -2647,34 +2050,11 @@ __cgfsng_ops static const char *cgfsng_get_cgroup(struct cgroup_ops *ops,
return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
controller ? controller : "(null)");
-#ifdef HAVE_ISULAD
- if (!h->container_full_path)
- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
-#endif
-
return h->container_full_path
? h->container_full_path + strlen(h->mountpoint)
: NULL;
}
-#ifdef HAVE_ISULAD
-__cgfsng_ops static const char *cgfsng_get_cgroup_full_path(struct cgroup_ops *ops,
- const char *controller)
-{
- struct hierarchy *h;
-
- h = get_hierarchy(ops, controller);
- if (!h)
- return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
- controller ? controller : "(null)");
-
- if (!h->container_full_path)
- h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, ops->container_cgroup, NULL);
-
- return h->container_full_path;
-}
-#endif
-
/* Given a cgroup path returned from lxc_cmd_get_cgroup_path, build a full path,
* which must be freed by the caller.
*/
@@ -2981,44 +2361,6 @@ __cgfsng_ops static bool cgfsng_attach(struct cgroup_ops *ops,
return true;
}
-#ifdef HAVE_ISULAD
-__cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
- char *value, size_t len, const char *name,
- const char *lxcpath)
-{
- int ret = -1;
- size_t controller_len;
- char *controller, *p, *path;
- struct hierarchy *h;
-
- controller_len = strlen(filename);
- controller = alloca(controller_len + 1);
- (void)strlcpy(controller, filename, controller_len + 1);
-
- p = strchr(controller, '.');
- if (p)
- *p = '\0';
-
- const char *ori_path = ops->get_cgroup(ops, controller);
- if (ori_path == NULL) {
- ERROR("Failed to get cgroup path:%s", controller);
- return -1;
- }
- path = safe_strdup(ori_path);
-
- h = get_hierarchy(ops, controller);
- if (h) {
- char *fullpath;
-
- fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
- ret = lxc_read_from_file(fullpath, value, len);
- free(fullpath);
- }
- free(path);
-
- return ret;
-}
-#else
/* Called externally (i.e. from 'lxc-cgroup') to query cgroup limits. Here we
* don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path.
@@ -3056,7 +2398,6 @@ __cgfsng_ops static int cgfsng_get(struct cgroup_ops *ops, const char *filename,
return ret;
}
-#endif
static int device_cgroup_parse_access(struct device_item *device, const char *val)
{
@@ -3170,44 +2511,6 @@ int device_cgroup_rule_parse(struct device_item *device, const char *key,
return device_cgroup_parse_access(device, ++val);
}
-#ifdef HAVE_ISULAD
-__cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
- const char *filename, const char *value,
- const char *name, const char *lxcpath)
-{
- int ret = -1;
- size_t controller_len;
- char *controller, *p, *path;
- struct hierarchy *h;
-
- controller_len = strlen(filename);
- controller = alloca(controller_len + 1);
- (void)strlcpy(controller, filename, controller_len + 1);
-
- p = strchr(controller, '.');
- if (p)
- *p = '\0';
-
- const char *ori_path = ops->get_cgroup(ops, controller);
- if (ori_path == NULL) {
- ERROR("Failed to get cgroup path:%s", controller);
- return -1;
- }
- path = safe_strdup(ori_path);
-
- h = get_hierarchy(ops, controller);
- if (h) {
- char *fullpath;
-
- fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
- ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
- free(fullpath);
- }
- free(path);
-
- return ret;
-}
-#else
/* Called externally (i.e. from 'lxc-cgroup') to set new cgroup limits. Here we
* don't have a cgroup_data set up, so we ask the running container through the
* commands API for the cgroup path.
@@ -3260,7 +2563,6 @@ __cgfsng_ops static int cgfsng_set(struct cgroup_ops *ops,
return ret;
}
-#endif
/* take devices cgroup line
* /dev/foo rwx
@@ -3352,7 +2654,6 @@ static int convert_devpath(const char *invalue, char *dest)
return 0;
}
-#ifndef HAVE_ISULAD
/* Called from setup_limits - here we have the container's cgroup_data because
* we created the cgroups.
*/
@@ -3385,212 +2686,7 @@ static int cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
return lxc_write_openat(h->container_full_path, filename, value, strlen(value));
}
-#endif
-
-#ifdef HAVE_ISULAD
-/* Called from setup_limits - here we have the container's cgroup_data because
- * we created the cgroups.
- */
-static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename,
- char *value, size_t len)
-{
- char *fullpath = NULL;
- char *p = NULL;
- struct hierarchy *h = NULL;
- int ret = 0;
- char *controller = NULL;
-
- len = strlen(filename);
- if (SIZE_MAX - 1 < len) {
- errno = EINVAL;
- return -1;
- }
- controller = calloc(1, len + 1);
- if (controller == NULL) {
- errno = ENOMEM;
- return -1;
- }
- (void)strlcpy(controller, filename, len + 1);
-
- p = strchr(controller, '.');
- if (p)
- *p = '\0';
-
-
- h = get_hierarchy(ops, controller);
- if (!h) {
- ERROR("Failed to setup limits for the \"%s\" controller. "
- "The controller seems to be unused by \"cgfsng\" cgroup "
- "driver or not enabled on the cgroup hierarchy",
- controller);
- errno = ENOENT;
- free(controller);
- return -ENOENT;
- }
-
- fullpath = must_make_path(h->container_full_path, filename, NULL);
- ret = lxc_read_from_file(fullpath, value, len);
- free(fullpath);
- free(controller);
- return ret;
-}
-
-static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
- const char *value)
-{
- size_t len;
- char *fullpath, *p;
- /* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
- char converted_value[50];
- struct hierarchy *h;
- int ret = 0;
- char *controller = NULL;
- int retry_count = 0;
- int max_retry = 10;
- char *container_cgroup = ops->container_cgroup;
-
- len = strlen(filename);
- controller = alloca(len + 1);
- (void)strlcpy(controller, filename, len + 1);
-
- p = strchr(controller, '.');
- if (p)
- *p = '\0';
-
- if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
- ret = convert_devpath(value, converted_value);
- if (ret < 0)
- return ret;
- value = converted_value;
- }
-
- h = get_hierarchy(ops, controller);
- if (!h) {
- ERROR("Failed to setup limits for the \"%s\" controller. "
- "The controller seems to be unused by \"cgfsng\" cgroup "
- "driver or not enabled on the cgroup hierarchy",
- controller);
- errno = ENOENT;
- return -ENOENT;
- }
-
- fullpath = must_make_path(h->container_full_path, filename, NULL);
-
-retry:
- ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
- if (ret != 0) {
- if (retry_count < max_retry) {
- SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath);
- (void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup);
- (void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
- usleep(100 * 1000); /* 100 millisecond */
- retry_count++;
- goto retry;
- }
- lxc_write_error_message(ops->errfd,
- "%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".",
- __FILE__, __LINE__, value, fullpath, strerror(errno));
- }
- free(fullpath);
- return ret;
-}
-
-__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
- struct lxc_conf *conf,
- bool do_devices)
-{
- __do_free struct lxc_list *sorted_cgroup_settings = NULL;
- struct lxc_list *cgroup_settings = &conf->cgroup;
- struct lxc_list *iterator, *next;
- struct lxc_cgroup *cg;
- bool ret = false;
- char value[21 + 1] = { 0 };
- long long int readvalue, setvalue;
-
- if (!ops)
- return ret_set_errno(false, ENOENT);
-
- if (!conf)
- return ret_set_errno(false, EINVAL);
-
- cgroup_settings = &conf->cgroup;
- if (lxc_list_empty(cgroup_settings))
- return true;
-
- if (!ops->hierarchies)
- return ret_set_errno(false, EINVAL);
-
- sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
- if (!sorted_cgroup_settings)
- return false;
-
- lxc_list_for_each(iterator, sorted_cgroup_settings) {
- cg = iterator->elem;
-
- if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
- const char *cgvalue = cg->value;
- if (strcmp(cg->subsystem, "files.limit") == 0) {
- if (lxc_safe_long_long(cgvalue, &setvalue) != 0) {
- SYSERROR("Invalid integer value %s", cgvalue);
- goto out;
- }
- if (setvalue <= 0) {
- cgvalue = "max";
- }
- }
- if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) {
- if (do_devices && (errno == EACCES || errno == EPERM)) {
- SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
- continue;
- }
- SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
- goto out;
- }
- DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue);
- }
-
- // isulad: check cpu shares
- if (strcmp(cg->subsystem, "cpu.shares") == 0) {
- if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
- SYSERROR("Error get %s", cg->subsystem);
- goto out;
- }
- trim(value);
- if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
- SYSERROR("Invalid value %s", cg->value);
- goto out;
- }
- if (lxc_safe_long_long(value, &readvalue) != 0) {
- SYSERROR("Invalid value %s", value);
- goto out;
- }
- if (setvalue > readvalue) {
- ERROR("The maximum allowed cpu-shares is %s", value);
- lxc_write_error_message(ops->errfd,
- "%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
- __FILE__, __LINE__, value);
- goto out;
- } else if (setvalue < readvalue) {
- ERROR("The minimum allowed cpu-shares is %s", value);
- lxc_write_error_message(ops->errfd,
- "%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
- __FILE__, __LINE__, value);
- goto out;
- }
- }
- }
-
- ret = true;
- INFO("Limits for the legacy cgroup hierarchies have been setup");
-out:
- lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
- lxc_list_del(iterator);
- free(iterator);
- }
- return ret;
-}
-#else
__cgfsng_ops static bool cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
struct lxc_conf *conf,
bool do_devices)
@@ -3644,7 +2740,6 @@ out:
return ret;
}
-#endif
/*
* Some of the parsing logic comes from the original cgroup device v1
@@ -3856,12 +2951,6 @@ bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
return true;
}
-#ifdef HAVE_ISULAD
-__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
-{
- return true;
-}
-#else
__cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
{
if (!ops)
@@ -3869,7 +2958,6 @@ __cgfsng_ops bool cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
return __cgfsng_delegate_controllers(ops, ops->monitor_cgroup);
}
-#endif
__cgfsng_ops bool cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
{
@@ -4019,22 +3107,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
trim(base_cgroup);
prune_init_scope(base_cgroup);
-#ifdef HAVE_ISULAD
- /* isulad: do not test writeable, if we run isulad in docker without cgroup namespace.
- * the base_cgroup will be docker/XXX.., mountpoint+base_cgroup may be not exist */
-
- /*
- * reason:base cgroup may be started with /system.slice when cg_hybrid_init
- * read /proc/1/cgroup on host, and cgroup init will set all containers
- * cgroup path under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc
- * directory, this is not consistent with docker. The default cgroup path
- * should be under /sys/fs/cgroup/<controller>/lxc directory.
- */
- if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
- base_cgroup[1] = '\0';
- }
-#else
bool writeable;
if (type == CGROUP2_SUPER_MAGIC)
writeable = test_writeable_v2(mountpoint, base_cgroup);
@@ -4044,7 +3117,7 @@ static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileg
TRACE("The %s group is not writeable", base_cgroup);
continue;
}
-#endif
+
if (type == CGROUP2_SUPER_MAGIC) {
char *cgv2_ctrl_path;
@@ -4197,44 +3270,6 @@ static int cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
return cg_hybrid_init(ops, relative, !lxc_list_empty(&conf->id_map));
}
-#ifdef HAVE_ISULAD
-__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
-{
- const char *cgroup_pattern;
- const char *cgroup_tree;
- __do_free char *container_cgroup = NULL, *__cgroup_tree = NULL;
- size_t len;
-
- if (!ops)
- return ret_set_errno(-1, ENOENT);
-
- /* copy system-wide cgroup information */
- cgroup_pattern = lxc_global_config_value("lxc.cgroup.pattern");
- if (cgroup_pattern && strcmp(cgroup_pattern, "") != 0)
- ops->cgroup_pattern = must_copy_string(cgroup_pattern);
-
- if (conf->cgroup_meta.dir) {
- cgroup_tree = conf->cgroup_meta.dir;
- container_cgroup = must_concat(&len, cgroup_tree, "/", conf->name, NULL);
- } else if (ops->cgroup_pattern) {
- __cgroup_tree = lxc_string_replace("%n", conf->name, ops->cgroup_pattern);
- if (!__cgroup_tree)
- return ret_set_errno(-1, ENOMEM);
-
- cgroup_tree = __cgroup_tree;
- container_cgroup = must_concat(&len, cgroup_tree, NULL);
- } else {
- cgroup_tree = NULL;
- container_cgroup = must_concat(&len, conf->name, NULL);
- }
- if (!container_cgroup)
- return ret_set_errno(-1, ENOMEM);
-
- ops->container_cgroup = move_ptr(container_cgroup);
-
- return 0;
-}
-#else
__cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
{
const char *cgroup_pattern;
@@ -4249,7 +3284,6 @@ __cgfsng_ops static int cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf
return 0;
}
-#endif
struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
{
@@ -4266,12 +3300,7 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
return NULL;
cgfsng_ops->data_init = cgfsng_data_init;
-#ifdef HAVE_ISULAD
- cgfsng_ops->errfd = conf ? conf->errpipe[1] : -1;
- cgfsng_ops->payload_destroy = isulad_cgfsng_payload_destroy;
-#else
cgfsng_ops->payload_destroy = cgfsng_payload_destroy;
-#endif
cgfsng_ops->monitor_destroy = cgfsng_monitor_destroy;
cgfsng_ops->monitor_create = cgfsng_monitor_create;
cgfsng_ops->monitor_enter = cgfsng_monitor_enter;
@@ -4284,9 +3313,6 @@ struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
cgfsng_ops->num_hierarchies = cgfsng_num_hierarchies;
cgfsng_ops->get_hierarchies = cgfsng_get_hierarchies;
cgfsng_ops->get_cgroup = cgfsng_get_cgroup;
-#ifdef HAVE_ISULAD
- cgfsng_ops->get_cgroup_full_path = cgfsng_get_cgroup_full_path;
-#endif
cgfsng_ops->get = cgfsng_get;
cgfsng_ops->set = cgfsng_set;
cgfsng_ops->freeze = cgfsng_freeze;
diff --git a/src/lxc/cgroups/isulad_cgfsng.c b/src/lxc/cgroups/isulad_cgfsng.c
new file mode 100644
index 000000000..82a4333f3
--- /dev/null
+++ b/src/lxc/cgroups/isulad_cgfsng.c
@@ -0,0 +1,3115 @@
+/******************************************************************************
+ * Copyright (c) Huawei Technologies Co., Ltd. 2019. All rights reserved.
+ * Author: lifeng
+ * Create: 2020-11-02
+ * Description: provide container definition
+ * lxc: linux Container library
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ******************************************************************************/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <grp.h>
+#include <linux/kdev_t.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "af_unix.h"
+#include "caps.h"
+#include "cgroup.h"
+#include "cgroup2_devices.h"
+#include "cgroup_utils.h"
+#include "commands.h"
+#include "conf.h"
+#include "config.h"
+#include "log.h"
+#include "macro.h"
+#include "mainloop.h"
+#include "memory_utils.h"
+#include "storage/storage.h"
+#include "utils.h"
+
+#ifndef HAVE_STRLCPY
+#include "include/strlcpy.h"
+#endif
+
+#ifndef HAVE_STRLCAT
+#include "include/strlcat.h"
+#endif
+
+lxc_log_define(isulad_cgfsng, cgroup);
+
+/* Given a pointer to a NULL-terminated array of pointers, grow it by one
+ * entry and NULL-terminate the grown array. Do not fail. Return the index of
+ * the second-to-last entry - the slot that is now free for the caller to fill
+ * in (its content is left untouched here).
+ */
+static int append_null_to_list(void ***list)
+{
+	int count = 0;
+
+	/* Count the entries currently in use; an absent list has none. */
+	while (*list && (*list)[count])
+		count++;
+
+	*list = must_realloc(*list, (count + 2) * sizeof(void **));
+	(*list)[count + 1] = NULL;
+	return count;
+}
+
+/* Report whether @entry equals (per strcmp) one of the strings in the
+ * NULL-terminated array @list. A NULL @list never matches.
+ */
+static bool string_in_list(char **list, const char *entry)
+{
+	char **cur;
+
+	for (cur = list; cur && *cur; cur++) {
+		if (!strcmp(*cur, entry))
+			return true;
+	}
+
+	return false;
+}
+
+/* Build a newly allocated "name=<entry>" string, i.e. "systemd" becomes
+ * "name=systemd". Do not fail.
+ */
+static char *cg_legacy_must_prefix_named(char *entry)
+{
+	const size_t prefix_len = STRLITERALLEN("name=");
+	size_t entry_len = strlen(entry);
+	char *result;
+
+	/* Prefix + entry + NUL; the second copy takes @entry's NUL along. */
+	result = must_realloc(NULL, prefix_len + entry_len + 1);
+
+	memcpy(result, "name=", prefix_len);
+	memcpy(result + prefix_len, entry, entry_len + 1);
+
+	return result;
+}
+
+/* Append @entry to @clist. Do not fail. @clist must be NULL on first call.
+ *
+ * Named subsystems are handled here too. A controller that is not a kernel
+ * subsystem (not in @klist) is stored with a "name=" prefix. One that is both
+ * a kernel and a named subsystem (in @klist and @nlist) is refused, because we
+ * cannot tell which of the two we have.
+ * (TODO: We could work around this in some cases by just remounting to be
+ * unambiguous, or by comparing mountpoint contents with current cgroup.)
+ *
+ * The last entry will always be NULL.
+ */
+static void must_append_controller(char **klist, char **nlist, char ***clist,
+				   char *entry)
+{
+	bool is_kernel = string_in_list(klist, entry);
+	int slot;
+
+	if (is_kernel && string_in_list(nlist, entry)) {
+		ERROR("Refusing to use ambiguous controller \"%s\"", entry);
+		ERROR("It is both a named and kernel subsystem");
+		return;
+	}
+
+	slot = append_null_to_list((void ***)clist);
+
+	/* Entries that already carry the "name=" prefix and kernel subsystems
+	 * are stored verbatim. Anything else is treated as a named subsystem
+	 * and gets the prefix added.
+	 */
+	if (strncmp(entry, "name=", 5) == 0 || is_kernel)
+		(*clist)[slot] = must_copy_string(entry);
+	else
+		(*clist)[slot] = cg_legacy_must_prefix_named(entry);
+}
+
+/* Find the hierarchy in @ops that lists @controller; a NULL @controller asks
+ * for the empty unified hierarchy. Returns NULL when no hierarchy matches.
+ */
+struct hierarchy *get_hierarchy(struct cgroup_ops *ops, const char *controller)
+{
+	struct hierarchy **cur = ops->hierarchies;
+
+	if (!cur)
+		return log_trace_errno(NULL, errno, "There are no useable cgroup controllers");
+
+	for (; *cur; cur++) {
+		if (!controller) {
+			/* An empty controller list marks the empty unified hierarchy. */
+			if ((*cur)->controllers && !(*cur)->controllers[0])
+				return *cur;
+			continue;
+		}
+		if (pure_unified_layout(ops) && strcmp(controller, "devices") == 0) {
+			if (ops->unified->bpf_device_controller)
+				return ops->unified;
+			break;
+		}
+		if (string_in_list((*cur)->controllers, controller))
+			return *cur;
+	}
+
+	if (controller)
+		WARN("There is no useable %s controller", controller);
+	else
+		WARN("There is no empty unified cgroup hierarchy");
+
+	return ret_set_errno(NULL, ENOENT);
+}
+
+#define BATCH_SIZE 50
+/* Grow *@mem in BATCH_SIZE steps so it can hold @newlen bytes (size_t math). */
+static void batch_realloc(char **mem, size_t oldlen, size_t newlen)
+{
+	size_t newbatches = (newlen / BATCH_SIZE) + 1;
+	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;
+	/* Reallocate only for a first allocation or when a new batch is needed. */
+	if (!*mem || newbatches > oldbatches)
+		*mem = must_realloc(*mem, newbatches * BATCH_SIZE);
+}
+
+/* Append @new (@newlen bytes plus its NUL) to *@dest at offset @oldlen. */
+static void append_line(char **dest, size_t oldlen, char *new, size_t newlen)
+{
+	/* Reserve space for both parts and the terminating NUL byte. */
+	batch_realloc(dest, oldlen, oldlen + newlen + 1);
+
+	memcpy(&(*dest)[oldlen], new, newlen + 1);
+}
+
+/* Slurp in a whole file. Returns NULL if @fnam cannot be opened or is empty. */
+static char *read_file(const char *fnam)
+{
+	__do_free char *buf = NULL, *line = NULL;
+	__do_fclose FILE *f = NULL;
+	size_t len = 0, fulllen = 0;
+	ssize_t linelen; /* getline() returns ssize_t, not int */
+
+	f = fopen(fnam, "re");
+	if (!f)
+		return NULL;
+
+	/* getline() reports both end-of-file and errors as -1. */
+	while ((linelen = getline(&line, &len, f)) != -1) {
+		append_line(&buf, fulllen, line, linelen);
+		fulllen += linelen;
+	}
+	return move_ptr(buf);
+}
+
+static inline bool is_unified_hierarchy(const struct hierarchy *h)
+{
+ return h->version == CGROUP2_SUPER_MAGIC; /* "unified" means the version tag equals CGROUP2_SUPER_MAGIC */
+}
+
+/* Given two NULL-terminated lists of strings, return true if at least one
+ * string occurs in both. A NULL list intersects with nothing.
+ */
+static bool controller_lists_intersect(char **l1, char **l2)
+{
+	bool shared = false;
+
+	if (l1 && l2) {
+		for (char **cur = l1; *cur && !shared; cur++)
+			shared = string_in_list(l2, *cur);
+	}
+
+	return shared;
+}
+
+/* For a NULL-terminated list of controllers @clist, return true if any of those
+ * controllers is already listed in the NULL-terminated list of hierarchies
+ * @hlist. Realistically, if one is present, all must be present.
+ */
+static bool controller_list_is_dup(struct hierarchy **hlist, char **clist)
+{
+	struct hierarchy **cur = hlist;
+
+	while (cur && *cur) {
+		if (controller_lists_intersect((*cur)->controllers, clist))
+			return true;
+		cur++;
+	}
+	return false;
+}
+
+/* Return true if one of the hierarchies in the NULL-terminated list @hlist
+ * lists the controller @entry. A NULL @hlist contains nothing.
+ */
+static bool controller_found(struct hierarchy **hlist, char *entry)
+{
+	int i = 0;
+
+	if (!hlist)
+		return false;
+
+	while (hlist[i] && !string_in_list(hlist[i]->controllers, entry))
+		i++;
+	return hlist[i] != NULL;
+}
+
+/* Return true if every controller named in ops->cgroup_use is provided by one
+ * of ops->hierarchies. The first missing controller is logged as an error.
+ */
+static bool all_controllers_found(struct cgroup_ops *ops)
+{
+	char **wanted = ops->cgroup_use;
+
+	/* Without a cgroup_use list there is nothing to require. */
+	for (int i = 0; wanted && wanted[i]; i++) {
+		if (controller_found(ops->hierarchies, wanted[i]))
+			continue;
+
+		return log_error(false, "No %s controller mountpoint found", wanted[i]);
+	}
+
+	return true;
+}
+
+/* Get the controllers from the mountinfo line @line. There are other ways we
+ * could get this info: for lxcfs, field 3 is /cgroup/controller-list; for
+ * cgroupfs, we could parse the mount options. But we simply assume that the
+ * mountpoint must be /sys/fs/cgroup/controller-list.
+ */
+static char **cg_hybrid_get_controllers(char **klist, char **nlist, char *line,
+ int type)
+{
+ /* The field reached after skipping four space-separated fields is
+ * /sys/fs/cgroup/comma-delimited-controller-list for legacy hierarchies.
+ */
+ __do_free_string_list char **aret = NULL;
+ int i;
+ char *p2, *tok;
+ char *p = line, *sep = ",";
+
+ for (i = 0; i < 4; i++) {
+ p = strchr(p, ' ');
+ if (!p)
+ return NULL;
+ p++;
+ }
+
+ /* Note, if we change how mountinfo works, then our caller will need to
+ * verify /sys/fs/cgroup/ in this field.
+ */
+ if (strncmp(p, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
+ return log_error(NULL, "Found hierarchy not under " DEFAULT_CGROUP_MOUNTPOINT ": \"%s\"", p);
+
+ p += 15; /* step over the 15 prefix bytes compared just above */
+ p2 = strchr(p, ' ');
+ if (!p2)
+ return log_error(NULL, "Corrupt mountinfo");
+ *p2 = '\0'; /* temporarily end the string at the field boundary inside @line */
+
+ if (type == CGROUP_SUPER_MAGIC) { /* any other type appends nothing, so aret stays NULL */
+ __do_free char *dup = NULL;
+
+ /* Copy the field here for v1 hierarchies. Otherwise
+ * lxc_iterate_parts() will destroy mountpoints such as
+ * "/sys/fs/cgroup/cpu,cpuacct".
+ */
+ dup = must_copy_string(p);
+ if (!dup)
+ return NULL;
+
+ lxc_iterate_parts (tok, dup, sep)
+ must_append_controller(klist, nlist, &aret, tok);
+ }
+ *p2 = ' '; /* put back the space that was overwritten above */
+
+ return move_ptr(aret);
+}
+
+/* Return a newly allocated controller list that holds no controllers. */
+static char **cg_unified_make_empty_controller(void)
+{
+	__do_free_string_list char **empty = NULL;
+	int slot = append_null_to_list((void ***)&empty);
+
+	empty[slot] = NULL;
+	return move_ptr(empty);
+}
+
+/* Read @file and return its whitespace-separated words as a NULL-terminated
+ * string list; NULL if read_file() returns nothing or no word was found.
+ */
+static char **cg_unified_get_controllers(const char *file)
+{
+	__do_free char *content = NULL;
+	__do_free_string_list char **list = NULL;
+	char *delim = " \t\n";
+	char *word;
+
+	content = read_file(file);
+	if (!content)
+		return NULL;
+
+	lxc_iterate_parts(word, content, delim) {
+		int slot = append_null_to_list((void ***)&list);
+
+		list[slot] = must_copy_string(word);
+	}
+
+	return move_ptr(list);
+}
+
+/* Allocate a zeroed hierarchy descriptor, fill it in and append it to the
+ * list that @h points to.
+ *
+ * @clist, @mountpoint and @container_base_path are stored as-is (no copies are
+ * made), @type is stored as the hierarchy version and both cgroup fds start
+ * out as -EBADF. Returns the new descriptor.
+ */
+static struct hierarchy *add_hierarchy(struct hierarchy ***h, char **clist, char *mountpoint,
+				       char *container_base_path, int type)
+{
+	struct hierarchy *new;
+	int newentry;
+
+	/* The descriptor is dereferenced right away, so use must_realloc(),
+	 * whose result this file uses unchecked everywhere (presumably it
+	 * never returns NULL), instead of an unchecked zalloc().
+	 */
+	new = must_realloc(NULL, sizeof(*new));
+	memset(new, 0, sizeof(*new));
+
+	new->controllers = clist;
+	new->mountpoint = mountpoint;
+	new->container_base_path = container_base_path;
+	new->version = type;
+	new->cgfd_con = -EBADF;
+	new->cgfd_mon = -EBADF;
+
+	newentry = append_null_to_list((void ***)h);
+	(*h)[newentry] = new;
+	return new;
+}
+
+/* Return a freshly allocated copy of the mountpoint found in @line, a line
+ * taken from /proc/self/mountinfo.
+ *
+ * The first four space-separated fields are skipped and the following field
+ * must start with DEFAULT_CGROUP_MOUNTPOINT "/"; otherwise NULL is returned.
+ * Note that the space terminating the mountpoint is overwritten with '\0' in
+ * @line and is not restored.
+ */
+static char *cg_hybrid_get_mountpoint(char *line)
+{
+	char *start = line;
+	char *end, *copy;
+	size_t n;
+	int skipped = 0;
+
+	while (skipped < 4) {
+		start = strchr(start, ' ');
+		if (!start)
+			return NULL;
+		start++;
+		skipped++;
+	}
+
+	if (strncmp(start, DEFAULT_CGROUP_MOUNTPOINT "/", 15) != 0)
+		return NULL;
+
+	end = strchr(start + 15, ' ');
+	if (!end)
+		return NULL;
+	*end = '\0';
+
+	n = strlen(start);
+	copy = must_realloc(NULL, n + 1);
+	memcpy(copy, start, n);
+	copy[n] = '\0';
+
+	return copy;
+}
+
+/* Duplicate the text from @p up to, but not including, the next '\n'.
+ * Returns NULL when @p contains no newline; the caller owns the result.
+ */
+static char *copy_to_eol(char *p)
+{
+	char *newline = strchr(p, '\n');
+	char *out;
+	size_t n;
+
+	if (!newline)
+		return NULL;
+
+	n = newline - p;
+	out = must_realloc(NULL, n + 1);
+	memcpy(out, p, n);
+	out[n] = '\0';
+
+	return out;
+}
+
+/* Check whether controller @c is listed in a /proc/self/cgroup line.
+ *
+ * @cgline points just past the first ':' of a line of a \n-terminated
+ * /proc/self/cgroup file; the text up to the next ':' is treated as a
+ * comma-separated controller list. Returns false if that ':' is missing.
+ */
+static bool controller_in_clist(char *cgline, char *c)
+{
+	__do_free char *names = NULL;
+	char *colon, *name;
+	size_t n;
+
+	colon = strchr(cgline, ':');
+	if (!colon)
+		return false;
+
+	/* Tokenize a private copy so that @cgline stays untouched. */
+	n = colon - cgline;
+	names = must_realloc(NULL, n + 1);
+	memcpy(names, cgline, n);
+	names[n] = '\0';
+
+	lxc_iterate_parts(name, names, ",") {
+		if (strcmp(name, c) == 0)
+			return true;
+	}
+
+	return false;
+}
+
+/* @basecginfo is a copy of /proc/$$/cgroup. Return the current cgroup for
+ * @controller.
+ *
+ * The buffer is scanned line by line. A line matches when @type is
+ * CGROUP2_SUPER_MAGIC and the line starts with '0', or when @controller is
+ * non-NULL and appears in the line's controller list. For a matching line the
+ * text after the second ':' up to the end of the line is returned as a newly
+ * allocated string. NULL is returned when no line matches or a line is
+ * malformed (missing ':' or missing trailing newline).
+ */
+static char *cg_hybrid_get_current_cgroup(char *basecginfo, char *controller,
+ int type)
+{
+ char *p = basecginfo;
+
+ for (;;) {
+ bool is_cgv2_base_cgroup = false;
+
+ /* cgroup v2 entry in "/proc/<pid>/cgroup": "0::/some/path" */
+ if ((type == CGROUP2_SUPER_MAGIC) && (*p == '0'))
+ is_cgv2_base_cgroup = true;
+
+ /* Move to the controller list, i.e. just past the first ':'. */
+ p = strchr(p, ':');
+ if (!p)
+ return NULL;
+ p++;
+
+ if (is_cgv2_base_cgroup || (controller && controller_in_clist(p, controller))) {
+ /* Move past the second ':' to reach the cgroup path. */
+ p = strchr(p, ':');
+ if (!p)
+ return NULL;
+ p++;
+ return copy_to_eol(p);
+ }
+
+ /* No match: continue with the next line. */
+ p = strchr(p, '\n');
+ if (!p)
+ return NULL;
+ p++;
+ }
+}
+
+/* Append a copy of @entry to the string list @list, growing the list with
+ * append_null_to_list().
+ */
+static void must_append_string(char ***list, char *entry)
+{
+	int slot = append_null_to_list((void ***)list);
+
+	(*list)[slot] = must_copy_string(entry);
+}
+
+/* Collect the controller names listed in /proc/self/cgroup.
+ *
+ * For every line the text between the first and the second ':' is split on
+ * ','. Tokens starting with "name=" are appended to @nlist, all other tokens
+ * to @klist. A line with an empty controller field adds "cgroup2" to @klist.
+ * Lines without two ':' are skipped.
+ *
+ * Returns 0 on success and -1 if /proc/self/cgroup cannot be opened.
+ */
+static int get_existing_subsystems(char ***klist, char ***nlist)
+{
+ __do_free char *line = NULL;
+ __do_fclose FILE *f = NULL;
+ size_t len = 0;
+
+ f = fopen("/proc/self/cgroup", "re");
+ if (!f)
+ return -1;
+
+ while (getline(&line, &len, f) != -1) {
+ char *p, *p2, *tok;
+ p = strchr(line, ':');
+ if (!p)
+ continue;
+ p++;
+ p2 = strchr(p, ':');
+ if (!p2)
+ continue;
+ /* Terminate the controller field so it can be tokenized in place. */
+ *p2 = '\0';
+
+ /* If the kernel has cgroup v2 support, then /proc/self/cgroup
+ * contains an entry of the form:
+ *
+ * 0::/some/path
+ *
+ * In this case we use "cgroup2" as controller name.
+ */
+ if ((p2 - p) == 0) {
+ must_append_string(klist, "cgroup2");
+ continue;
+ }
+
+ lxc_iterate_parts(tok, p, ",") {
+ if (strncmp(tok, "name=", 5) == 0)
+ must_append_string(nlist, tok);
+ else
+ must_append_string(klist, tok);
+ }
+ }
+
+ return 0;
+}
+
+/* Strip trailing '\n' characters from @s in place and return @s. The first
+ * character is never removed, so a string consisting of a single "\n" is left
+ * unchanged.
+ */
+static char *trim(char *s)
+{
+	size_t end;
+
+	for (end = strlen(s); end > 1 && s[end - 1] == '\n'; end--)
+		s[end - 1] = '\0';
+
+	return s;
+}
+
+/* Emit a TRACE dump of every hierarchy known to @ops: its base cgroup, its
+ * mountpoint and the controllers attached to it.
+ */
+static void lxc_cgfsng_print_hierarchies(struct cgroup_ops *ops)
+{
+	struct hierarchy **hp = ops->hierarchies;
+	int idx = 0;
+
+	if (!hp) {
+		TRACE(" No hierarchies found");
+		return;
+	}
+
+	TRACE(" Hierarchies:");
+	while (*hp) {
+		struct hierarchy *h = *hp;
+		char **ctrl = h->controllers;
+		int cidx = 0;
+
+		TRACE(" %d: base_cgroup: %s", idx, h->container_base_path ? h->container_base_path : "(null)");
+		TRACE(" mountpoint: %s", h->mountpoint ? h->mountpoint : "(null)");
+		TRACE(" controllers:");
+		while (ctrl && *ctrl) {
+			TRACE(" %d: %s", cidx, *ctrl);
+			ctrl++;
+			cidx++;
+		}
+
+		hp++;
+		idx++;
+	}
+}
+
+/* Emit a TRACE dump of @basecginfo followed by the entries of the kernel
+ * subsystem list @klist and the named subsystem list @nlist. Either list may
+ * be NULL.
+ */
+static void lxc_cgfsng_print_basecg_debuginfo(char *basecginfo, char **klist,
+					      char **nlist)
+{
+	int idx;
+
+	TRACE("basecginfo is:");
+	TRACE("%s", basecginfo);
+
+	for (idx = 0; klist && klist[idx]; idx++)
+		TRACE("kernel subsystem %d: %s", idx, klist[idx]);
+
+	for (idx = 0; nlist && nlist[idx]; idx++)
+		TRACE("named subsystem %d: %s", idx, nlist[idx]);
+}
+
+/* Argument bundle handed to the callbacks that are run through
+ * userns_exec_1() (cgroup removal and cgroup chown wrappers).
+ */
+struct generic_userns_exec_data {
+ struct hierarchy **hierarchies; /* hierarchy list the callback iterates over */
+ const char *container_cgroup; /* container cgroup name, used when removing the tree */
+ struct lxc_conf *conf; /* container config; supplies the uid/gid to switch to */
+ uid_t origuid; /* target uid in parent namespace */
+ char *path; /* not read by the callbacks visible in this part of the file */
+};
+
+/* Remove the container's cgroup directory in every hierarchy.
+ *
+ * If a hierarchy has no container_full_path yet, it is built from mountpoint,
+ * container_base_path and @container_cgroup. After a successful removal the
+ * path is released with free_disarm(). Returns 0 on success (or when either
+ * argument is NULL) and -1 as soon as one removal fails.
+ */
+static int isulad_cgroup_tree_remove(struct hierarchy **hierarchies,
+				     const char *container_cgroup)
+{
+	struct hierarchy **cur;
+
+	if (!hierarchies || !container_cgroup)
+		return 0;
+
+	for (cur = hierarchies; *cur; cur++) {
+		struct hierarchy *h = *cur;
+
+		if (!h->container_full_path)
+			h->container_full_path = must_make_path(h->mountpoint, h->container_base_path, container_cgroup, NULL);
+
+		if (lxc_rm_rf(h->container_full_path) < 0) {
+			SYSERROR("Failed to destroy \"%s\"", h->container_full_path);
+			return -1;
+		}
+
+		free_disarm(h->container_full_path);
+	}
+
+	return 0;
+}
+
+/* userns_exec_1() callback: drop supplementary groups, switch to the
+ * container's root (or init) gid and uid, then remove the container cgroups.
+ * @data is a struct generic_userns_exec_data. Returns -1 on failure.
+ */
+static int isulad_cgroup_tree_remove_wrapper(void *data)
+{
+	struct generic_userns_exec_data *arg = data;
+	uid_t nsuid = arg->conf->init_uid;
+	gid_t nsgid = arg->conf->init_gid;
+
+	/* Id 0 is used whenever a root mapping exists. */
+	if (arg->conf->root_nsuid_map != NULL)
+		nsuid = 0;
+	if (arg->conf->root_nsgid_map != NULL)
+		nsgid = 0;
+
+	if (!lxc_setgroups(0, NULL) && errno != EPERM)
+		return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+	if (setresgid(nsgid, nsgid, nsgid) < 0)
+		return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+				       (int)nsgid, (int)nsgid, (int)nsgid);
+
+	if (setresuid(nsuid, nsuid, nsuid) < 0)
+		return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+				       (int)nsuid, (int)nsuid, (int)nsuid);
+
+	return isulad_cgroup_tree_remove(arg->hierarchies, arg->container_cgroup);
+}
+
+/* Destroy the container's cgroups in all hierarchies.
+ *
+ * When the config carries an id mapping, the removal runs inside a user
+ * namespace via userns_exec_1(); otherwise it runs directly. Returns false if
+ * @ops, its hierarchies, @handler or its conf is missing, or if the removal
+ * fails.
+ */
+__cgfsng_ops static bool isulad_cgfsng_payload_destroy(struct cgroup_ops *ops,
+						struct lxc_handler *handler)
+{
+	int ret;
+
+	if (!ops) {
+		ERROR("Called with uninitialized cgroup operations");
+		return false;
+	}
+
+	if (!ops->hierarchies)
+		return false;
+
+	if (!handler) {
+		ERROR("Called with uninitialized handler");
+		return false;
+	}
+
+	if (!handler->conf) {
+		ERROR("Called with uninitialized conf");
+		return false;
+	}
+
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	ret = bpf_program_cgroup_detach(handler->conf->cgroup2_devices);
+	if (ret < 0)
+		WARN("Failed to detach bpf program from cgroup");
+#endif
+
+	/* handler->conf was verified above, so only the id map decides. */
+	if (lxc_list_empty(&handler->conf->id_map)) {
+		ret = isulad_cgroup_tree_remove(ops->hierarchies, ops->container_cgroup);
+	} else {
+		struct generic_userns_exec_data wrap = {
+			.conf = handler->conf,
+			.container_cgroup = ops->container_cgroup,
+			.hierarchies = ops->hierarchies,
+			.origuid = 0,
+		};
+
+		ret = userns_exec_1(handler->conf, isulad_cgroup_tree_remove_wrapper,
+				    &wrap, "cgroup_tree_remove_wrapper");
+	}
+
+	if (ret < 0) {
+		SYSWARN("Failed to destroy cgroups");
+		return false;
+	}
+
+	return true;
+}
+
+/* Intentionally a no-op: nothing is torn down for the monitor here. */
+__cgfsng_ops static void isulad_cgfsng_monitor_destroy(struct cgroup_ops *ops,
+						struct lxc_handler *handler)
+{
+	(void)ops;
+	(void)handler;
+}
+
+/* Intentionally a no-op that always reports success. */
+__cgfsng_ops static inline bool isulad_cgfsng_monitor_create(struct cgroup_ops *ops,
+							      struct lxc_handler *handler)
+{
+	(void)ops;
+	(void)handler;
+
+	return true;
+}
+
+/* Initialise @file inside the cgroup directory @path from its parent.
+ *
+ * If <path>/<file> already holds something other than a lone "\n", nothing is
+ * done. Otherwise the content of <parent of path>/<file> is read and written
+ * to <path>/<file>. @path is modified temporarily (its last '/' is cut) while
+ * the parent path is built and is restored afterwards.
+ *
+ * Returns true when the file was already set or was copied successfully,
+ * false on any read or write failure.
+ */
+static bool isulad_copy_parent_file(char *path, char *file)
+{
+	int ret;
+	int len = 0;
+	char *value = NULL;
+	char *current = NULL;
+	char *fpath = NULL;
+	char *lastslash = NULL;
+	char oldv;
+	bool is_unset;
+
+	fpath = must_make_path(path, file, NULL);
+	current = read_file(fpath);
+	if (current == NULL) {
+		SYSERROR("Failed to read file \"%s\"", fpath);
+		free(fpath);
+		return false;
+	}
+
+	is_unset = strcmp(current, "\n") == 0;
+	free(fpath);
+	free(current);
+	if (!is_unset)
+		return true;
+
+	lastslash = strrchr(path, '/');
+	if (lastslash == NULL) {
+		ERROR("Failed to detect \"/\" in \"%s\"", path);
+		return false;
+	}
+
+	/* Build <parent>/<file> by temporarily cutting @path at its last '/'. */
+	oldv = *lastslash;
+	*lastslash = '\0';
+	fpath = must_make_path(path, file, NULL);
+	*lastslash = oldv;
+
+	/* First call only determines the length of the parent's value. */
+	len = lxc_read_from_file(fpath, NULL, 0);
+	if (len <= 0)
+		goto on_error;
+
+	value = must_realloc(NULL, len + 1);
+	ret = lxc_read_from_file(fpath, value, len);
+	if (ret != len)
+		goto on_error;
+	/* Terminate the buffer: it is printed with "%s" if the write fails. */
+	value[len] = '\0';
+	free(fpath);
+
+	fpath = must_make_path(path, file, NULL);
+	ret = lxc_write_to_file(fpath, value, len, false, 0666);
+	if (ret < 0)
+		SYSERROR("Failed to write \"%s\" to file \"%s\"", value, fpath);
+	free(fpath);
+	free(value);
+	return ret >= 0;
+
+on_error:
+	SYSERROR("Failed to read file \"%s\"", fpath);
+	free(fpath);
+	free(value);
+	return false;
+}
+
+/* Create the cpuset cgroup directory @cgpath (an already existing directory
+ * is fine) and seed its cpuset.cpus and cpuset.mems from the parent cgroup.
+ * Returns false on the first failure.
+ */
+static bool build_sub_cpuset_cgroup_dir(char *cgpath)
+{
+	if (mkdir_p(cgpath, 0755) < 0 && errno != EEXIST) {
+		SYSERROR("Failed to create directory \"%s\"", cgpath);
+		return false;
+	}
+
+	/* Inherit the CPU list from the parent. */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.cpus")) {
+		SYSERROR("Failed to copy \"cpuset.cpus\" settings");
+		return false;
+	}
+
+	/* Inherit the memory node list from the parent. */
+	if (!isulad_copy_parent_file(cgpath, "cpuset.mems")) {
+		SYSERROR("Failed to copy \"cpuset.mems\" settings");
+		return false;
+	}
+
+	return true;
+}
+
+/* Prepare the cpuset hierarchy for the cgroup @cgname.
+ *
+ * Does nothing (and returns true) unless @h carries the "cpuset" controller.
+ * Otherwise every directory level of @cgname below the hierarchy's base path
+ * is created in turn, from the outermost to the full path, and initialised
+ * with build_sub_cpuset_cgroup_dir(). @cgname is modified temporarily while
+ * the intermediate paths are built. Returns false on the first failure.
+ */
+static bool isulad_cg_legacy_handle_cpuset_hierarchy(struct hierarchy *h, char *cgname)
+{
+	char *level_path, *sep;
+	bool ok;
+
+	if (!string_in_list(h->controllers, "cpuset"))
+		return true;
+
+	cgname += strspn(cgname, "/");
+
+	/* One pass per intermediate level: cut the name at each '/'. */
+	for (sep = strchr(cgname, '/'); sep; sep = strchr(sep + 1, '/')) {
+		*sep = '\0';
+		level_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+		ok = build_sub_cpuset_cgroup_dir(level_path);
+		free(level_path);
+		*sep = '/';
+		if (!ok)
+			return false;
+	}
+
+	/* Finally the complete cgroup path. */
+	level_path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+	ok = build_sub_cpuset_cgroup_dir(level_path);
+	free(level_path);
+
+	return ok;
+}
+
+/* Create @dir and all of its parent directories with @mode.
+ *
+ * Each pass of the loop calls mkdir() on the prefix of the original string
+ * that ends right before the current path component, so the first pass sees
+ * only the leading slashes and the last pass sees the complete string. EEXIST
+ * from mkdir() is tolerated on every level; any other error aborts.
+ *
+ * Returns 0 on success, -1 on failure (errno is ENOMEM if strndup() failed).
+ * NOTE(review): for a path without a leading '/', the first prefix is the
+ * empty string - confirm callers only pass absolute paths.
+ */
+static int isulad_mkdir_eexist_on_last(const char *dir, mode_t mode)
+{
+ const char *tmp = dir;
+ const char *orig = dir;
+
+ do {
+ int ret;
+ size_t cur_len;
+ char *makeme;
+
+ /* dir: start of the next component, tmp: its end. */
+ dir = tmp + strspn(tmp, "/");
+ tmp = dir + strcspn(dir, "/");
+
+ /* Preset errno so a strndup() failure is reported as ENOMEM. */
+ errno = ENOMEM;
+ cur_len = dir - orig;
+ makeme = strndup(orig, cur_len);
+ if (!makeme)
+ return -1;
+
+ ret = mkdir(makeme, mode);
+ if (ret < 0) {
+ if (errno != EEXIST) {
+ SYSERROR("Failed to create directory \"%s\"", makeme);
+ free(makeme);
+ return -1;
+ }
+ }
+ free(makeme);
+
+ /* tmp == dir means the component was empty: end of string reached. */
+ } while (tmp != dir);
+
+ return 0;
+}
+
+/* Create the cgroup @cgname inside hierarchy @h.
+ *
+ * The target path must not exist yet; if it does, the error is also reported
+ * through @errfd. On success a directory fd for the new cgroup is stored in
+ * h->cgfd_con and, unless already set, the path becomes
+ * h->container_full_path. Returns false on any failure.
+ */
+static bool create_path_for_hierarchy(struct hierarchy *h, char *cgname, int errfd)
+{
+	__do_free char *path = NULL;
+
+	path = must_make_path(h->mountpoint, h->container_base_path, cgname, NULL);
+
+	/* The cgroup must not already exist. */
+	if (file_exists(path)) {
+		ERROR("Cgroup path \"%s\" already exist.", path);
+		lxc_write_error_message(errfd, "%s:%d: Cgroup path \"%s\" already exist.",
+					__FILE__, __LINE__, path);
+		return false;
+	}
+
+	if (!isulad_cg_legacy_handle_cpuset_hierarchy(h, cgname)) {
+		ERROR("Failed to handle legacy cpuset controller");
+		return false;
+	}
+
+	if (isulad_mkdir_eexist_on_last(path, 0755) < 0) {
+		ERROR("Failed to create cgroup \"%s\"", path);
+		return false;
+	}
+
+	h->cgfd_con = lxc_open_dirfd(path);
+	if (h->cgfd_con < 0)
+		return log_error_errno(false, errno, "Failed to open %s", path);
+
+	if (!h->container_full_path)
+		h->container_full_path = move_ptr(path);
+
+	return true;
+}
+
+/* isulad: create the container cgroup in every hierarchy.
+ *
+ * Returns false with errno set to ENOENT when @ops is NULL, true when there
+ * are no hierarchies to act on (matching the other payload operations in
+ * this file), and false when the container cgroup name is missing or a
+ * hierarchy path cannot be created. @handler is not used.
+ */
+__cgfsng_ops static inline bool isulad_cgfsng_payload_create(struct cgroup_ops *ops,
+							      struct lxc_handler *handler)
+{
+	char *container_cgroup;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	container_cgroup = ops->container_cgroup;
+	if (!container_cgroup) {
+		ERROR("cgfsng_create container_cgroup is invalid");
+		return false;
+	}
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		struct hierarchy *h = ops->hierarchies[i];
+
+		if (!create_path_for_hierarchy(h, container_cgroup, ops->errfd)) {
+			/* container_full_path is only set once creation
+			 * succeeded, so it may still be NULL here.
+			 */
+			SYSERROR("Failed to create %s",
+				 h->container_full_path ? h->container_full_path : "(null)");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Intentionally a no-op that always reports success. */
+__cgfsng_ops static bool isulad_cgfsng_monitor_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	(void)ops;
+	(void)handler;
+
+	return true;
+}
+
+/* Move the container init process (handler->pid) into the container cgroup
+ * of every hierarchy by writing its pid to cgroup.procs.
+ *
+ * A failed write is retried up to ten times, 100 ms apart; before each retry
+ * the cpuset setup and the cgroup directory creation are attempted again.
+ * Returns true when there are no hierarchies or all writes succeeded.
+ */
+__cgfsng_ops static bool isulad_cgfsng_payload_enter(struct cgroup_ops *ops,
+					      struct lxc_handler *handler)
+{
+	int len;
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, ENOENT);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+
+	len = snprintf(pidstr, sizeof(pidstr), "%d", handler->pid);
+
+	for (int i = 0; ops->hierarchies[i]; i++) {
+		struct hierarchy *h = ops->hierarchies[i];
+		const int max_retry = 10;
+		int retry_count;
+		char *fullpath;
+
+		fullpath = must_make_path(h->container_full_path,
+					  "cgroup.procs", NULL);
+
+		for (retry_count = 0;; retry_count++) {
+			if (lxc_write_to_file(fullpath, pidstr, len, false, 0666) == 0)
+				break;
+
+			if (retry_count >= max_retry) {
+				SYSERROR("Failed to enter cgroup \"%s\"", fullpath);
+				free(fullpath);
+				return false;
+			}
+
+			SYSERROR("Failed to enter cgroup \"%s\" with retry count:%d", fullpath, retry_count);
+			(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, ops->container_cgroup);
+			(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+			usleep(100 * 1000); /* 100 millisecond */
+		}
+
+		free(fullpath);
+	}
+
+	return true;
+}
+
+/* chown and then chmod @path relative to @dirfd.
+ *
+ * An empty @path makes fchownat() act on @dirfd itself (AT_EMPTY_PATH); for
+ * the chmod step "." is used in that case. Returns 0 on success and -1, with a
+ * warning logged, if either step fails.
+ */
+static int fchowmodat(int dirfd, const char *path, uid_t chown_uid,
+		      gid_t chown_gid, mode_t chmod_mode)
+{
+	const char *chmod_target = path;
+
+	if (fchownat(dirfd, path, chown_uid, chown_gid,
+		     AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) < 0)
+		return log_warn_errno(-1,
+				      errno, "Failed to fchownat(%d, %s, %d, %d, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW )",
+				      dirfd, path, (int)chown_uid,
+				      (int)chown_gid);
+
+	if (*path == '\0')
+		chmod_target = ".";
+
+	if (fchmodat(dirfd, chmod_target, chmod_mode, 0) < 0)
+		return log_warn_errno(-1, errno, "Failed to fchmodat(%d, %s, %d, AT_SYMLINK_NOFOLLOW)",
+				      dirfd, path, (int)chmod_mode);
+
+	return 0;
+}
+
+/* chgrp the container cgroups to container group. We leave
+ * the container owner as cgroup owner. So we must make the
+ * directories 775 so that the container can create sub-cgroups.
+ *
+ * Also chown the tasks and cgroup.procs files. Those may not
+ * exist depending on kernel version.
+ *
+ * @data is a struct generic_userns_exec_data. The function first drops
+ * supplementary groups and switches to the container's gid/uid (0 when a root
+ * mapping exists, otherwise the configured init ids). Failures of the
+ * individual chown/chmod calls are ignored; only a failure to switch
+ * credentials makes the function return -1.
+ */
+static int chown_cgroup_wrapper(void *data)
+{
+ int ret;
+ uid_t destuid;
+ struct generic_userns_exec_data *arg = data;
+ uid_t nsuid = (arg->conf->root_nsuid_map != NULL) ? 0 : arg->conf->init_uid;
+ gid_t nsgid = (arg->conf->root_nsgid_map != NULL) ? 0 : arg->conf->init_gid;
+
+ /* EPERM from setgroups is tolerated. */
+ if (!lxc_setgroups(0, NULL) && errno != EPERM)
+ return log_error_errno(-1, errno, "Failed to setgroups(0, NULL)");
+
+ ret = setresgid(nsgid, nsgid, nsgid);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to setresgid(%d, %d, %d)",
+ (int)nsgid, (int)nsgid, (int)nsgid);
+
+ ret = setresuid(nsuid, nsuid, nsuid);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to setresuid(%d, %d, %d)",
+ (int)nsuid, (int)nsuid, (int)nsuid);
+
+ /* Fall back to uid 0 if the original uid cannot be translated. */
+ destuid = get_ns_uid(arg->origuid);
+ if (destuid == LXC_INVALID_UID)
+ destuid = 0;
+
+ for (int i = 0; arg->hierarchies[i]; i++) {
+ int dirfd = arg->hierarchies[i]->cgfd_con;
+
+ /* The cgroup directory itself (empty path == dirfd). */
+ (void)fchowmodat(dirfd, "", destuid, nsgid, 0775);
+
+ /*
+ * Failures to chown() these are inconvenient but not
+ * detrimental We leave these owned by the container launcher,
+ * so that container root can write to the files to attach. We
+ * chmod() them 664 so that container systemd can write to the
+ * files (which systemd in wily insists on doing).
+ */
+
+ if (arg->hierarchies[i]->version == CGROUP_SUPER_MAGIC)
+ (void)fchowmodat(dirfd, "tasks", destuid, nsgid, 0664);
+
+ (void)fchowmodat(dirfd, "cgroup.procs", destuid, nsgid, 0664);
+
+ if (arg->hierarchies[i]->version != CGROUP2_SUPER_MAGIC)
+ continue;
+
+ /* cgroup2 only: additional files listed in cgroup2_chown. */
+ for (char **p = arg->hierarchies[i]->cgroup2_chown; p && *p; p++)
+ (void)fchowmodat(dirfd, *p, destuid, nsgid, 0664);
+ }
+
+ return 0;
+}
+
+/* Hand ownership of the container cgroups to the container's ids by running
+ * chown_cgroup_wrapper() through userns_exec_1().
+ *
+ * Nothing is done (and true is returned) when there are no hierarchies or
+ * when @conf has no id mapping. Returns false when @ops, the container cgroup
+ * name or @conf is missing, or when the userns helper fails.
+ */
+__cgfsng_ops static bool isulad_cgfsng_chown(struct cgroup_ops *ops,
+				       struct lxc_conf *conf)
+{
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, ENOENT);
+
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	if (lxc_list_empty(&conf->id_map))
+		return true;
+
+	struct generic_userns_exec_data wrap = {
+		.origuid = geteuid(),
+		.path = NULL,
+		.hierarchies = ops->hierarchies,
+		.conf = conf,
+	};
+
+	if (userns_exec_1(conf, chown_cgroup_wrapper, &wrap, "chown_cgroup_wrapper") < 0)
+		return log_error_errno(false, errno, "Error requesting cgroup chown in new user namespace");
+
+	return true;
+}
+
+/* Close the container cgroup fds of all non-unified hierarchies. */
+__cgfsng_ops void isulad_cgfsng_payload_finalize(struct cgroup_ops *ops)
+{
+	struct hierarchy **hp;
+
+	if (!ops || !ops->hierarchies)
+		return;
+
+	for (hp = ops->hierarchies; *hp; hp++) {
+		/*
+		 * The fds of non-unified hierarchies are not kept around:
+		 * they are no longer used once the core cgroup setup is done
+		 * and there are quite a lot of them.
+		 */
+		if (is_unified_hierarchy(*hp))
+			continue;
+
+		close_prot_errno_disarm((*hp)->cgfd_con);
+	}
+}
+
+/* The cgroup-full:* modes are complete after the direct mount; only the mount
+ * types below LXC_AUTO_CGROUP_FULL_RO still need sub-directories created.
+ */
+static inline bool cg_mount_needs_subdirs(int type)
+{
+	return type < LXC_AUTO_CGROUP_FULL_RO;
+}
+
+/* After $rootfs/sys/fs/container/controller/the/cg/path has been created,
+ * remount controller ro if needed and bindmount the cgroupfs onto
+ * control/the/cg/path.
+ *
+ * @type:             LXC_AUTO_CGROUP_* mount mode
+ * @h:                hierarchy whose cgroup is bind-mounted
+ * @controllerpath:   controller directory inside the container rootfs
+ * @cgpath:           bind-mount target
+ * @container_cgroup: container cgroup name; together with the hierarchy's
+ *                    mountpoint and base path it forms the bind-mount source
+ *
+ * Returns 0 on success and -1 if any mount call fails.
+ */
+static int cg_legacy_mount_controllers(int type, struct hierarchy *h,
+ char *controllerpath, char *cgpath,
+ const char *container_cgroup)
+{
+ __do_free char *sourcepath = NULL;
+ int ret, remount_flags;
+ int flags = MS_BIND;
+
+ /* RO and MIXED: bind-mount the controller directory onto itself, then
+ * remount that bind mount read-only.
+ */
+ if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_MIXED) {
+ ret = mount(controllerpath, controllerpath, "cgroup", MS_BIND, NULL);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to bind mount \"%s\" onto \"%s\"",
+ controllerpath, controllerpath);
+
+ remount_flags = add_required_remount_flags(controllerpath,
+ controllerpath,
+ flags | MS_REMOUNT);
+ ret = mount(controllerpath, controllerpath, "cgroup",
+ remount_flags | MS_REMOUNT | MS_BIND | MS_RDONLY,
+ NULL);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", controllerpath);
+
+ INFO("Remounted %s read-only", controllerpath);
+ }
+
+ sourcepath = must_make_path(h->mountpoint, h->container_base_path,
+ container_cgroup, NULL);
+ /* Only the RO mode makes the container's own cgroup read-only too. */
+ if (type == LXC_AUTO_CGROUP_RO)
+ flags |= MS_RDONLY;
+
+ ret = mount(sourcepath, cgpath, "cgroup", flags, NULL);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to mount \"%s\" onto \"%s\"",
+ h->controllers[0], cgpath);
+ INFO("Mounted \"%s\" onto \"%s\"", h->controllers[0], cgpath);
+
+ /* A second, explicit remount is issued for the read-only case. */
+ if (flags & MS_RDONLY) {
+ remount_flags = add_required_remount_flags(sourcepath, cgpath,
+ flags | MS_REMOUNT);
+ ret = mount(sourcepath, cgpath, "cgroup", remount_flags, NULL);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to remount \"%s\" ro", cgpath);
+ INFO("Remounted %s read-only", cgpath);
+ }
+
+ INFO("Completed second stage cgroup automounts for \"%s\"", cgpath);
+ return 0;
+}
+
+/* __cg_mount_direct
+ *
+ * Mount a cgroup hierarchy directly at @controllerpath, without bind-mounts.
+ * This is used for mounting hierarchies in cgroup namespaces and for the
+ * LXC_AUTO_CGROUP_FULL option.
+ *
+ * cgroup2 hierarchies are mounted as "cgroup2" without mount data; all others
+ * are mounted as "cgroup" with the comma-joined controller list as data. The
+ * read-only mount types add MS_RDONLY. Returns 0 on success, -ENOMEM if the
+ * controller list cannot be joined and -1 if mount() fails.
+ */
+static int __cg_mount_direct(int type, struct hierarchy *h,
+			     const char *controllerpath)
+{
+	__do_free char *controllers = NULL;
+	unsigned long flags = MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_RELATIME;
+	char *fstype;
+
+	if (type == LXC_AUTO_CGROUP_RO || type == LXC_AUTO_CGROUP_FULL_RO)
+		flags |= MS_RDONLY;
+
+	if (h->version == CGROUP2_SUPER_MAGIC) {
+		fstype = "cgroup2";
+	} else {
+		controllers = lxc_string_join(",", (const char **)h->controllers, false);
+		if (!controllers)
+			return -ENOMEM;
+		fstype = "cgroup";
+	}
+
+	if (mount("cgroup", controllerpath, fstype, flags, controllers) < 0)
+		return log_error_errno(-1, errno, "Failed to mount \"%s\" with cgroup filesystem type %s",
+				       controllerpath, fstype);
+
+	DEBUG("Mounted \"%s\" with cgroup filesystem type %s", controllerpath, fstype);
+	return 0;
+}
+
+/* Mount hierarchy @h at @controllerpath for a container that runs in a cgroup
+ * namespace; a thin wrapper around __cg_mount_direct().
+ */
+static inline int cg_mount_in_cgroup_namespace(int type, struct hierarchy *h,
+					       const char *controllerpath)
+{
+	int ret = __cg_mount_direct(type, h, controllerpath);
+
+	return ret;
+}
+
+/* Mount hierarchy @h directly at @controllerpath, but only for the mount
+ * types in the range LXC_AUTO_CGROUP_FULL_RO..LXC_AUTO_CGROUP_FULL_MIXED; for
+ * every other type this is a successful no-op.
+ */
+static inline int cg_mount_cgroup_full(int type, struct hierarchy *h,
+				       const char *controllerpath)
+{
+	if (type >= LXC_AUTO_CGROUP_FULL_RO && type <= LXC_AUTO_CGROUP_FULL_MIXED)
+		return __cg_mount_direct(type, h, controllerpath);
+
+	return 0;
+}
+
+/* Set up <root>/sys/fs/cgroup for the container.
+ *
+ * Steps: mount a tmpfs on <root>/sys/fs/cgroup, create one directory per
+ * hierarchy (named after the last path component of the hierarchy's
+ * mountpoint) and mount the hierarchy there, create symlinks for the
+ * individual names of comma-joined (co-mounted) controllers, remount the
+ * tmpfs read-only for the read-only mount types and, when conf->systemd is
+ * "true", detach the unified mount and remount the systemd hierarchy without
+ * MS_RDONLY.
+ *
+ * Returns true without doing anything if @type requests no cgroup automount,
+ * or if cgroup namespaces are supported and no forced mount is wanted.
+ * Returns false on any failure.
+ */
+__cgfsng_ops static bool isulad_cgfsng_mount(struct cgroup_ops *ops,
+ struct lxc_handler *handler,
+ const char *root, int type)
+{
+ int i, ret;
+ char *tmpfspath = NULL;
+ char *systemdpath = NULL;
+ char *unifiedpath = NULL;
+ bool has_cgns = false, retval = false, wants_force_mount = false;
+ char **merged = NULL;
+
+ if ((type & LXC_AUTO_CGROUP_MASK) == 0)
+ return true;
+
+ if (type & LXC_AUTO_CGROUP_FORCE) {
+ type &= ~LXC_AUTO_CGROUP_FORCE;
+ wants_force_mount = true;
+ }
+
+ /* Without an explicit force flag, the decision depends on whether
+ * CAP_SYS_ADMIN is absent from keepcaps (when keepcaps is used) or
+ * present in the caps list (presumably the capabilities to drop).
+ */
+ if (!wants_force_mount) {
+ if (!lxc_list_empty(&handler->conf->keepcaps))
+ wants_force_mount = !in_caplist(CAP_SYS_ADMIN, &handler->conf->keepcaps);
+ else
+ wants_force_mount = in_caplist(CAP_SYS_ADMIN, &handler->conf->caps);
+ }
+
+ has_cgns = cgns_supported();
+ if (has_cgns && !wants_force_mount)
+ return true;
+
+ /* Unspecified modes default to the "mixed" variants. */
+ if (type == LXC_AUTO_CGROUP_NOSPEC)
+ type = LXC_AUTO_CGROUP_MIXED;
+ else if (type == LXC_AUTO_CGROUP_FULL_NOSPEC)
+ type = LXC_AUTO_CGROUP_FULL_MIXED;
+
+ /* Mount tmpfs */
+ tmpfspath = must_make_path(root, "/sys/fs/cgroup", NULL);
+ if (mkdir_p(tmpfspath, 0755) < 0) {
+ ERROR("Failed to create directory: %s", tmpfspath);
+ goto on_error;
+ }
+ ret = safe_mount(NULL, tmpfspath, "tmpfs",
+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME,
+ "size=10240k,mode=755", root, handler->conf->lsm_se_mount_context);
+ if (ret < 0)
+ goto on_error;
+
+ for (i = 0; ops->hierarchies[i]; i++) {
+ char *controllerpath = NULL;
+ char *path2 = NULL;
+ struct hierarchy *h = ops->hierarchies[i];
+ /* Directory name: last component of the host mountpoint. */
+ char *controller = strrchr(h->mountpoint, '/');
+
+ if (!controller)
+ continue;
+ controller++;
+
+ // isulad: symlink subcgroup
+ if (strchr(controller, ',') != NULL) {
+ int pret;
+ pret = lxc_append_string(&merged, controller);
+ if (pret < 0)
+ goto on_error;
+ }
+
+ controllerpath = must_make_path(tmpfspath, controller, NULL);
+ if (dir_exists(controllerpath)) {
+ free(controllerpath);
+ continue;
+ }
+
+ ret = mkdir(controllerpath, 0755);
+ if (ret < 0) {
+ SYSERROR("Error creating cgroup path: %s", controllerpath);
+ free(controllerpath);
+ goto on_error;
+ }
+
+ if (has_cgns && wants_force_mount) {
+ /* If cgroup namespaces are supported but the container
+ * will not have CAP_SYS_ADMIN after it has started we
+ * need to mount the cgroups manually.
+ */
+ ret = cg_mount_in_cgroup_namespace(type, h, controllerpath);
+ free(controllerpath);
+ if (ret < 0)
+ goto on_error;
+
+ continue;
+ }
+
+ ret = cg_mount_cgroup_full(type, h, controllerpath);
+ if (ret < 0) {
+ free(controllerpath);
+ goto on_error;
+ }
+
+ if (!cg_mount_needs_subdirs(type)) {
+ free(controllerpath);
+ continue;
+ }
+
+ // isulad: ignore ops->container_cgroup so we will not see directory lxc after /sys/fs/cgroup/xxx in container,
+ // isulad: ignore h->container_base_path so we will not see subgroup of /sys/fs/cgroup/xxx/subgroup in container
+ path2 = must_make_path(controllerpath, NULL);
+ ret = mkdir_p(path2, 0755);
+ if (ret < 0) {
+ free(controllerpath);
+ free(path2);
+ goto on_error;
+ }
+
+ ret = cg_legacy_mount_controllers(type, h, controllerpath,
+ path2, ops->container_cgroup);
+ free(controllerpath);
+ free(path2);
+ if (ret < 0)
+ goto on_error;
+ }
+
+ // isulad: symlink subcgroup
+ if (merged) {
+ char **mc = NULL;
+ for (mc = merged; *mc; mc++) {
+ char *token = NULL;
+ /* Tokenize a copy; *mc itself is the symlink target. */
+ char *copy = must_copy_string(*mc);
+ lxc_iterate_parts(token, copy, ",") {
+ int mret;
+ char *link;
+ link = must_make_path(tmpfspath, token, NULL);
+ mret = symlink(*mc, link);
+ if (mret < 0 && errno != EEXIST) {
+ SYSERROR("Failed to create link %s for target %s", link, *mc);
+ free(copy);
+ free(link);
+ goto on_error;
+ }
+ free(link);
+ }
+ free(copy);
+ }
+ }
+
+
+ // isulad: remount /sys/fs/cgroup to readonly
+ if (type == LXC_AUTO_CGROUP_FULL_RO || type == LXC_AUTO_CGROUP_RO) {
+ ret = mount(tmpfspath, tmpfspath, "bind",
+ MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_RELATIME|MS_RDONLY|MS_BIND|MS_REMOUNT, NULL);
+ if (ret < 0) {
+ SYSERROR("Failed to remount /sys/fs/cgroup.");
+ goto on_error;
+ }
+ }
+
+ // isulad: remount /sys/fs/cgroup/systemd to readwrite for system container
+ if (handler->conf->systemd != NULL && strcmp(handler->conf->systemd, "true") == 0)
+ {
+ unifiedpath = must_make_path(root, "/sys/fs/cgroup/unified", NULL);
+ if (dir_exists(unifiedpath))
+ {
+ ret = umount2(unifiedpath, MNT_DETACH);
+ if (ret < 0)
+ {
+ SYSERROR("Failed to umount /sys/fs/cgroup/unified.");
+ goto on_error;
+ }
+ }
+
+ systemdpath = must_make_path(root, "/sys/fs/cgroup/systemd", NULL);
+ ret = mount(systemdpath, systemdpath, "bind",
+ MS_NOSUID | MS_NODEV | MS_NOEXEC | MS_RELATIME | MS_BIND | MS_REMOUNT, NULL);
+ if (ret < 0)
+ {
+ SYSERROR("Failed to remount /sys/fs/cgroup/systemd.");
+ goto on_error;
+ }
+ }
+
+ retval = true;
+
+ /* Shared exit path: reached on success as well, with retval == true. */
+on_error:
+ free(tmpfspath);
+ if (systemdpath != NULL)
+ {
+ free(systemdpath);
+ }
+ if (unifiedpath != NULL)
+ {
+ free(unifiedpath);
+ }
+ lxc_free_array((void **)merged, free);
+ return retval;
+}
+
+/* Move the calling process to the base cgroup of every hierarchy by writing
+ * "0" to <mountpoint>/<container_base_path>/cgroup.procs.
+ *
+ * Only root needs to escape to the cgroup of its init: nothing is done for a
+ * non-zero effective uid, for a relative cgroup configuration, or when there
+ * are no hierarchies. Returns false if @ops or @conf is missing or a write
+ * fails.
+ */
+__cgfsng_ops static bool isulad_cgfsng_escape(const struct cgroup_ops *ops,
+					struct lxc_conf *conf)
+{
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	if (conf->cgroup_meta.relative || geteuid())
+		return true;
+
+	for (struct hierarchy **hp = ops->hierarchies; *hp; hp++) {
+		__do_free char *procs_file = NULL;
+
+		procs_file = must_make_path((*hp)->mountpoint,
+					    (*hp)->container_base_path,
+					    "cgroup.procs", NULL);
+		if (lxc_write_to_file(procs_file, "0", 2, false, 0666) != 0)
+			return log_error_errno(false, errno, "Failed to escape to cgroup \"%s\"", procs_file);
+	}
+
+	return true;
+}
+
+/* Count the entries of the NULL-terminated hierarchy list of @ops.
+ * Returns -1 (errno ENOENT) when @ops is NULL and 0 when there is no list.
+ */
+__cgfsng_ops static int isulad_cgfsng_num_hierarchies(struct cgroup_ops *ops)
+{
+	int count = 0;
+
+	if (!ops)
+		return ret_set_errno(-1, ENOENT);
+
+	if (!ops->hierarchies)
+		return 0;
+
+	while (ops->hierarchies[count])
+		count++;
+
+	return count;
+}
+
+/* Store the controller list of the @n-th hierarchy (0-based) in @out.
+ *
+ * Returns false with errno set to ENOENT when @ops or its hierarchy list is
+ * missing or when the list has no entry @n, and with errno set to EINVAL
+ * when @n is negative. The list stored in @out is the hierarchy's own list,
+ * not a copy.
+ */
+__cgfsng_ops static bool isulad_cgfsng_get_hierarchies(struct cgroup_ops *ops, int n,
+						char ***out)
+{
+	int i;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return ret_set_errno(false, ENOENT);
+
+	if (n < 0)
+		return ret_set_errno(false, EINVAL);
+
+	/* sanity check n: every entry up to and including index n must
+	 * exist, otherwise the NULL terminator would be dereferenced below.
+	 */
+	for (i = 0; i <= n; i++)
+		if (!ops->hierarchies[i])
+			return ret_set_errno(false, ENOENT);
+
+	*out = ops->hierarchies[n]->controllers;
+
+	return true;
+}
+
+/* Freeze the container through the legacy freezer controller by writing
+ * "FROZEN" to freezer.state in the container's cgroup.
+ *
+ * Returns the result of lxc_write_openat(), or -1 (errno ENOENT) when no
+ * hierarchy carries the "freezer" controller. The return type is int, like
+ * cg_legacy_unfreeze(), so that the negative error value is not collapsed
+ * into a boolean on its way to the int-returning caller.
+ */
+static int cg_legacy_freeze(struct cgroup_ops *ops)
+{
+	struct hierarchy *h;
+
+	h = get_hierarchy(ops, "freezer");
+	if (!h)
+		return ret_set_errno(-1, ENOENT);
+
+	return lxc_write_openat(h->container_full_path, "freezer.state",
+				"FROZEN", STRLITERALLEN("FROZEN"));
+}
+
+/* Mainloop callback for a cgroup.events fd.
+ *
+ * @cbdata encodes the awaited state: 1 waits for a line starting with
+ * "frozen 1", anything else for "frozen 0". The file is re-read from the
+ * start through a duplicate of @fd, so the stream can be closed on return
+ * without closing @fd itself.
+ *
+ * Returns LXC_MAINLOOP_CLOSE once the awaited state is seen,
+ * LXC_MAINLOOP_CONTINUE if it is not there yet and LXC_MAINLOOP_ERROR if the
+ * file cannot be duplicated, rewound or opened as a stream.
+ */
+static int freezer_cgroup_events_cb(int fd, uint32_t events, void *cbdata,
+				    struct lxc_epoll_descr *descr)
+{
+	__do_close int duped_fd = -EBADF;
+	__do_free char *line = NULL;
+	__do_fclose FILE *f = NULL;
+	int state = PTR_TO_INT(cbdata);
+	size_t len = 0;
+	const char *state_string;
+
+	duped_fd = dup(fd);
+	if (duped_fd < 0)
+		return LXC_MAINLOOP_ERROR;
+
+	/* lseek() signals failure by returning (off_t)-1. */
+	if (lseek(duped_fd, 0, SEEK_SET) == (off_t)-1)
+		return LXC_MAINLOOP_ERROR;
+
+	f = fdopen(duped_fd, "re");
+	if (!f)
+		return LXC_MAINLOOP_ERROR;
+	/* The stream owns the descriptor from here on. */
+	move_fd(duped_fd);
+
+	if (state == 1)
+		state_string = "frozen 1";
+	else
+		state_string = "frozen 0";
+
+	while (getline(&line, &len, f) != -1)
+		if (strncmp(line, state_string, STRLITERALLEN("frozen") + 2) == 0)
+			return LXC_MAINLOOP_CLOSE;
+
+	return LXC_MAINLOOP_CONTINUE;
+}
+
+/* Freeze the container in the unified hierarchy by writing "1" to
+ * cgroup.freeze in the container's cgroup.
+ *
+ * With a non-zero @timeout, cgroup.events is opened and registered with a
+ * mainloop (callback freezer_cgroup_events_cb, callback data 1) before the
+ * write, and the mainloop is run with @timeout afterwards.
+ *
+ * Returns 0 on success and -1 on failure, including when there is no unified
+ * hierarchy or no container path.
+ */
+static int cg_unified_freeze(struct cgroup_ops *ops, int timeout)
+{
+ __do_close int fd = -EBADF;
+ call_cleaner(lxc_mainloop_close) struct lxc_epoll_descr *descr_ptr = NULL;
+ int ret;
+ struct lxc_epoll_descr descr;
+ struct hierarchy *h;
+
+ h = ops->unified;
+ if (!h)
+ return ret_set_errno(-1, ENOENT);
+
+ if (!h->container_full_path)
+ return ret_set_errno(-1, EEXIST);
+
+ if (timeout != 0) {
+ __do_free char *events_file = NULL;
+
+ events_file = must_make_path(h->container_full_path, "cgroup.events", NULL);
+ fd = open(events_file, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(-1, errno, "Failed to open cgroup.events file");
+
+ ret = lxc_mainloop_open(&descr);
+ if (ret)
+ return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container freeze");
+
+ /* automatically cleaned up now */
+ descr_ptr = &descr;
+
+ ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){1}));
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
+ }
+
+ /* The watcher is registered before the state change is requested. */
+ ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "1", 1);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to open cgroup.freeze file");
+
+ if (timeout != 0 && lxc_mainloop(&descr, timeout))
+ return log_error_errno(-1, errno, "Failed to wait for container to be frozen");
+
+ return 0;
+}
+
+/* Freeze the container: the unified layout uses cgroup.freeze (honouring
+ * @timeout), every other layout uses the legacy freezer controller.
+ * Returns -1 (errno ENOENT) when there are no hierarchies.
+ */
+__cgfsng_ops static int isulad_cgfsng_freeze(struct cgroup_ops *ops, int timeout)
+{
+	if (!ops->hierarchies)
+		return ret_set_errno(-1, ENOENT);
+
+	if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED)
+		return cg_unified_freeze(ops, timeout);
+
+	return cg_legacy_freeze(ops);
+}
+
+/* Thaw the container through the legacy freezer controller by writing
+ * "THAWED" to freezer.state in the container's cgroup. Returns the result of
+ * lxc_write_openat(), or -1 (errno ENOENT) when no hierarchy carries the
+ * "freezer" controller.
+ */
+static int cg_legacy_unfreeze(struct cgroup_ops *ops)
+{
+	struct hierarchy *freezer = get_hierarchy(ops, "freezer");
+
+	if (!freezer)
+		return ret_set_errno(-1, ENOENT);
+
+	return lxc_write_openat(freezer->container_full_path, "freezer.state",
+				"THAWED", STRLITERALLEN("THAWED"));
+}
+
+/* Thaw the container in the unified hierarchy by writing "0" to
+ * cgroup.freeze in the container's cgroup.
+ *
+ * With a non-zero @timeout, cgroup.events is opened and registered with a
+ * mainloop (callback freezer_cgroup_events_cb, callback data 0) before the
+ * write, and the mainloop is run with @timeout afterwards.
+ *
+ * Returns 0 on success and -1 on failure, including when there is no unified
+ * hierarchy or no container path.
+ */
+static int cg_unified_unfreeze(struct cgroup_ops *ops, int timeout)
+{
+ __do_close int fd = -EBADF;
+ call_cleaner(lxc_mainloop_close)struct lxc_epoll_descr *descr_ptr = NULL;
+ int ret;
+ struct lxc_epoll_descr descr;
+ struct hierarchy *h;
+
+ h = ops->unified;
+ if (!h)
+ return ret_set_errno(-1, ENOENT);
+
+ if (!h->container_full_path)
+ return ret_set_errno(-1, EEXIST);
+
+ if (timeout != 0) {
+ __do_free char *events_file = NULL;
+
+ events_file = must_make_path(h->container_full_path, "cgroup.events", NULL);
+ fd = open(events_file, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return log_error_errno(-1, errno, "Failed to open cgroup.events file");
+
+ ret = lxc_mainloop_open(&descr);
+ if (ret)
+ return log_error_errno(-1, errno, "Failed to create epoll instance to wait for container unfreeze");
+
+ /* automatically cleaned up now */
+ descr_ptr = &descr;
+
+ ret = lxc_mainloop_add_handler(&descr, fd, freezer_cgroup_events_cb, INT_TO_PTR((int){0}));
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to add cgroup.events fd handler to mainloop");
+ }
+
+ /* The watcher is registered before the state change is requested. */
+ ret = lxc_write_openat(h->container_full_path, "cgroup.freeze", "0", 1);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to open cgroup.freeze file");
+
+ if (timeout != 0 && lxc_mainloop(&descr, timeout))
+ return log_error_errno(-1, errno, "Failed to wait for container to be unfrozen");
+
+ return 0;
+}
+
+/* Thaw the container's cgroup.
+ *
+ * Dispatches to the unified (cgroup2) implementation when the layout is
+ * CGROUP_LAYOUT_UNIFIED and to the legacy freezer otherwise. @timeout is only
+ * forwarded to the unified implementation. Fails through
+ * ret_set_errno(-1, ENOENT) when no hierarchies are known.
+ */
+__cgfsng_ops static int isulad_cgfsng_unfreeze(struct cgroup_ops *ops, int timeout)
+{
+	bool unified;
+
+	if (!ops->hierarchies)
+		return ret_set_errno(-1, ENOENT);
+
+	unified = (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED);
+
+	return unified ? cg_unified_unfreeze(ops, timeout) : cg_legacy_unfreeze(ops);
+}
+
+/* Return the container's cgroup path for @controller with the hierarchy's
+ * mountpoint prefix skipped.
+ *
+ * The returned pointer points into h->container_full_path and must not be
+ * freed by the caller. The full path is built on first use from mountpoint,
+ * container_base_path and ops->container_cgroup. Returns NULL (after a
+ * warning) when no hierarchy matches @controller.
+ */
+__cgfsng_ops static const char *isulad_cgfsng_get_cgroup(struct cgroup_ops *ops,
+							  const char *controller)
+{
+	struct hierarchy *hier = get_hierarchy(ops, controller);
+
+	if (hier == NULL)
+		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
+				      controller ? controller : "(null)");
+
+	/* Build and cache the full path the first time it is requested. */
+	if (hier->container_full_path == NULL)
+		hier->container_full_path = must_make_path(hier->mountpoint, hier->container_base_path, ops->container_cgroup, NULL);
+
+	if (hier->container_full_path == NULL)
+		return NULL;
+
+	return hier->container_full_path + strlen(hier->mountpoint);
+}
+
+/* Return the container's full cgroup path (mountpoint included) for
+ * @controller.
+ *
+ * The returned string is h->container_full_path itself and must not be freed
+ * by the caller. It is built on first use from mountpoint,
+ * container_base_path and ops->container_cgroup. Returns NULL (after a
+ * warning) when no hierarchy matches @controller.
+ */
+__cgfsng_ops static const char *isulad_cgfsng_get_cgroup_full_path(struct cgroup_ops *ops,
+								    const char *controller)
+{
+	struct hierarchy *hier = get_hierarchy(ops, controller);
+
+	if (hier == NULL)
+		return log_warn_errno(NULL, ENOENT, "Failed to find hierarchy for controller \"%s\"",
+				      controller ? controller : "(null)");
+
+	/* Build and cache the full path the first time it is requested. */
+	if (hier->container_full_path == NULL)
+		hier->container_full_path = must_make_path(hier->mountpoint, hier->container_base_path, ops->container_cgroup, NULL);
+
+	return hier->container_full_path;
+}
+
+/* Join the hierarchy's mountpoint, a cgroup path (as returned from
+ * lxc_cmd_get_cgroup_path) and a file name into one path.
+ * The result must be freed by the caller.
+ */
+static inline char *build_full_cgpath_from_monitorpath(struct hierarchy *h,
+							const char *inpath,
+							const char *filename)
+{
+	char *full_path = must_make_path(h->mountpoint, inpath, filename, NULL);
+
+	return full_path;
+}
+
+/* Move @pid into a leaf cgroup below the directory referred to by @unified_fd.
+ *
+ * Attempts, in order:
+ *   1. ".lxc/cgroup.procs" (the ".lxc" directory is created if missing),
+ *   2. "cgroup.procs" directly in @unified_fd,
+ *   3. ".lxc-<idx>/cgroup.procs" for idx = 1..999, creating each directory
+ *      on demand, as long as the previous write failed with EBUSY.
+ *
+ * A ".lxc-<idx>" directory created here is removed again when the write into
+ * it fails. Returns 0 on success and -1 on failure. @conf is not used.
+ */
+static int cgroup_attach_leaf(const struct lxc_conf *conf, int unified_fd, pid_t pid)
+{
+	int idx = 1;
+	int ret;
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
+
+	/* Create leaf cgroup. */
+	ret = mkdirat(unified_fd, ".lxc", 0755);
+	if (ret < 0 && errno != EEXIST)
+		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
+
+	pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+	ret = lxc_writeat(unified_fd, ".lxc/cgroup.procs", pidstr, pidstr_len);
+	if (ret < 0)
+		ret = lxc_writeat(unified_fd, "cgroup.procs", pidstr, pidstr_len);
+	if (ret == 0)
+		return 0;
+
+	/* this is a non-leaf node */
+	if (errno != EBUSY)
+		return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
+
+	do {
+		bool rm = false;
+		char attach_cgroup[STRLITERALLEN(".lxc-1000/cgroup.procs") + 1];
+		char *slash;
+		int saved_errno;
+
+		ret = snprintf(attach_cgroup, sizeof(attach_cgroup), ".lxc-%d/cgroup.procs", idx);
+		if (ret < 0 || (size_t)ret >= sizeof(attach_cgroup))
+			return ret_errno(EIO);
+
+		/* Temporarily cut the string at the '/' to get the directory name. */
+		slash = &attach_cgroup[ret] - STRLITERALLEN("/cgroup.procs");
+		*slash = '\0';
+
+		ret = mkdirat(unified_fd, attach_cgroup, 0755);
+		if (ret < 0 && errno != EEXIST)
+			return log_error_errno(-1, errno, "Failed to create cgroup %s", attach_cgroup);
+		if (ret == 0)
+			rm = true;
+
+		*slash = '/';
+
+		ret = lxc_writeat(unified_fd, attach_cgroup, pidstr, pidstr_len);
+		if (ret == 0)
+			return 0;
+
+		/*
+		 * The errno of the failed write decides whether we retry, so
+		 * keep it safe across the cleanup below.
+		 */
+		saved_errno = errno;
+
+		if (rm) {
+			/*
+			 * Remove the directory we created, not the
+			 * "cgroup.procs" file inside it: AT_REMOVEDIR needs
+			 * the directory path.
+			 */
+			*slash = '\0';
+			if (unlinkat(unified_fd, attach_cgroup, AT_REMOVEDIR))
+				SYSERROR("Failed to remove cgroup \"%d(%s)\"", unified_fd, attach_cgroup);
+		}
+
+		errno = saved_errno;
+
+		/* this is a non-leaf node */
+		if (errno != EBUSY)
+			return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
+
+		idx++;
+	} while (idx < 1000);
+
+	return log_error_errno(-1, errno, "Failed to attach to unified cgroup");
+}
+
+/* Create the ".lxc" leaf cgroup below @unified_fd, open ".lxc/cgroup.procs"
+ * and "cgroup.procs" for writing and send both fds over the socket *@sk_fd.
+ *
+ * The socket fd and both opened fds are held in __do_close variables for the
+ * duration of the call. Returns 0 on success; -1 when the leaf cannot be
+ * created and -errno when opening or sending fails. @conf is not used.
+ */
+static int cgroup_attach_create_leaf(const struct lxc_conf *conf,
+				     int unified_fd, int *sk_fd)
+{
+	__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
+	int target_fds[2];
+	ssize_t ret;
+
+	/* Create leaf cgroup. */
+	ret = mkdirat(unified_fd, ".lxc", 0755);
+	if (ret < 0 && errno != EEXIST)
+		return log_error_errno(-1, errno, "Failed to create leaf cgroup \".lxc\"");
+
+	target_fd0 = openat(unified_fd, ".lxc/cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (target_fd0 < 0)
+		return log_error_errno(-errno, errno, "Failed to open \".lxc/cgroup.procs\"");
+	target_fds[0] = target_fd0;
+
+	target_fd1 = openat(unified_fd, "cgroup.procs", O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+	if (target_fd1 < 0)
+		/* Name the file that actually failed to open. */
+		return log_error_errno(-errno, errno, "Failed to open \"cgroup.procs\"");
+	target_fds[1] = target_fd1;
+
+	ret = lxc_abstract_unix_send_fds(sk, target_fds, 2, NULL, 0);
+	if (ret <= 0)
+		return log_error_errno(-errno, errno, "Failed to send \".lxc/cgroup.procs\" fds %d and %d",
+				       target_fd0, target_fd1);
+
+	return log_debug(0, "Sent target cgroup fds %d and %d", target_fd0, target_fd1);
+}
+
+/* Receive two cgroup.procs fds over the socket *@sk_fd and write @pid into
+ * the first one that accepts it.
+ *
+ * The socket fd and both received fds are held in __do_close variables for
+ * the duration of the call. Returns 0 once a write of the whole pid string
+ * succeeded, -1 otherwise. @conf is not used.
+ */
+static int cgroup_attach_move_into_leaf(const struct lxc_conf *conf,
+					int *sk_fd, pid_t pid)
+{
+	__do_close int sk = *sk_fd, target_fd0 = -EBADF, target_fd1 = -EBADF;
+	int target_fds[2];
+	char pidstr[INTTYPE_TO_STRLEN(int64_t) + 1];
+	size_t pidstr_len;
+	ssize_t ret;
+
+	ret = lxc_abstract_unix_recv_fds(sk, target_fds, 2, NULL, 0);
+	if (ret <= 0)
+		return log_error_errno(-1, errno, "Failed to receive target cgroup fd");
+
+	/* Hand the received fds to the auto-closing variables. */
+	target_fd0 = target_fds[0];
+	target_fd1 = target_fds[1];
+
+	pidstr_len = sprintf(pidstr, INT64_FMT, (int64_t)pid);
+
+	/* Try the fds in the order they were sent. */
+	for (size_t i = 0; i < 2; i++) {
+		ret = lxc_write_nointr(target_fds[i], pidstr, pidstr_len);
+		if (ret > 0 && (size_t)ret == pidstr_len)
+			return log_debug(0, "Moved process into target cgroup via fd %d", target_fds[i]);
+	}
+
+	return log_debug_errno(-1, errno, "Failed to move process into target cgroup via fd %d and %d",
+			       target_fd0, target_fd1);
+}
+
+/* Argument bundle handed to both attach wrappers that are run through
+ * userns_exec_minimal().
+ */
+struct userns_exec_unified_attach_data {
+ const struct lxc_conf *conf; /* container configuration, must be non-NULL */
+ int unified_fd; /* directory fd the leaf cgroup is created under */
+ int sk_pair[2]; /* socketpair: [0] is kept by the parent wrapper, [1] by the child wrapper */
+ pid_t pid; /* process to move into the cgroup */
+};
+
+/* Child-side callback for userns_exec_minimal(): validates the argument
+ * bundle, closes the parent's socket end (sk_pair[0]) and creates/sends the
+ * leaf cgroup fds over sk_pair[1].
+ */
+static int cgroup_unified_attach_child_wrapper(void *data)
+{
+	struct userns_exec_unified_attach_data *args = data;
+	bool valid;
+
+	valid = args->conf && args->unified_fd >= 0 && args->pid > 0 &&
+		args->sk_pair[0] >= 0 && args->sk_pair[1] >= 0;
+	if (!valid)
+		return ret_errno(EINVAL);
+
+	close_prot_errno_disarm(args->sk_pair[0]);
+
+	return cgroup_attach_create_leaf(args->conf, args->unified_fd,
+					 &args->sk_pair[1]);
+}
+
+/* Parent-side callback for userns_exec_minimal(): validates the argument
+ * bundle, closes the child's socket end (sk_pair[1]) and moves args->pid into
+ * the cgroup using the fds received over sk_pair[0].
+ */
+static int cgroup_unified_attach_parent_wrapper(void *data)
+{
+	struct userns_exec_unified_attach_data *args = data;
+	bool valid;
+
+	valid = args->conf && args->unified_fd >= 0 && args->pid > 0 &&
+		args->sk_pair[0] >= 0 && args->sk_pair[1] >= 0;
+	if (!valid)
+		return ret_errno(EINVAL);
+
+	close_prot_errno_disarm(args->sk_pair[1]);
+
+	return cgroup_attach_move_into_leaf(args->conf, &args->sk_pair[0],
+					    args->pid);
+}
+
+/* Attach @pid to the unified cgroup of container @name under @lxcpath.
+ *
+ * The cgroup2 directory fd is obtained with lxc_cmd_get_cgroup2_fd(). Without
+ * an id mapping in @conf the pid is attached directly; with one, the work is
+ * split between the two wrappers via userns_exec_minimal() and a socketpair.
+ *
+ * Returns ret_errno(EINVAL) on bad arguments, ret_errno(EBADF) when no fd
+ * could be obtained, -errno when socketpair() fails, otherwise the result of
+ * the attach helper.
+ */
+int cgroup_attach(const struct lxc_conf *conf, const char *name,
+		  const char *lxcpath, pid_t pid)
+{
+	__do_close int unified_fd = -EBADF;
+
+	if (!conf || !name || !lxcpath || pid <= 0)
+		return ret_errno(EINVAL);
+
+	unified_fd = lxc_cmd_get_cgroup2_fd(name, lxcpath);
+	if (unified_fd < 0)
+		return ret_errno(EBADF);
+
+	/* No id mapping configured: attach directly. */
+	if (lxc_list_empty(&conf->id_map))
+		return cgroup_attach_leaf(conf, unified_fd, pid);
+
+	struct userns_exec_unified_attach_data args = {
+		.conf		= conf,
+		.unified_fd	= unified_fd,
+		.pid		= pid,
+	};
+
+	if (socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair) < 0)
+		return -errno;
+
+	return userns_exec_minimal(conf,
+				   cgroup_unified_attach_parent_wrapper,
+				   &args,
+				   cgroup_unified_attach_child_wrapper,
+				   &args);
+}
+
+/* Attach @pid to the container's unified cgroup.
+ *
+ * Technically we are always at a delegation boundary here (especially when
+ * cgroup namespaces are available): for the container to have started at all,
+ * its root cgroup must have been a leaf node. Since then the container's init
+ * system has either kept it a leaf node or created subtrees. In the first
+ * case we attach to the leaf node created at container start; in the second
+ * we create our own cgroup for the attaching process.
+ *
+ * cgroup_attach() is tried first. Only when it reports -EBADF do we fall back
+ * to asking for the cgroup path and opening it below h->mountpoint. A
+ * container that is not running (no cgroup path) counts as success.
+ */
+static int __cg_unified_attach(const struct hierarchy *h,
+			       const struct lxc_conf *conf, const char *name,
+			       const char *lxcpath, pid_t pid,
+			       const char *controller)
+{
+	__do_close int unified_fd = -EBADF;
+	__do_free char *path = NULL, *cgroup = NULL;
+	int ret;
+
+	if (!conf || !name || !lxcpath || pid <= 0)
+		return ret_errno(EINVAL);
+
+	ret = cgroup_attach(conf, name, lxcpath, pid);
+	if (ret == 0)
+		return log_trace(0, "Attached to unified cgroup via command handler");
+	if (ret != -EBADF)
+		return log_error_errno(ret, errno, "Failed to attach to unified cgroup");
+
+	/* Fall back to retrieving the path for the unified cgroup. */
+	cgroup = lxc_cmd_get_cgroup_path(name, lxcpath, controller);
+	if (!cgroup) {
+		/* not running */
+		return 0;
+	}
+
+	path = must_make_path(h->mountpoint, cgroup, NULL);
+
+	unified_fd = open(path, O_PATH | O_DIRECTORY | O_CLOEXEC);
+	if (unified_fd < 0)
+		return ret_errno(EBADF);
+
+	/* No id mapping configured: attach directly. */
+	if (lxc_list_empty(&conf->id_map))
+		return cgroup_attach_leaf(conf, unified_fd, pid);
+
+	struct userns_exec_unified_attach_data args = {
+		.conf		= conf,
+		.unified_fd	= unified_fd,
+		.pid		= pid,
+	};
+
+	if (socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, args.sk_pair) < 0)
+		return -errno;
+
+	return userns_exec_minimal(conf,
+				   cgroup_unified_attach_parent_wrapper,
+				   &args,
+				   cgroup_unified_attach_child_wrapper,
+				   &args);
+}
+
+/* Attach @pid to every hierarchy of container @name under @lxcpath.
+ *
+ * cgroup2 hierarchies go through __cg_unified_attach(); for legacy
+ * hierarchies the pid is written to cgroup.procs below the path reported by
+ * lxc_cmd_get_cgroup_path() for the hierarchy's first controller.
+ *
+ * Returns true when all hierarchies were handled (or there are none), false
+ * on the first failure or when @ops is NULL.
+ */
+__cgfsng_ops static bool isulad_cgfsng_attach(struct cgroup_ops *ops,
+					      const struct lxc_conf *conf,
+					      const char *name, const char *lxcpath,
+					      pid_t pid)
+{
+	char pidstr[INTTYPE_TO_STRLEN(pid_t)];
+	int len;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	len = snprintf(pidstr, sizeof(pidstr), "%d", pid);
+	if (len < 0 || (size_t)len >= sizeof(pidstr))
+		return false;
+
+	for (struct hierarchy **it = ops->hierarchies; *it; it++) {
+		__do_free char *fullpath = NULL, *path = NULL;
+		struct hierarchy *h = *it;
+		int ret;
+
+		if (h->version != CGROUP2_SUPER_MAGIC) {
+			path = lxc_cmd_get_cgroup_path(name, lxcpath, h->controllers[0]);
+			if (!path) {
+				/* not running */
+				return false;
+			}
+
+			fullpath = build_full_cgpath_from_monitorpath(h, path, "cgroup.procs");
+			ret = lxc_write_to_file(fullpath, pidstr, len, false, 0666);
+			if (ret < 0)
+				return log_error_errno(false, errno, "Failed to attach %d to %s",
+						       (int)pid, fullpath);
+
+			continue;
+		}
+
+		ret = __cg_unified_attach(h, conf, name, lxcpath, pid,
+					  h->controllers[0]);
+		if (ret < 0)
+			return false;
+	}
+
+	return true;
+}
+
+/* Read the cgroup file @filename (e.g. "memory.limit_in_bytes") of the
+ * container into @value, at most @len bytes.
+ *
+ * The controller name is the part of @filename before the first '.'.
+ * Returns the result of lxc_read_from_file(), or -1 when the cgroup path or
+ * the hierarchy for the controller cannot be found. @name and @lxcpath are
+ * not used.
+ */
+__cgfsng_ops static int isulad_cgfsng_get(struct cgroup_ops *ops, const char *filename,
+					  char *value, size_t len, const char *name,
+					  const char *lxcpath)
+{
+	size_t name_len = strlen(filename);
+	char *controller = alloca(name_len + 1);
+	char *dot, *path, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+	int ret;
+
+	/* Controller name = filename up to the first '.'. */
+	(void)strlcpy(controller, filename, name_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	path = safe_strdup(ori_path);
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		free(path);
+		return -1;
+	}
+
+	fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+	ret = lxc_read_from_file(fullpath, value, len);
+	free(fullpath);
+	free(path);
+
+	return ret;
+}
+
+/* Copy up to three access characters ('r', 'w', 'm') from @val into
+ * device->access, stopping early at a newline or the end of the string.
+ * Returns 0 on success and ret_errno(EINVAL) on any other character.
+ * Slots of device->access that are not reached are left untouched.
+ */
+static int device_cgroup_parse_access(struct device_item *device, const char *val)
+{
+	int count = 0;
+
+	while (count < 3) {
+		const char c = *val;
+
+		if (c == '\n' || c == '\0')
+			break;
+
+		if (c != 'r' && c != 'w' && c != 'm')
+			return ret_errno(EINVAL);
+
+		device->access[count] = c;
+		count++;
+		val++;
+	}
+
+	return 0;
+}
+
+/* Parse one device number field (major or minor) at *@cursor.
+ *
+ * Accepts either '*' (stored as -1) or a run of decimal digits (converted
+ * with lxc_safe_int()). On success *@cursor is advanced past the field and 0
+ * is returned; on failure -1 is returned and *@cursor is left unchanged.
+ */
+static int device_cgroup_parse_devnum(const char **cursor, int *num)
+{
+	const char *val = *cursor;
+	char temp[50] = {0};
+
+	if (*val == '*') {
+		*num = -1;
+		val++;
+	} else if (isdigit((unsigned char)*val)) {
+		/* Copy at most sizeof(temp) - 1 digits so temp stays terminated. */
+		for (size_t count = 0; count < sizeof(temp) - 1; count++) {
+			temp[count] = *val;
+			val++;
+			if (!isdigit((unsigned char)*val))
+				break;
+		}
+
+		if (lxc_safe_int(temp, num))
+			return -1;
+	} else {
+		return -1;
+	}
+
+	*cursor = val;
+	return 0;
+}
+
+/* Parse a legacy devices cgroup rule into @device.
+ *
+ * @key selects allow/deny: "devices.allow" sets device->allow to 1, anything
+ * else to 0. @val is either exactly "a" (a global rule) or
+ * "<a|b|c> <major|*>:<minor|*> <access>", where access is up to three of
+ * 'r', 'w', 'm'.
+ *
+ * Returns 0 on success and a negative value on malformed input.
+ */
+int device_cgroup_rule_parse(struct device_item *device, const char *key,
+			     const char *val)
+{
+	if (strcmp("devices.allow", key) == 0)
+		device->allow = 1;
+	else
+		device->allow = 0;
+
+	if (strcmp(val, "a") == 0) {
+		/* global rule */
+		device->type = 'a';
+		device->major = -1;
+		device->minor = -1;
+		device->global_rule = device->allow
+					  ? LXC_BPF_DEVICE_CGROUP_BLACKLIST
+					  : LXC_BPF_DEVICE_CGROUP_WHITELIST;
+		device->allow = -1;
+		return 0;
+	}
+
+	/* local rule */
+	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
+
+	switch (*val) {
+	case 'a':
+	case 'b':
+	case 'c':
+		device->type = *val;
+		break;
+	default:
+		return -1;
+	}
+
+	/*
+	 * <ctype.h> classifiers take an int that must be representable as
+	 * unsigned char (or EOF), so cast before calling them.
+	 */
+	val++;
+	if (!isspace((unsigned char)*val))
+		return -1;
+	val++;
+
+	/* read major */
+	if (device_cgroup_parse_devnum(&val, &device->major) < 0)
+		return -1;
+	if (*val != ':')
+		return -1;
+	val++;
+
+	/* read minor */
+	if (device_cgroup_parse_devnum(&val, &device->minor) < 0)
+		return -1;
+	if (!isspace((unsigned char)*val))
+		return -1;
+
+	return device_cgroup_parse_access(device, ++val);
+}
+
+/* Write @value to the cgroup file @filename (e.g. "memory.limit_in_bytes")
+ * of the container.
+ *
+ * The controller name is the part of @filename before the first '.'.
+ * Returns the result of lxc_write_to_file(), or -1 when the cgroup path or
+ * the hierarchy for the controller cannot be found. @name and @lxcpath are
+ * not used.
+ */
+__cgfsng_ops static int isulad_cgfsng_set(struct cgroup_ops *ops,
+					  const char *filename, const char *value,
+					  const char *name, const char *lxcpath)
+{
+	size_t name_len = strlen(filename);
+	char *controller = alloca(name_len + 1);
+	char *dot, *path, *fullpath;
+	const char *ori_path;
+	struct hierarchy *h;
+	int ret;
+
+	/* Controller name = filename up to the first '.'. */
+	(void)strlcpy(controller, filename, name_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	ori_path = ops->get_cgroup(ops, controller);
+	if (ori_path == NULL) {
+		ERROR("Failed to get cgroup path:%s", controller);
+		return -1;
+	}
+	path = safe_strdup(ori_path);
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		free(path);
+		return -1;
+	}
+
+	fullpath = build_full_cgpath_from_monitorpath(h, path, filename);
+	ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+	free(fullpath);
+	free(path);
+
+	return ret;
+}
+
+/* Take a devices cgroup line of the form
+ *    /dev/foo rwm
+ * and fill @device with the matching
+ *    type major:minor access
+ * rule: the path is stat()ed to find the device type ('b' or 'c') and its
+ * major/minor numbers, and the rule is marked as a local allow rule.
+ *
+ * Returns 0 on success and -1 on error (errno EINVAL when the access mode is
+ * missing or the path is not a block/char device; stat()'s errno when the
+ * path cannot be examined).
+ */
+static int device_cgroup_rule_parse_devpath(struct device_item *device,
+					    const char *devpath)
+{
+	__do_free char *path = NULL;
+	char *mode = NULL;
+	int n_parts, ret;
+	char *p;
+	struct stat sb;
+
+	path = must_copy_string(devpath);
+
+	/*
+	 * Read path followed by mode. Ignore any trailing text.
+	 * A ' # comment' would be legal. Technically other text is not
+	 * legal, we could check for that if we cared to.
+	 */
+	for (n_parts = 1, p = path; *p; p++) {
+		if (*p != ' ')
+			continue;
+		*p = '\0';
+
+		if (n_parts != 1)
+			break;
+		p++;
+		n_parts++;
+
+		while (*p == ' ')
+			p++;
+
+		mode = p;
+
+		if (*p == '\0')
+			return ret_set_errno(-1, EINVAL);
+	}
+
+	/*
+	 * Without a space in the input there is no mode token and "mode" is
+	 * still NULL; reject it before it is dereferenced below.
+	 */
+	if (n_parts == 1)
+		return ret_set_errno(-1, EINVAL);
+
+	if (device_cgroup_parse_access(device, mode) < 0)
+		return -1;
+
+	ret = stat(path, &sb);
+	if (ret < 0)
+		return ret_set_errno(-1, errno);
+
+	mode_t m = sb.st_mode & S_IFMT;
+	switch (m) {
+	case S_IFBLK:
+		device->type = 'b';
+		break;
+	case S_IFCHR:
+		device->type = 'c';
+		break;
+	default:
+		return log_error_errno(-1, EINVAL, "Unsupported device type %i for \"%s\"", m, path);
+	}
+
+	device->major = MAJOR(sb.st_rdev);
+	device->minor = MINOR(sb.st_rdev);
+	device->allow = 1;
+	device->global_rule = LXC_BPF_DEVICE_CGROUP_LOCAL_RULE;
+
+	return 0;
+}
+
+/* Convert a "/dev/foo rwm" style value into "type major:minor access" and
+ * store it in @dest, which the caller must provide with room for 50 bytes.
+ * Returns 0 on success and -1 when parsing fails or the result does not fit.
+ */
+static int convert_devpath(const char *invalue, char *dest)
+{
+	struct device_item device = {0};
+	int written;
+
+	if (device_cgroup_rule_parse_devpath(&device, invalue) < 0)
+		return -1;
+
+	written = snprintf(dest, 50, "%c %d:%d %s", device.type, device.major,
+			   device.minor, device.access);
+	if (written >= 0 && written < 50)
+		return 0;
+
+	return log_error_errno(-1, ENAMETOOLONG, "Error on configuration value \"%c %d:%d %s\" (max 50 chars)",
+			       device.type, device.major, device.minor, device.access);
+}
+
+/* Called from setup_limits - here we have the container's cgroup_data because
+ * we created the cgroups.
+ *
+ * Reads the legacy cgroup file @filename from the container's cgroup of the
+ * matching controller (the part of @filename before the first '.') into
+ * @value, reading at most @len bytes.
+ *
+ * Returns the result of lxc_read_from_file(); -1 on allocation failure or an
+ * over-long name; -ENOENT (errno = ENOENT) when the controller has no
+ * hierarchy.
+ */
+static int isulad_cg_legacy_get_data(struct cgroup_ops *ops, const char *filename,
+				     char *value, size_t len)
+{
+	char *fullpath = NULL;
+	char *p = NULL;
+	struct hierarchy *h = NULL;
+	int ret = 0;
+	char *controller = NULL;
+	size_t name_len;
+
+	/*
+	 * Keep the file name length separate from @len: @len is the size of
+	 * the caller's @value buffer and must reach lxc_read_from_file()
+	 * unchanged.
+	 */
+	name_len = strlen(filename);
+	if (SIZE_MAX - 1 < name_len) {
+		errno = EINVAL;
+		return -1;
+	}
+	controller = calloc(1, name_len + 1);
+	if (controller == NULL) {
+		errno = ENOMEM;
+		return -1;
+	}
+	(void)strlcpy(controller, filename, name_len + 1);
+
+	/* Controller name = filename up to the first '.'. */
+	p = strchr(controller, '.');
+	if (p)
+		*p = '\0';
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		free(controller);
+		return -ENOENT;
+	}
+
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+	ret = lxc_read_from_file(fullpath, value, len);
+	free(fullpath);
+	free(controller);
+	return ret;
+}
+
+/* Write @value to the legacy cgroup file @filename in the container's cgroup
+ * of the matching controller (the part of @filename before the first '.').
+ *
+ * A "devices.allow" value starting with '/' is first converted from a device
+ * path into "type major:minor access" form. A failed write is retried up to
+ * 10 times, 100 ms apart, re-running the cpuset hierarchy handling and the
+ * cgroup directory creation before each retry. After the last failure the
+ * error is also reported on ops->errfd.
+ *
+ * Returns 0 on success, -ENOENT when the controller has no hierarchy, or the
+ * failing helper's return value.
+ */
+static int isulad_cg_legacy_set_data(struct cgroup_ops *ops, const char *filename,
+				     const char *value)
+{
+	/* "b|c <2^64-1>:<2^64-1> r|w|m" = 47 chars max */
+	char converted_value[50];
+	const int max_retry = 10;
+	char *container_cgroup = ops->container_cgroup;
+	size_t name_len = strlen(filename);
+	char *controller = alloca(name_len + 1);
+	char *fullpath, *dot;
+	struct hierarchy *h;
+	int ret = 0;
+
+	/* Controller name = filename up to the first '.'. */
+	(void)strlcpy(controller, filename, name_len + 1);
+	dot = strchr(controller, '.');
+	if (dot)
+		*dot = '\0';
+
+	if (strcmp("devices.allow", filename) == 0 && value[0] == '/') {
+		ret = convert_devpath(value, converted_value);
+		if (ret < 0)
+			return ret;
+		value = converted_value;
+	}
+
+	h = get_hierarchy(ops, controller);
+	if (!h) {
+		ERROR("Failed to setup limits for the \"%s\" controller. "
+		      "The controller seems to be unused by \"cgfsng\" cgroup "
+		      "driver or not enabled on the cgroup hierarchy",
+		      controller);
+		errno = ENOENT;
+		return -ENOENT;
+	}
+
+	fullpath = must_make_path(h->container_full_path, filename, NULL);
+
+	for (int retry_count = 0;; retry_count++) {
+		ret = lxc_write_to_file(fullpath, value, strlen(value), false, 0666);
+		if (ret == 0)
+			break;
+
+		if (retry_count >= max_retry) {
+			/* Out of retries: report the failure and give up. */
+			lxc_write_error_message(ops->errfd,
+						"%s:%d: setting cgroup config for ready process caused \"failed to write %s to %s: %s\".",
+						__FILE__, __LINE__, value, fullpath, strerror(errno));
+			break;
+		}
+
+		SYSERROR("setting cgroup config for ready process caused \"failed to write %s to %s\".", value, fullpath);
+		(void)isulad_cg_legacy_handle_cpuset_hierarchy(h, container_cgroup);
+		(void)isulad_mkdir_eexist_on_last(h->container_full_path, 0755);
+		usleep(100 * 1000); /* 100 millisecond */
+	}
+
+	free(fullpath);
+	return ret;
+}
+
+/* Apply the legacy (cgroup v1) settings from conf->cgroup to the container.
+ *
+ * @do_devices selects which settings are written in this call: only entries
+ * whose subsystem starts with "devices" when true, all others when false.
+ * For "files.limit" a value <= 0 is written as "max". Device writes failing
+ * with EACCES/EPERM are only warned about.
+ *
+ * For every "cpu.shares" entry the value is read back and compared with the
+ * configured one; a mismatch is reported on ops->errfd and fails the setup.
+ * This check runs for each call, independent of @do_devices.
+ *
+ * Returns true on success (or when there is nothing to set), false otherwise.
+ */
+__cgfsng_ops static bool isulad_cgfsng_setup_limits_legacy(struct cgroup_ops *ops,
+							    struct lxc_conf *conf,
+							    bool do_devices)
+{
+	__do_free struct lxc_list *sorted_cgroup_settings = NULL;
+	/* Assigned only after @conf has been checked for NULL below. */
+	struct lxc_list *cgroup_settings;
+	struct lxc_list *iterator, *next;
+	struct lxc_cgroup *cg;
+	bool ret = false;
+	char value[21 + 1] = { 0 };
+	long long int readvalue, setvalue;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!conf)
+		return ret_set_errno(false, EINVAL);
+
+	cgroup_settings = &conf->cgroup;
+	if (lxc_list_empty(cgroup_settings))
+		return true;
+
+	if (!ops->hierarchies)
+		return ret_set_errno(false, EINVAL);
+
+	sorted_cgroup_settings = sort_cgroup_settings(cgroup_settings);
+	if (!sorted_cgroup_settings)
+		return false;
+
+	lxc_list_for_each(iterator, sorted_cgroup_settings) {
+		cg = iterator->elem;
+
+		if (do_devices == !strncmp("devices", cg->subsystem, 7)) {
+			const char *cgvalue = cg->value;
+			if (strcmp(cg->subsystem, "files.limit") == 0) {
+				if (lxc_safe_long_long(cgvalue, &setvalue) != 0) {
+					SYSERROR("Invalid integer value %s", cgvalue);
+					goto out;
+				}
+				if (setvalue <= 0) {
+					cgvalue = "max";
+				}
+			}
+			if (isulad_cg_legacy_set_data(ops, cg->subsystem, cgvalue)) {
+				if (do_devices && (errno == EACCES || errno == EPERM)) {
+					SYSWARN("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
+					continue;
+				}
+				SYSERROR("Failed to set \"%s\" to \"%s\"", cg->subsystem, cgvalue);
+				goto out;
+			}
+			DEBUG("Set controller \"%s\" set to \"%s\"", cg->subsystem, cgvalue);
+		}
+
+		// isulad: check cpu shares
+		if (strcmp(cg->subsystem, "cpu.shares") == 0) {
+			if (isulad_cg_legacy_get_data(ops, cg->subsystem, value, sizeof(value) - 1) < 0) {
+				SYSERROR("Error get %s", cg->subsystem);
+				goto out;
+			}
+			trim(value);
+			if (lxc_safe_long_long(cg->value, &setvalue) != 0) {
+				SYSERROR("Invalid value %s", cg->value);
+				goto out;
+			}
+			if (lxc_safe_long_long(value, &readvalue) != 0) {
+				SYSERROR("Invalid value %s", value);
+				goto out;
+			}
+			if (setvalue > readvalue) {
+				ERROR("The maximum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+							"%s:%d: setting cgroup config for ready process caused \"The maximum allowed cpu-shares is %s\".",
+							__FILE__, __LINE__, value);
+				goto out;
+			} else if (setvalue < readvalue) {
+				ERROR("The minimum allowed cpu-shares is %s", value);
+				lxc_write_error_message(ops->errfd,
+							"%s:%d: setting cgroup config for ready process caused \"The minimum allowed cpu-shares is %s\".",
+							__FILE__, __LINE__, value);
+				goto out;
+			}
+		}
+	}
+
+	ret = true;
+	INFO("Limits for the legacy cgroup hierarchies have been setup");
+out:
+	/* Free the nodes of the sorted copy; the head is freed by __do_free. */
+	lxc_list_for_each_safe(iterator, sorted_cgroup_settings, next) {
+		lxc_list_del(iterator);
+		free(iterator);
+	}
+
+	return ret;
+}
+
+/*
+ * Parse one devices rule (@key = @val) and add it to the device list of
+ * @conf. Some of the parsing logic comes from the original cgroup device v1
+ * implementation in the kernel.
+ *
+ * A "devices.allow" value starting with '/' is parsed as a device path, all
+ * other values as a "type major:minor access" rule. Without
+ * HAVE_STRUCT_BPF_CGROUP_DEV_CTX this is a no-op returning 0.
+ * Returns 0 on success and -1 on failure. @ops is not used.
+ */
+static int bpf_device_cgroup_prepare(struct cgroup_ops *ops,
+				     struct lxc_conf *conf, const char *key,
+				     const char *val)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+	struct device_item device_item = {0};
+	const bool is_devpath = strcmp("devices.allow", key) == 0 && *val == '/';
+	int ret;
+
+	ret = is_devpath ? device_cgroup_rule_parse_devpath(&device_item, val)
+			 : device_cgroup_rule_parse(&device_item, key, val);
+	if (ret < 0)
+		return log_error_errno(-1, EINVAL, "Failed to parse device string %s=%s", key, val);
+
+	if (bpf_list_add_device(conf, &device_item) < 0)
+		return -1;
+#endif
+	return 0;
+}
+
+/* Apply the unified (cgroup2) settings from handler->conf->cgroup2.
+ *
+ * Entries whose subsystem starts with "devices" are parsed and queued through
+ * bpf_device_cgroup_prepare(); every other entry is written to the file of
+ * the same name in the unified hierarchy's container cgroup. A failure of
+ * either kind aborts the setup.
+ *
+ * Returns true on success or when there is nothing to do (no hierarchies, no
+ * cgroup2 settings); false on invalid arguments, a missing unified hierarchy
+ * or a failed setting.
+ */
+__cgfsng_ops static bool isulad_cgfsng_setup_limits(struct cgroup_ops *ops,
+						     struct lxc_handler *handler)
+{
+	struct lxc_list *cgroup_settings, *iterator;
+	struct hierarchy *h;
+	struct lxc_conf *conf;
+
+	if (!ops)
+		return ret_set_errno(false, ENOENT);
+
+	if (!ops->hierarchies)
+		return true;
+
+	if (!ops->container_cgroup)
+		return ret_set_errno(false, EINVAL);
+
+	if (!handler || !handler->conf)
+		return ret_set_errno(false, EINVAL);
+	conf = handler->conf;
+
+	if (lxc_list_empty(&conf->cgroup2))
+		return true;
+	cgroup_settings = &conf->cgroup2;
+
+	if (!ops->unified)
+		return false;
+	h = ops->unified;
+
+	lxc_list_for_each (iterator, cgroup_settings) {
+		struct lxc_cgroup *cg = iterator->elem;
+		int ret;
+
+		if (strncmp("devices", cg->subsystem, 7) == 0)
+			ret = bpf_device_cgroup_prepare(ops, conf, cg->subsystem,
+							cg->value);
+		else
+			ret = lxc_write_openat(h->container_full_path,
+					       cg->subsystem, cg->value,
+					       strlen(cg->value));
+		/* Check both branches: a rejected device rule must not be reported as set. */
+		if (ret < 0)
+			return log_error_errno(false, errno, "Failed to set \"%s\" to \"%s\"",
+					       cg->subsystem, cg->value);
+
+		TRACE("Set \"%s\" to \"%s\"", cg->subsystem, cg->value);
+	}
+
+	return log_info(true, "Limits for the unified cgroup hierarchy have been setup");
+}
+
+/* Build a BPF_PROG_TYPE_CGROUP_DEVICE program from the rules in
+ * handler->conf->devices and attach it to the unified hierarchy's container
+ * cgroup.
+ *
+ * Returns true on success and also when there is nothing to do: no
+ * hierarchies, no unified hierarchy, no bpf device controller, no container
+ * cgroup path, or an empty device list. Returns false on invalid arguments
+ * or when creating, filling, finalizing or attaching the program fails.
+ * Without HAVE_STRUCT_BPF_CGROUP_DEV_CTX the function only returns true.
+ */
+__cgfsng_ops bool isulad_cgfsng_devices_activate(struct cgroup_ops *ops,
+ struct lxc_handler *handler)
+{
+#ifdef HAVE_STRUCT_BPF_CGROUP_DEV_CTX
+ __do_bpf_program_free struct bpf_program *devices = NULL;
+ int ret;
+ struct lxc_conf *conf;
+ struct hierarchy *unified;
+ struct lxc_list *it;
+ struct bpf_program *devices_old;
+
+ if (!ops)
+ return ret_set_errno(false, ENOENT);
+
+ if (!ops->hierarchies)
+ return true;
+
+ if (!ops->container_cgroup)
+ return ret_set_errno(false, EEXIST);
+
+ if (!handler || !handler->conf)
+ return ret_set_errno(false, EINVAL);
+ conf = handler->conf;
+
+ /* Nothing to attach unless all of these are available. */
+ unified = ops->unified;
+ if (!unified || !unified->bpf_device_controller ||
+ !unified->container_full_path || lxc_list_empty(&conf->devices))
+ return true;
+
+ devices = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE);
+ if (!devices)
+ return log_error_errno(false, ENOMEM, "Failed to create new bpf program");
+
+ ret = bpf_program_init(devices);
+ if (ret)
+ return log_error_errno(false, ENOMEM, "Failed to initialize bpf program");
+
+ /* Append one rule per configured device item. */
+ lxc_list_for_each(it, &conf->devices) {
+ struct device_item *cur = it->elem;
+
+ ret = bpf_program_append_device(devices, cur);
+ if (ret)
+ return log_error_errno(false, ENOMEM, "Failed to add new rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+ cur->type,
+ cur->major,
+ cur->minor,
+ cur->access,
+ cur->allow,
+ cur->global_rule);
+ TRACE("Added rule to bpf device program: type %c, major %d, minor %d, access %s, allow %d, global_rule %d",
+ cur->type,
+ cur->major,
+ cur->minor,
+ cur->access,
+ cur->allow,
+ cur->global_rule);
+ }
+
+ ret = bpf_program_finalize(devices);
+ if (ret)
+ return log_error_errno(false, ENOMEM, "Failed to finalize bpf program");
+
+ ret = bpf_program_cgroup_attach(devices, BPF_CGROUP_DEVICE,
+ unified->container_full_path,
+ BPF_F_ALLOW_MULTI);
+ if (ret)
+ return log_error_errno(false, ENOMEM, "Failed to attach bpf program");
+
+ /* Replace old bpf program. */
+ /* The new program ends up in conf->cgroup2_devices and the previous one
+ * in "devices" - presumably so the __do_bpf_program_free handler releases
+ * the old program on return (NOTE(review): confirm against the macro). */
+ devices_old = move_ptr(conf->cgroup2_devices);
+ conf->cgroup2_devices = move_ptr(devices);
+ devices = move_ptr(devices_old);
+#endif
+ return true;
+}
+
+/* Enable all detected controllers of the unified hierarchy along the path to
+ * @cgroup.
+ *
+ * A string such as "+memory +pids +cpu +io" is built from
+ * unified->controllers and written to cgroup.subtree_control in the base
+ * cgroup and in every component of @cgroup except the last one.
+ *
+ * Returns true on success and when there is nothing to do (no hierarchies,
+ * not a pure unified layout, no controllers); false when @cgroup cannot be
+ * split or a write fails.
+ */
+bool __cgfsng_delegate_controllers(struct cgroup_ops *ops, const char *cgroup)
+{
+	__do_free char *add_controllers = NULL, *base_path = NULL;
+	__do_free_string_list char **parts = NULL;
+	struct hierarchy *unified = ops->unified;
+	size_t full_len = 0;
+	ssize_t parts_len;
+
+	if (!ops->hierarchies || !pure_unified_layout(ops) ||
+	    !unified->controllers[0])
+		return true;
+
+	/* For now we simply enable all controllers that we have detected by
+	 * creating a string like "+memory +pids +cpu +io".
+	 * TODO: In the near future we might want to support "-<controller>"
+	 * etc. but whether supporting semantics like this make sense will need
+	 * some thinking.
+	 */
+	for (char **it = unified->controllers; it && *it; it++) {
+		const bool has_next = (*(it + 1) != NULL);
+
+		/* Room for '+', the name and a separating ' '. */
+		full_len += strlen(*it) + 2;
+		add_controllers = must_realloc(add_controllers, full_len + 1);
+
+		/* Start from an empty string on the first controller. */
+		if (unified->controllers[0] == *it)
+			add_controllers[0] = '\0';
+
+		(void)strlcat(add_controllers, "+", full_len + 1);
+		(void)strlcat(add_controllers, *it, full_len + 1);
+
+		if (has_next)
+			(void)strlcat(add_controllers, " ", full_len + 1);
+	}
+
+	parts = lxc_string_split(cgroup, '/');
+	if (!parts)
+		return false;
+
+	/* The last path component is left out of the walk below. */
+	parts_len = lxc_array_len((void **)parts);
+	if (parts_len > 0)
+		parts_len--;
+
+	base_path = must_make_path(unified->mountpoint, unified->container_base_path, NULL);
+
+	/* Index -1 stands for the base path itself. */
+	for (ssize_t level = -1; level < parts_len; level++) {
+		__do_free char *target = NULL;
+		int ret;
+
+		if (level >= 0)
+			base_path = must_append_path(base_path, parts[level], NULL);
+
+		target = must_make_path(base_path, "cgroup.subtree_control", NULL);
+		ret = lxc_writeat(-1, target, add_controllers, full_len);
+		if (ret < 0)
+			return log_error_errno(false, errno, "Could not enable \"%s\" controllers in the unified cgroup \"%s\"",
+					       add_controllers, target);
+
+		TRACE("Enable \"%s\" controllers in the unified cgroup \"%s\"", add_controllers, target);
+	}
+
+	return true;
+}
+
+/* Controller delegation for the monitor cgroup: nothing is done here and
+ * success is always reported. @ops is not used.
+ */
+__cgfsng_ops bool isulad_cgfsng_monitor_delegate_controllers(struct cgroup_ops *ops)
+{
+	(void)ops;
+
+	return true;
+}
+
+/* Delegate controllers along the path of the container (payload) cgroup.
+ * Fails through ret_set_errno(false, ENOENT) when @ops is NULL; otherwise
+ * returns the result of __cgfsng_delegate_controllers().
+ */
+__cgfsng_ops bool isulad_cgfsng_payload_delegate_controllers(struct cgroup_ops *ops)
+{
+	const char *payload_cgroup;
+
+	if (ops == NULL)
+		return ret_set_errno(false, ENOENT);
+
+	payload_cgroup = ops->container_cgroup;
+
+	return __cgfsng_delegate_controllers(ops, payload_cgroup);
+}
+
+/* Check whether every entry of @controllers is listed in ops->cgroup_use.
+ * Returns true when ops->cgroup_use is not set or @controllers is NULL or
+ * empty; false as soon as one controller is not found in the list.
+ */
+static bool cgroup_use_wants_controllers(const struct cgroup_ops *ops,
+					 char **controllers)
+{
+	char **wanted = ops->cgroup_use;
+
+	if (!wanted)
+		return true;
+
+	if (!controllers)
+		return true;
+
+	for (size_t i = 0; controllers[i]; i++) {
+		size_t j = 0;
+
+		/* Advance until a match or the end of the wanted list. */
+		while (wanted[j] && strcmp(wanted[j], controllers[i]) != 0)
+			j++;
+
+		if (!wanted[j])
+			return false;
+	}
+
+	return true;
+}
+
+/* Fill *@delegate with the names of the cgroup files to delegate.
+ *
+ * The names are taken from /sys/kernel/cgroup/delegate, skipping
+ * "cgroup.procs". When that file cannot be read, the fixed set
+ * "cgroup.subtree_control" and "cgroup.threads" is used and a warning is
+ * logged. Each entry is a freshly copied string appended to the list.
+ */
+static void cg_unified_delegate(char ***delegate)
+{
+	__do_free char *buf = NULL;
+	char *standard[] = {"cgroup.subtree_control", "cgroup.threads", NULL};
+	char *token;
+	int idx;
+
+	buf = read_file("/sys/kernel/cgroup/delegate");
+	if (buf) {
+		lxc_iterate_parts (token, buf, " \t\n") {
+			/*
+			 * We always need to chown this for both cgroup and
+			 * cgroup2.
+			 */
+			if (strcmp(token, "cgroup.procs") != 0) {
+				idx = append_null_to_list((void ***)delegate);
+				(*delegate)[idx] = must_copy_string(token);
+			}
+		}
+
+		return;
+	}
+
+	/* Fallback: the file is unavailable, use the fixed set. */
+	for (size_t i = 0; standard[i]; i++) {
+		idx = append_null_to_list((void ***)delegate);
+		(*delegate)[idx] = must_copy_string(standard[i]);
+	}
+	SYSWARN("Failed to read /sys/kernel/cgroup/delegate");
+}
+
+/* Scan /proc/self/mountinfo for cgroup mounts and record every usable
+ * hierarchy in @ops->hierarchies.
+ *
+ * @ops:          cgroup driver state; cgroup_layout, hierarchies and unified
+ *                are updated here.
+ * @relative:     when false and the effective uid is 0, the base cgroups are
+ *                read from /proc/1/cgroup instead of /proc/self/cgroup.
+ * @unprivileged: when true, cg_unified_delegate() is called on the
+ *                cgroup2_chown member of the cgroup2 hierarchy.
+ *
+ * Returns 0 on success. Every failure path passes -1 as the return value to
+ * ret_set_errno()/log_error_errno().
+ */
+static int cg_hybrid_init(struct cgroup_ops *ops, bool relative, bool unprivileged)
+{
+ __do_free char *basecginfo = NULL, *line = NULL;
+ __do_free_string_list char **klist = NULL, **nlist = NULL;
+ __do_fclose FILE *f = NULL;
+ int ret;
+ size_t len = 0;
+
+ /* Root spawned containers escape the current cgroup, so use init's
+ * cgroups as our base in that case.
+ */
+ if (!relative && (geteuid() == 0))
+ basecginfo = read_file("/proc/1/cgroup");
+ else
+ basecginfo = read_file("/proc/self/cgroup");
+ if (!basecginfo)
+ return ret_set_errno(-1, ENOMEM); /* any read_file() failure is reported as ENOMEM */
+
+ ret = get_existing_subsystems(&klist, &nlist);
+ if (ret < 0)
+ return log_error_errno(-1, errno, "Failed to retrieve available legacy cgroup controllers");
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return log_error_errno(-1, errno, "Failed to open \"/proc/self/mountinfo\"");
+
+ lxc_cgfsng_print_basecg_debuginfo(basecginfo, klist, nlist);
+
+ while (getline(&line, &len, f) != -1) { /* one mountinfo entry per iteration */
+ __do_free char *base_cgroup = NULL, *mountpoint = NULL;
+ __do_free_string_list char **controller_list = NULL;
+ int type;
+ struct hierarchy *new;
+
+ type = get_cgroup_version(line);
+ if (type == 0) /* presumably: not a cgroup mount - TODO confirm in get_cgroup_version() */
+ continue;
+
+ if (type == CGROUP2_SUPER_MAGIC && ops->unified) /* a cgroup2 hierarchy is already recorded */
+ continue;
+
+ if (ops->cgroup_layout == CGROUP_LAYOUT_UNKNOWN) { /* first cgroup mount seen picks the initial layout */
+ if (type == CGROUP2_SUPER_MAGIC)
+ ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+ else if (type == CGROUP_SUPER_MAGIC)
+ ops->cgroup_layout = CGROUP_LAYOUT_LEGACY;
+ } else if (ops->cgroup_layout == CGROUP_LAYOUT_UNIFIED) { /* legacy mount after a unified one => hybrid */
+ if (type == CGROUP_SUPER_MAGIC)
+ ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+ } else if (ops->cgroup_layout == CGROUP_LAYOUT_LEGACY) { /* unified mount after a legacy one => hybrid */
+ if (type == CGROUP2_SUPER_MAGIC)
+ ops->cgroup_layout = CGROUP_LAYOUT_HYBRID;
+ }
+
+ controller_list = cg_hybrid_get_controllers(klist, nlist, line, type);
+ if (!controller_list && type == CGROUP_SUPER_MAGIC) /* legacy mount without a controller list: skip */
+ continue;
+
+ if (type == CGROUP_SUPER_MAGIC)
+ if (controller_list_is_dup(ops->hierarchies, controller_list)) {
+ TRACE("Skipping duplicating controller");
+ continue;
+ }
+
+ mountpoint = cg_hybrid_get_mountpoint(line);
+ if (!mountpoint) {
+ ERROR("Failed parsing mountpoint from \"%s\"", line);
+ continue;
+ }
+
+ if (type == CGROUP_SUPER_MAGIC) /* legacy: look up the base cgroup via the first controller of this mount */
+ base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, controller_list[0], CGROUP_SUPER_MAGIC);
+ else
+ base_cgroup = cg_hybrid_get_current_cgroup(basecginfo, NULL, CGROUP2_SUPER_MAGIC);
+ if (!base_cgroup) {
+ ERROR("Failed to find current cgroup");
+ continue;
+ }
+
+ trim(base_cgroup);
+ prune_init_scope(base_cgroup);
+
+ /* isulad: do not test whether the hierarchy is writable. If isulad
+ * runs inside docker without a cgroup namespace, base_cgroup will be
+ * docker/XXX... and mountpoint + base_cgroup may not exist. */
+
+ /*
+ * reason: base_cgroup may start with /system.slice when cg_hybrid_init
+ * reads /proc/1/cgroup on the host, and cgroup init would then place all
+ * container cgroups under /sys/fs/cgroup/<controller>/system.slice/xxx/lxc,
+ * which is not consistent with docker. The default cgroup path should be
+ * under /sys/fs/cgroup/<controller>/lxc, so base_cgroup is truncated to
+ * "/" below.
+ */
+
+ if (strlen(base_cgroup) > 1 && base_cgroup[0] == '/') {
+ base_cgroup[1] = '\0';
+ }
+
+ if (type == CGROUP2_SUPER_MAGIC) {
+ char *cgv2_ctrl_path;
+
+ cgv2_ctrl_path = must_make_path(mountpoint, base_cgroup,
+ "cgroup.controllers",
+ NULL);
+
+ controller_list = cg_unified_get_controllers(cgv2_ctrl_path); /* NOTE(review): overwrites the list assigned above without freeing it; leaks if cg_hybrid_get_controllers() returns non-NULL for cgroup2 - confirm */
+ free(cgv2_ctrl_path);
+ if (!controller_list) {
+ controller_list = cg_unified_make_empty_controller();
+ TRACE("No controllers are enabled for "
+ "delegation in the unified hierarchy");
+ }
+ }
+
+ /* Exclude all controllers that cgroup use does not want. */
+ if (!cgroup_use_wants_controllers(ops, controller_list)) {
+ TRACE("Skipping controller");
+ continue;
+ }
+
+ new = add_hierarchy(&ops->hierarchies, move_ptr(controller_list), move_ptr(mountpoint), move_ptr(base_cgroup), type);
+ if (type == CGROUP2_SUPER_MAGIC && !ops->unified) { /* first cgroup2 hierarchy becomes ops->unified */
+ if (unprivileged)
+ cg_unified_delegate(&new->cgroup2_chown);
+ ops->unified = new;
+ }
+ }
+
+ TRACE("Writable cgroup hierarchies:");
+ lxc_cgfsng_print_hierarchies(ops);
+
+ /* verify that all controllers in cgroup.use and all crucial
+ * controllers are accounted for
+ */
+ if (!all_controllers_found(ops))
+ return log_error_errno(-1, ENOENT, "Failed to find all required controllers");
+
+ return 0;
+}
+
+/* Look up the cgroup2 ("0::") entry of the relevant process' cgroup file.
+ *
+ * The file read is /proc/1/cgroup when @relative is false and the effective
+ * uid is 0, and /proc/self/cgroup otherwise. The text following "0::" up to
+ * the end of that line is copied with copy_to_eol() and passed through
+ * trim(); the result of trim() is returned. NULL is returned when the file
+ * cannot be read, when it has no "0::/" entry, or when the copy fails.
+ */
+static char *cg_unified_get_current_cgroup(bool relative)
+{
+	__do_free char *cgroup_file = NULL;
+	char *entry, *path;
+
+	/* geteuid() is only consulted when @relative is false. */
+	if (relative || geteuid() != 0)
+		cgroup_file = read_file("/proc/self/cgroup");
+	else
+		cgroup_file = read_file("/proc/1/cgroup");
+
+	entry = cgroup_file ? strstr(cgroup_file, "0::/") : NULL;
+	if (!entry)
+		return NULL;
+
+	/* Skip "0::" so that the copy starts at the leading '/'. */
+	path = copy_to_eol(&entry[3]);
+
+	return path ? trim(path) : NULL;
+}
+
+/* Initialise @ops for a host on which unified_cgroup_hierarchy() reports
+ * CGROUP2_SUPER_MAGIC.
+ *
+ * @ops:          cgroup driver state; a hierarchy is added and cgroup_layout
+ *                and unified are set.
+ * @relative:     forwarded to cg_unified_get_current_cgroup(); when false,
+ *                prune_init_scope() is applied to the base cgroup.
+ * @unprivileged: when true, cg_unified_delegate() is called on the new
+ *                hierarchy's cgroup2_chown member.
+ *
+ * Returns CGROUP2_SUPER_MAGIC once the unified hierarchy has been recorded,
+ * 0 when unified_cgroup_hierarchy() reports anything other than
+ * CGROUP2_SUPER_MAGIC or -ENOMEDIUM, and the result of ret_errno() for the
+ * ENOMEDIUM and EINVAL failure paths.
+ */
+static int cg_unified_init(struct cgroup_ops *ops, bool relative,
+			   bool unprivileged)
+{
+	__do_free char *controllers_file = NULL;
+	struct hierarchy *unified;
+	char **controllers;
+	char *cgroup_path, *mnt;
+	int version;
+
+	version = unified_cgroup_hierarchy();
+	if (version == -ENOMEDIUM)
+		return ret_errno(ENOMEDIUM);
+
+	/* Not a pure cgroup2 layout: nothing to do here. */
+	if (version != CGROUP2_SUPER_MAGIC)
+		return 0;
+
+	cgroup_path = cg_unified_get_current_cgroup(relative);
+	if (!cgroup_path)
+		return ret_errno(EINVAL);
+
+	if (!relative)
+		prune_init_scope(cgroup_path);
+
+	/*
+	 * The cgroup we are currently in is assumed to have been delegated
+	 * to us, so every controller listed in its cgroup.controllers file
+	 * may be delegated further down the hierarchy.
+	 */
+	mnt = must_copy_string(DEFAULT_CGROUP_MOUNTPOINT);
+	controllers_file = must_make_path(mnt, cgroup_path, "cgroup.controllers", NULL);
+
+	controllers = cg_unified_get_controllers(controllers_file);
+	if (!controllers)
+		controllers = cg_unified_make_empty_controller();
+
+	if (!controllers[0])
+		TRACE("No controllers are enabled for delegation");
+
+	/* TODO: Controllers requested through lxc.cgroup.use are not verified
+	 * here. lxc.cgroup.use is a global property and may not be the
+	 * long-term interface; a per-container way of requesting controllers
+	 * would be preferable.
+	 */
+
+	unified = add_hierarchy(&ops->hierarchies, controllers, mnt, cgroup_path, CGROUP2_SUPER_MAGIC);
+
+	if (unprivileged)
+		cg_unified_delegate(&unified->cgroup2_chown);
+
+	if (bpf_devices_cgroup_supported())
+		unified->bpf_device_controller = 1;
+
+	ops->cgroup_layout = CGROUP_LAYOUT_UNIFIED;
+	ops->unified = unified;
+
+	return CGROUP2_SUPER_MAGIC;
+}
+
+/* Populate @ops with the cgroup hierarchies available on this host.
+ *
+ * The comma-separated global "lxc.cgroup.use" value, if set, is split and
+ * appended to ops->cgroup_use. cg_unified_init() is tried first; when it does
+ * not report CGROUP2_SUPER_MAGIC, cg_hybrid_init() is used instead. Both are
+ * called with conf->cgroup_meta.relative and with "unprivileged" set when
+ * conf->id_map is not empty.
+ *
+ * Returns 0 when the unified hierarchy was set up, -1 when
+ * cg_unified_init() fails, and otherwise whatever cg_hybrid_init() returns.
+ */
+static int isulad_cg_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	bool relative = conf->cgroup_meta.relative;
+	bool unprivileged;
+	const char *use;
+	int version;
+
+	use = lxc_global_config_value("lxc.cgroup.use");
+	if (use) {
+		__do_free char *buf = NULL;
+		char *cursor, *controller;
+
+		/* Iterate over a private copy of the global value. */
+		buf = must_copy_string(use);
+		cursor = buf;
+
+		lxc_iterate_parts(controller, cursor, ",")
+			must_append_string(&ops->cgroup_use, controller);
+	}
+
+	unprivileged = !lxc_list_empty(&conf->id_map);
+
+	version = cg_unified_init(ops, relative, unprivileged);
+	if (version < 0)
+		return -1;
+
+	if (version != CGROUP2_SUPER_MAGIC)
+		return cg_hybrid_init(ops, relative, unprivileged);
+
+	return 0;
+}
+
+/* Compute the container's cgroup name and store it in ops->container_cgroup.
+ *
+ * A non-empty global "lxc.cgroup.pattern" value is copied into
+ * ops->cgroup_pattern. The container cgroup is then chosen as follows:
+ *   - conf->cgroup_meta.dir set:   "<dir>/<conf->name>"
+ *   - ops->cgroup_pattern set:     the pattern with "%n" replaced by
+ *                                  conf->name
+ *   - otherwise:                   conf->name
+ *
+ * Returns 0 on success. The failure paths pass -1 to ret_set_errno() with
+ * ENOENT (no @ops) or ENOMEM.
+ */
+__cgfsng_ops static int isulad_cgfsng_data_init(struct cgroup_ops *ops, struct lxc_conf *conf)
+{
+	__do_free char *cgroup_name = NULL, *expanded_pattern = NULL;
+	const char *pattern;
+	size_t name_len;
+
+	if (!ops)
+		return ret_set_errno(-1, ENOENT);
+
+	/* Take over the system-wide cgroup pattern, ignoring an empty one. */
+	pattern = lxc_global_config_value("lxc.cgroup.pattern");
+	if (pattern && *pattern != '\0')
+		ops->cgroup_pattern = must_copy_string(pattern);
+
+	if (conf->cgroup_meta.dir) {
+		cgroup_name = must_concat(&name_len, conf->cgroup_meta.dir, "/", conf->name, NULL);
+	} else if (ops->cgroup_pattern) {
+		expanded_pattern = lxc_string_replace("%n", conf->name, ops->cgroup_pattern);
+		if (!expanded_pattern)
+			return ret_set_errno(-1, ENOMEM);
+
+		cgroup_name = must_concat(&name_len, expanded_pattern, NULL);
+	} else {
+		cgroup_name = must_concat(&name_len, conf->name, NULL);
+	}
+
+	if (!cgroup_name)
+		return ret_set_errno(-1, ENOMEM);
+
+	ops->container_cgroup = move_ptr(cgroup_name);
+
+	return 0;
+}
+
+/* Allocate and initialise the "isulad_cgfsng" cgroup driver.
+ *
+ * A zeroed struct cgroup_ops is allocated, the host's cgroup hierarchies are
+ * detected through isulad_cg_init(), and every callback is pointed at its
+ * isulad_cgfsng_* implementation. errfd is taken from conf->errpipe[1], or
+ * set to -1 when @conf is NULL.
+ *
+ * Returns the new driver, or NULL when the allocation or isulad_cg_init()
+ * fails.
+ */
+struct cgroup_ops *cgfsng_ops_init(struct lxc_conf *conf)
+{
+	__do_free struct cgroup_ops *ops = NULL;
+
+	ops = malloc(sizeof(*ops));
+	if (!ops)
+		return ret_set_errno(NULL, ENOMEM);
+
+	memset(ops, 0, sizeof(*ops));
+	ops->cgroup_layout = CGROUP_LAYOUT_UNKNOWN;
+
+	if (isulad_cg_init(ops, conf) != 0)
+		return NULL;
+
+	/* Driver identity and error reporting. */
+	ops->driver = "isulad_cgfsng";
+	ops->version = "1.0.0";
+	ops->errfd = conf ? conf->errpipe[1] : -1;
+
+	/* Per-container setup and queries. */
+	ops->data_init = isulad_cgfsng_data_init;
+	ops->get_cgroup_full_path = isulad_cgfsng_get_cgroup_full_path;
+	ops->num_hierarchies = isulad_cgfsng_num_hierarchies;
+	ops->get_hierarchies = isulad_cgfsng_get_hierarchies;
+	ops->get_cgroup = isulad_cgfsng_get_cgroup;
+	ops->get = isulad_cgfsng_get;
+	ops->set = isulad_cgfsng_set;
+
+	/* Monitor cgroup callbacks. */
+	ops->monitor_create = isulad_cgfsng_monitor_create;
+	ops->monitor_enter = isulad_cgfsng_monitor_enter;
+	ops->monitor_delegate_controllers = isulad_cgfsng_monitor_delegate_controllers;
+	ops->monitor_destroy = isulad_cgfsng_monitor_destroy;
+
+	/* Payload cgroup callbacks. */
+	ops->payload_create = isulad_cgfsng_payload_create;
+	ops->payload_enter = isulad_cgfsng_payload_enter;
+	ops->payload_delegate_controllers = isulad_cgfsng_payload_delegate_controllers;
+	ops->payload_finalize = isulad_cgfsng_payload_finalize;
+	ops->payload_destroy = isulad_cgfsng_payload_destroy;
+
+	/* Remaining callbacks. */
+	ops->escape = isulad_cgfsng_escape;
+	ops->freeze = isulad_cgfsng_freeze;
+	ops->unfreeze = isulad_cgfsng_unfreeze;
+	ops->setup_limits_legacy = isulad_cgfsng_setup_limits_legacy;
+	ops->setup_limits = isulad_cgfsng_setup_limits;
+	ops->attach = isulad_cgfsng_attach;
+	ops->chown = isulad_cgfsng_chown;
+	ops->mount = isulad_cgfsng_mount;
+	ops->devices_activate = isulad_cgfsng_devices_activate;
+
+	return move_ptr(ops);
+}
--
2.25.1
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/hongkeyang/lxc.git
[email protected]:hongkeyang/lxc.git
hongkeyang
lxc
lxc
master

搜索帮助