8 Star 1 Fork 7

src-anolis-os/rasdaemon

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
1075-rasdaemon-enhance-rasdaemon-event-trigger.patch 25.31 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
From faaca73f3cb70ab4baba9d00eefe4d9cde14e033 Mon Sep 17 00:00:00 2001
From: Ruidong Tian <[email protected]>
Date: Fri, 7 Jun 2024 11:26:06 +0800
Subject: [PATCH 75/85] rasdaemon: enhance rasdaemon event trigger
- Add trigger timeout to avoid trigger hang.
- Move all trigger code to trigger.c
Use $(TRIGGER_NAME)_TIMEOUT to set the trigger timeout value, for example:
MC_CE_TRIGGER: The script executed when a corrected mc_event occurs.
MC_CE_TRIGGER_TIMEOUT: Timeout (in seconds) for MC_CE_TRIGGER; set it to 0 to
disable the timeout.
Signed-off-by: Ruidong Tian <[email protected]>
---
Makefile.am | 2 +-
contrib/aer_trigger | 18 ++
contrib/mce_record_trigger | 36 ++++
contrib/mem_fail_trigger | 16 ++
contrib/mem_fail_trigger.sh | 12 --
misc/rasdaemon.env | 26 +++
ras-aer-handler.c | 3 +
ras-events.c | 16 --
ras-mc-handler.c | 90 +-------
ras-mce-handler.c | 8 +-
ras-memory-failure-handler.c | 55 +----
trigger.c | 387 ++++++++++++++++++++++++++++++++---
trigger.h | 19 +-
13 files changed, 484 insertions(+), 204 deletions(-)
create mode 100755 contrib/aer_trigger
create mode 100755 contrib/mce_record_trigger
create mode 100755 contrib/mem_fail_trigger
delete mode 100755 contrib/mem_fail_trigger.sh
diff --git a/Makefile.am b/Makefile.am
index 91aab1e..fb0248e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -106,4 +106,4 @@ install-data-local:
$(install_sh) -d "$(DESTDIR)@sysconfdir@/ras/dimm_labels.d"
$(install_sh) -d "$(DESTDIR)@sysconfdir@/ras/triggers"
$(install_sh) @abs_srcdir@/misc/rasdaemon.env "$(DESTDIR)@SYSCONFDEFDIR@/rasdaemon"
- $(install_sh) @abs_srcdir@/contrib/mc_event_trigger "$(DESTDIR)@sysconfdir@/ras/triggers/mc_event_trigger"
+ $(install_sh) @abs_srcdir@/contrib/*_trigger "$(DESTDIR)@sysconfdir@/ras/triggers/"
diff --git a/contrib/aer_trigger b/contrib/aer_trigger
new file mode 100755
index 0000000..982ff01
--- /dev/null
+++ b/contrib/aer_trigger
@@ -0,0 +1,18 @@
+#!/bin/sh
+# This shell script can be executed by rasdaemon in daemon mode when an
+# aer_event occurs. The environment variables include all
+# information reported by the tracepoint.
+
+# environment:
+# TIMESTAMP Timestamp when error occurred
+# ERROR_TYPE Corrected | Uncorrected (Non-Fatal) | Uncorrected (Fatal)
+# DEV_NAME BDF
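+# TLP_HEADER_VALID Non-zero when TLP_HEADER is provided
+# TLP_HEADER Four 32-bit hex words; only set when TLP_HEADER_VALID is non-zero
+# MSG Error message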
+#
+
+[ -x ./aer_trigger.local ] && . ./aer_trigger.local
+
+
+exit 0
diff --git a/contrib/mce_record_trigger b/contrib/mce_record_trigger
new file mode 100755
index 0000000..06a52d9
--- /dev/null
+++ b/contrib/mce_record_trigger
@@ -0,0 +1,36 @@
+#!/bin/sh
+# This shell script can be executed by rasdaemon in daemon mode when a
+# mce_record event occurs. The environment variables include all information
+# reported by the tracepoint.
+#
+# environment:
+# MCGCAP MCGCAP MSR: machine check capabilities of CPU
+# MCGSTATUS Machine Check Global Status MSR
+# STATUS Bank's MCi_STATUS MSR
+# ADDR Bank's MCi_ADDR MSR
+# MISC Bank's MCi_MISC MSR
+# IP Instruction Pointer when the error happened
+# TSC CPU time stamp counter
+# WALLTIME Wall time_t when error was detected
+# CPU CPU number; obsoleted by extcpu
+# CPUID CPUID 1 EAX
+# APICID CPU initial APIC ID
+# SOCKETID CPU socket ID
+# CS Code segment
+# BANK Machine check bank reporting the error
+# CPUVENDOR Kernel's X86_VENDOR enum
+# SYND MCA_SYND MSR: only valid on SMCA systems
+# IPID MCA_IPID MSR: only valid on SMCA systems
+# TIMESTAMP Rasdaemon timestamp
+# BANK_NAME Decoded bank name
+# ERROR_MSG Vendor-defined error message
+# MCGSTATUS_MSG Decoded mcgstatus
+# MCISTATUS_MSG Decoded mcistatus
+# MCASTATUS_MSG Decoded mcastatus
+# USER_ACTION Recommendations for actions users should take
+# MC_LOCATION Error location in MC
+#
+
+[ -x ./mce_record_trigger.local ] && . ./mce_record_trigger.local
+
+exit 0
diff --git a/contrib/mem_fail_trigger b/contrib/mem_fail_trigger
new file mode 100755
index 0000000..ee44227
--- /dev/null
+++ b/contrib/mem_fail_trigger
@@ -0,0 +1,16 @@
+#!/bin/sh
+# This shell script can be executed by rasdaemon in daemon mode when a
+# memory_failure_event occurs. The environment variables include all
+# information reported by the tracepoint.
+
+# environment:
+# TIMESTAMP Timestamp when error occurred
+# PFN Offlined page PFN
+# PAGE_TYPE Page type
+# ACTION_RESULT Action result
+#
+
+[ -x ./mf_trigger.local ] && . ./mf_trigger.local
+
+
+exit 0
diff --git a/contrib/mem_fail_trigger.sh b/contrib/mem_fail_trigger.sh
deleted file mode 100755
index a3ac362..0000000
--- a/contrib/mem_fail_trigger.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-# This shell script can be executed by rasdaemon in daemon mode when a
-# memory_failure_event is occured, environment variables include all
-# information reported by tracepoint.
-#
-
-echo TIMESTAMP: $TIMESTAMP
-echo PFN: $PFN
-echo PAGE_TYPE: $PAGE_TYPE
-echo ACTION_RESULT: $ACTION_RESULT
-
-exit 0
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
index 3389a73..9293038 100644
--- a/misc/rasdaemon.env
+++ b/misc/rasdaemon.env
@@ -55,8 +55,34 @@ TRIGGER_DIR=
# Execute these triggers when the mc_event occured, the triggers will not
# be executed if the trigger is not specified.
+# Each trigger has a timeout in seconds; the trigger process is killed when it expires.
+# If <TRIGGER>_TIMEOUT is unset or empty the default is 1 second; set it to 0 to disable the timeout.
# For example:
# MC_CE_TRIGGER=mc_event_trigger
# MC_UE_TRIGGER=mc_event_trigger
+# MC_CE_TRIGGER_TIMEOUT=1
+# MC_UE_TRIGGER_TIMEOUT=1
+
+# trigger for mc_event
MC_CE_TRIGGER=
MC_UE_TRIGGER=
+MC_CE_TRIGGER_TIMEOUT=0
+MC_UE_TRIGGER_TIMEOUT=0
+
+MCE_CE_TRIGGER=
+MCE_DE_TRIGGER=
+MCE_UE_TRIGGER=
+MCE_CE_TRIGGER_TIMEOUT=0
+MCE_DE_TRIGGER_TIMEOUT=0
+MCE_UE_TRIGGER_TIMEOUT=0
+
+MEM_FAIL_TRIGGER=
+MEM_FAIL_TRIGGER_TIMEOUT=0
+
+AER_CE_TRIGGER=
+AER_UE_TRIGGER=
+AER_FATAL_TRIGGER=
+AER_CE_TRIGGER_TIMEOUT=0
+AER_UE_TRIGGER_TIMEOUT=0
+AER_FATAL_TRIGGER_TIMEOUT=0
+
diff --git a/ras-aer-handler.c b/ras-aer-handler.c
index 40c60bb..b00703e 100644
--- a/ras-aer-handler.c
+++ b/ras-aer-handler.c
@@ -25,6 +25,7 @@
#include "ras-logger.h"
#include "bitfield.h"
#include "ras-report.h"
+#include "trigger.h"
/* bit field meaning for correctable error */
static const char *aer_cor_errors[32] = {
@@ -183,5 +184,7 @@ int ras_aer_event_handler(struct trace_seq *s,
system(ipmi_add_sel);
#endif
+ run_aer_event_trigger(&ev);
+
return 0;
}
diff --git a/ras-events.c b/ras-events.c
index 2411cf7..f944847 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -64,11 +64,6 @@
extern char* choices_disable;
-static const struct event_trigger event_triggers[] = {
- { "mc_event", &mc_event_trigger_setup },
- { "memory_failure_event", &mem_fail_event_trigger_setup },
-};
-
static int get_debugfs_dir(char *tracing_dir, size_t len)
{
FILE *fp;
@@ -315,17 +310,6 @@ free_ras:
return 0;
}
-static void setup_event_trigger(char *event)
-{
- struct event_trigger trigger;
-
- for (int i = 0; i < ARRAY_SIZE(event_triggers); i++) {
- trigger = event_triggers[i];
- if (!strcmp(event, trigger.name))
- trigger.setup();
- }
-}
-
/*
* Set kernel filter. libtrace doesn't provide an API for setting filters
* in kernel, we have to implement it here.
diff --git a/ras-mc-handler.c b/ras-mc-handler.c
index c438771..a270637 100644
--- a/ras-mc-handler.c
+++ b/ras-mc-handler.c
@@ -23,6 +23,7 @@
#include <string.h>
#include <unistd.h>
#include "libtrace/kbuffer.h"
+#include <assert.h>
#include "ras-mc-handler.h"
#include "ras-logger.h"
#include "ras-page-isolation.h"
@@ -30,89 +31,6 @@
#include "ras-report.h"
#include "trigger.h"
-#define MAX_ENV 30
-static const char *mc_ce_trigger = NULL;
-static const char *mc_ue_trigger = NULL;
-
-void mc_event_trigger_setup(void)
-{
- const char *trigger;
-
- trigger = getenv("MC_CE_TRIGGER");
- if (trigger && strcmp(trigger, "")) {
- mc_ce_trigger = trigger_check(trigger);
-
- if (!mc_ce_trigger) {
- log(ALL, LOG_ERR,
- "Cannot access mc_event ce trigger `%s`\n",
- trigger);
- } else {
- log(ALL, LOG_INFO,
- "Setup mc_event ce trigger `%s`\n",
- trigger);
- }
- }
-
- trigger = getenv("MC_UE_TRIGGER");
- if (trigger && strcmp(trigger, "")) {
- mc_ue_trigger = trigger_check(trigger);
-
- if (!mc_ue_trigger) {
- log(ALL, LOG_ERR,
- "Cannot access mc_event ue trigger `%s`\n",
- trigger);
- } else {
- log(ALL, LOG_INFO,
- "Setup mc_event ue trigger `%s`\n",
- trigger);
- }
- }
-}
-
-static void run_mc_trigger(struct ras_mc_event *ev, const char *mc_trigger)
-{
- char *env[MAX_ENV];
- int ei = 0;
- int i;
-
- if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0)
- goto free;
- if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0)
- goto free;
- if (asprintf(&env[ei++], "COUNT=%d", ev->error_count) < 0)
- goto free;
- if (asprintf(&env[ei++], "TYPE=%s", ev->error_type) < 0)
- goto free;
- if (asprintf(&env[ei++], "MESSAGE=%s", ev->msg) < 0)
- goto free;
- if (asprintf(&env[ei++], "LABEL=%s", ev->label) < 0)
- goto free;
- if (asprintf(&env[ei++], "MC_INDEX=%d", ev->mc_index) < 0)
- goto free;
- if (asprintf(&env[ei++], "TOP_LAYER=%d", ev->top_layer) < 0)
- goto free;
- if (asprintf(&env[ei++], "MIDDLE_LAYER=%d", ev->middle_layer) < 0)
- goto free;
- if (asprintf(&env[ei++], "LOWER_LAYER=%d", ev->lower_layer) < 0)
- goto free;
- if (asprintf(&env[ei++], "ADDRESS=%llx", ev->address) < 0)
- goto free;
- if (asprintf(&env[ei++], "GRAIN=%lld", ev->grain) < 0)
- goto free;
- if (asprintf(&env[ei++], "SYNDROME=%llx", ev->syndrome) < 0)
- goto free;
- if (asprintf(&env[ei++], "DRIVER_DETAIL=%s", ev->driver_detail) < 0)
- goto free;
- env[ei] = NULL;
- assert(ei < MAX_ENV);
-
- run_trigger(mc_trigger, NULL, env, "mc_event");
-
-free:
- for (i = 0; i < ei; i++)
- free(env[i]);
-}
-
int ras_mc_event_handler(struct trace_seq *s,
struct pevent_record *record,
struct event_format *event, void *context)
@@ -282,11 +200,7 @@ int ras_mc_event_handler(struct trace_seq *s,
ras_report_mc_event(ras, &ev);
#endif
- if (mc_ce_trigger && !strcmp(ev.error_type, "Corrected"))
- run_mc_trigger(&ev, mc_ce_trigger);
-
- if (mc_ue_trigger && !strcmp(ev.error_type, "Uncorrected"))
- run_mc_trigger(&ev, mc_ue_trigger);
+ run_mc_event_trigger(&ev);
return 0;
diff --git a/ras-mce-handler.c b/ras-mce-handler.c
index f7ab23e..9601704 100644
--- a/ras-mce-handler.c
+++ b/ras-mce-handler.c
@@ -14,8 +14,8 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
+*/
+#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
@@ -23,11 +23,13 @@
#include <string.h>
#include <unistd.h>
#include <stdint.h>
+#include <assert.h>
#include "libtrace/kbuffer.h"
#include "ras-mce-handler.h"
#include "types.h"
#include "ras-logger.h"
#include "ras-report.h"
+#include "trigger.h"
/*
* The code below were adapted from Andi Kleen/Intel/SuSe mcelog code,
@@ -579,5 +581,7 @@ int ras_mce_event_handler(struct trace_seq *s,
ras_report_mce_event(ras, &e);
#endif
+ run_mce_record_trigger(&e);
+
return 0;
}
diff --git a/ras-memory-failure-handler.c b/ras-memory-failure-handler.c
index df427b1..8bc7a9d 100644
--- a/ras-memory-failure-handler.c
+++ b/ras-memory-failure-handler.c
@@ -94,59 +94,6 @@ static const struct {
{ MF_RECOVERED, "Recovered" },
};
-#define MAX_ENV 6
-static const char *mf_trigger = NULL;
-
-void mem_fail_event_trigger_setup(void)
-{
- const char *trigger;
-
- trigger = getenv("MEM_FAIL_TRIGGER");
- if (trigger && strcmp(trigger, "")) {
- mf_trigger = trigger_check(trigger);
-
- if (!mf_trigger) {
- log(ALL, LOG_ERR,
- "Cannot access memory_fail_event trigger `%s`\n",
- trigger);
- } else {
- log(ALL, LOG_INFO,
- "Setup memory_fail_event trigger `%s`\n",
- trigger);
- }
- }
-}
-
-static void run_mf_trigger(struct ras_mf_event *ev)
-{
- char *env[MAX_ENV];
- int ei = 0;
- int i;
-
- if (!mf_trigger)
- return;
-
- if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0)
- goto free;
- if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0)
- goto free;
- if (asprintf(&env[ei++], "PFN=%s", ev->pfn) < 0)
- goto free;
- if (asprintf(&env[ei++], "PAGE_TYPE=%s", ev->page_type) < 0)
- goto free;
- if (asprintf(&env[ei++], "ACTION_RESULT=%s", ev->action_result) < 0)
- goto free;
-
- env[ei] = NULL;
- assert(ei < MAX_ENV);
-
- run_trigger(mf_trigger, NULL, env, "memory_fail_event");
-
-free:
- for (i = 0; i < ei; i++)
- free(env[i]);
-}
-
static const char *get_page_type(int page_type)
{
unsigned int i;
@@ -225,7 +172,7 @@ int ras_memory_failure_event_handler(struct trace_seq *s,
/* Report event to ABRT */
ras_report_mf_event(ras, &ev);
#endif
- run_mf_trigger(&ev);
+ run_mf_event_trigger(&ev);
return 0;
}
diff --git a/trigger.c b/trigger.c
index 3031f4b..334d945 100644
--- a/trigger.c
+++ b/trigger.c
@@ -2,54 +2,387 @@
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
+#include <string.h>
#include <sys/wait.h>
#include "ras-logger.h"
+#include "types.h"
#include "trigger.h"
-void run_trigger(const char *trigger, char *argv[], char **env, const char *reporter)
+#include "ras-event.c"
+#include "ras-mce-handler.h"
+
+#define MAX_ENV 30
+static int child_done, alarm_done;
+static char *trigger_dir;
+
+static void child_handler(int sig)
+{
+ child_done = 1;
+}
+
+static void alarm_handler(int sig)
+{
+ alarm_done = 1;
+}
+
+void run_trigger(struct event_trigger *t, char *argv[], char **env)
{
pid_t child;
- int status;
+ char *trigger = t->path;
+ const char *path = t->abs_path;
+ int status, timeout = t->timeout;
- log(SYSLOG, LOG_INFO, "Running trigger `%s' (reporter: %s)\n", trigger, reporter);
+ log(ALL, LOG_INFO, "Running trigger `%s' (reporter: %s)\n",
+ trigger, t->event_name);
child = fork();
if (child < 0) {
- log(SYSLOG, LOG_ERR, "Cannot create process for trigger");
+ log(ALL, LOG_ERR, "Cannot create process for trigger\n");
return;
+ } else if (child == 0) {
+ if (execve(path, argv, env) == -1) {
+ log(ALL, LOG_ERR, "Trigger %s exec fail: %s\n", path, strerror(errno));
+ }
+ _exit(EXIT_FAILURE);
}
- if (child == 0) {
- execve(trigger, argv, env);
- _exit(127);
- } else {
- waitpid(child, &status, 0);
- if (WIFEXITED(status) && WEXITSTATUS(status)) {
- log(SYSLOG, LOG_INFO, "Trigger %s exited with status %d",
- trigger, WEXITSTATUS(status));
- } else if (WIFSIGNALED(status)) {
- log(SYSLOG, LOG_INFO, "Trigger %s killed by signal %d",
- trigger, WTERMSIG(status));
+ signal(SIGCHLD, child_handler);
+ if (timeout) {
+ signal(SIGALRM, alarm_handler);
+ alarm(timeout);
+ }
+ pause();
+
+ if (child_done) {
+ if (waitpid(child, &status, WNOHANG) == child) {
+ if (WIFEXITED(status) && WEXITSTATUS(status))
+ log(ALL, LOG_INFO,
+ "Trigger %s exited with status %d\n",
+ trigger, WEXITSTATUS(status));
+ else if (WIFSIGNALED(status))
+ log(ALL, LOG_INFO,
+ "Trigger %s killed by signal %d\n",
+ trigger, WTERMSIG(status));
}
+ alarm(0);
+ } else if (alarm_done) {
+ log(ALL, LOG_WARNING, "Trigger timeout, kill it\n");
+ kill(child, SIGKILL);
}
+
+ signal(SIGCHLD, SIG_DFL);
+ signal(SIGALRM, SIG_DFL);
}
-const char *trigger_check(const char *s)
+int trigger_check(struct event_trigger *t)
+{
+ if (trigger_dir)
+ if (snprintf(t->abs_path, 256, "%s/%s", trigger_dir, t->path) < 0)
+ return -1;
+
+ return access(t->abs_path, R_OK | X_OK);
+}
+
+struct event_trigger mc_ce_trigger = {"mc_event", "MC_CE_TRIGGER"};
+struct event_trigger mc_ue_trigger = {"mc_event", "MC_UE_TRIGGER"};
+
+struct event_trigger mce_ce_trigger = {"mce_record", "MCE_CE_TRIGGER"};
+struct event_trigger mce_de_trigger = {"mce_record", "MCE_DE_TRIGGER"};
+struct event_trigger mce_ue_trigger = {"mce_record", "MCE_UE_TRIGGER"};
+
+struct event_trigger mf_trigger = {"memory_failure_event", "MEM_FAIL_TRIGGER"};
+
+struct event_trigger aer_ce_trigger = {"aer_event", "AER_CE_TRIGGER"};
+struct event_trigger aer_ue_trigger = {"aer_event", "AER_UE_TRIGGER"};
+struct event_trigger aer_fatal_trigger = {"aer_event", "AER_FATAL_TRIGGER"};
+
+static struct event_trigger *event_triggers[] = {
+ &mc_ce_trigger,
+ &mc_ue_trigger,
+#ifdef HAVE_MCE
+ &mce_ce_trigger,
+ &mce_de_trigger,
+ &mce_ue_trigger,
+#endif
+#ifdef HAVE_MEMORY_FAILURE
+ &mf_trigger,
+#endif
+#ifdef HAVE_AER
+ &aer_ce_trigger,
+ &aer_ue_trigger,
+ &aer_fatal_trigger,
+#endif
+};
+
+void setup_event_trigger(const char *event)
{
- char *name;
- int rc;
- char *trigger_dir = getenv("TRIGGER_DIR");
+ int i, j;
+ struct event_trigger *trigger;
+ char *s, timeout_env[30];
+
+ trigger_dir = getenv("TRIGGER_DIR");
+
+ for (i = 0; i < ARRAY_SIZE(event_triggers); i++) {
+ trigger = event_triggers[i];
+
+ if (strcmp(event, trigger->event_name))
+ continue;
+
+ s = getenv(trigger->env);
+ if (!s || !strcmp(s, ""))
+ continue;
+
+ trigger->path = s;
+ if (trigger_check(trigger)) {
+ log(ALL, LOG_ERR, "Cannot access trigger `%s`: %s\n", s, strerror(errno));
+ continue;
+ }
+
+ log(ALL, LOG_NOTICE, "Setup %s trigger `%s`\n", trigger->event_name, s);
+
+ snprintf(timeout_env, sizeof(timeout_env), "%s_TIMEOUT", trigger->env);
- if (trigger_dir) {
- if (asprintf(&name, "%s/%s", trigger_dir, s) < 0)
- return NULL;
- s = name;
+ trigger->timeout = 1;
+ s = getenv(timeout_env);
+ if (!s || !strcmp(s, "")) {
+ log(ALL, LOG_NOTICE,
+ "Setup %s trigger default timeout 1s\n",
+ trigger->event_name);
+ continue;
+ }
+
+ j = atoi(s);
+ if (j < 0)
+ log(ALL, LOG_ERR,
+ "Invalid %s trigger timeout `%d` use default value: 1s\n",
+ trigger->event_name, j);
+ else if (j == 0) {
+ log(ALL, LOG_NOTICE,
+ "%s trigger no timeout\n",
+ trigger->event_name);
+ trigger->timeout = 0;
+ } else {
+ log(ALL, LOG_NOTICE,
+ "Setup %s trigger timeout `%d`s\n",
+ trigger->event_name, j);
+ trigger->timeout = j;
+ }
}
+}
+
+static void __run_mce_trigger(struct mce_event *e, struct event_trigger *trigger)
+{
+ char *env[MAX_ENV];
+ int ei = 0, i;
+
+ if (!trigger->path || !strcmp(trigger->path, ""))
+ return;
+
+ if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MCGCAP=%#lx", e->mcgcap) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MCGSTATUS=%#lx", e->mcgstatus) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "STATUS=%#lx", e->status) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "ADDR=%#lx", e->addr) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MISC=%#lx", e->misc) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "IP=%#lx", e->ip) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TSC=%#lx", e->tsc) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "WALLTIME=%#lx", e->walltime) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "CPU=%#x", e->cpu) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "CPUID=%#x", e->cpuid) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "APICID=%#x", e->apicid) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "SOCKETID=%#x", e->socketid) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "CS=%#x", e->cs) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "BANK=%#x", e->bank) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "CPUVENDOR=%#x", e->cpuvendor) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "SYND=%#lx", e->synd) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "IPID=%#lx", e->ipid) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TIMESTAMP=%s", e->timestamp) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "BANK_NAME=%s", e->bank_name) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "ERROR_MSG=%s", e->error_msg) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MCGSTATUS_MSG=%s", e->mcgstatus_msg) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MCISTATUS_MSG=%s", e->mcistatus_msg) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MCASTATUS_MSG=%s", e->mcastatus_msg) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "USER_ACTION=%s", e->user_action) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MC_LOCATION=%s", e->mc_location) < 0)
+ goto free;
+ env[ei] = NULL;
+ assert(ei < MAX_ENV);
- rc = access(s, R_OK | X_OK);
+ run_trigger(trigger, NULL, env);
- if (!rc)
- return(s);
+free:
+ for (i = 0; i < ei; i++)
+ free(env[i]);
+}
+
+void run_mce_record_trigger(struct mce_event *e)
+{
+ if (e->status & MCI_STATUS_UC)
+ __run_mce_trigger(e, &mce_ue_trigger);
+ else if (e->status & MCI_STATUS_DEFERRED)
+ __run_mce_trigger(e, &mce_de_trigger);
+ else
+ __run_mce_trigger(e, &mce_ce_trigger);
+}
+
+static void __run_mc_trigger(struct ras_mc_event *ev, struct event_trigger *trigger)
+{
+ char *env[MAX_ENV];
+ int ei = 0, i;
+
+ if (!trigger->path || !strcmp(trigger->path, ""))
+ return;
+
+ if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "COUNT=%d", ev->error_count) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TYPE=%s", ev->error_type) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MESSAGE=%s", ev->msg) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "LABEL=%s", ev->label) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MC_INDEX=%d", ev->mc_index) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TOP_LAYER=%d", ev->top_layer) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MIDDLE_LAYER=%d", ev->middle_layer) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "LOWER_LAYER=%d", ev->lower_layer) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "ADDRESS=%llx", ev->address) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "GRAIN=%lld", ev->grain) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "SYNDROME=%llx", ev->syndrome) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "DRIVER_DETAIL=%s", ev->driver_detail) < 0)
+ goto free;
+ env[ei] = NULL;
+ assert(ei < MAX_ENV);
+
+ run_trigger(trigger, NULL, env);
+
+free:
+ for (i = 0; i < ei; i++)
+ free(env[i]);
+}
+
+void run_mc_event_trigger(struct ras_mc_event *e)
+{
+ if (!strcmp(e->error_type, "Corrected"))
+ __run_mc_trigger(e, &mc_ce_trigger);
+
+ if (!strcmp(e->error_type, "Uncorrected"))
+ __run_mc_trigger(e, &mc_ue_trigger);
+}
+
+static void __run_mf_trigger(struct ras_mf_event *ev, struct event_trigger *trigger)
+{
+ char *env[MAX_ENV];
+ int ei = 0;
+ int i;
+
+ if (!trigger->path || !strcmp(trigger->path, ""))
+ return;
+
+ if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "PFN=%s", ev->pfn) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "PAGE_TYPE=%s", ev->page_type) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "ACTION_RESULT=%s", ev->action_result) < 0)
+ goto free;
+
+ env[ei] = NULL;
+ assert(ei < MAX_ENV);
+
+ run_trigger(trigger, NULL, env);
+
+free:
+ for (i = 0; i < ei; i++)
+ free(env[i]);
+}
- return NULL;
+void run_mf_event_trigger(struct ras_mf_event *e)
+{
+ __run_mf_trigger(e, &mf_trigger);
+}
+
+static void __run_aer_trigger(struct ras_aer_event *ev, struct event_trigger *trigger)
+{
+ char *env[MAX_ENV];
+ int ei = 0;
+ int i;
+
+ if (!trigger->path || !strcmp(trigger->path, ""))
+ return;
+
+ if (asprintf(&env[ei++], "PATH=%s", getenv("PATH") ?: "/sbin:/usr/sbin:/bin:/usr/bin") < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TIMESTAMP=%s", ev->timestamp) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "ERROR_TYPE=%s", ev->error_type) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "DEV_NAME=%s", ev->dev_name) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "TLP_HEADER_VALID=%d", ev->tlp_header_valid) < 0)
+ goto free;
+ if (ev->tlp_header_valid)
+ if (asprintf(&env[ei++], "TLP_HEADER=%08x %08x %08x %08x",
+ ev->tlp_header[0], ev->tlp_header[1],
+ ev->tlp_header[2], ev->tlp_header[3]) < 0)
+ goto free;
+ if (asprintf(&env[ei++], "MSG=%s", ev->msg) < 0)
+ goto free;
+
+ env[ei] = NULL;
+ assert(ei < MAX_ENV);
+
+ run_trigger(trigger, NULL, env);
+
+free:
+ for (i = 0; i < ei; i++)
+ free(env[i]);
+}
+
+void run_aer_event_trigger(struct ras_aer_event *e)
+{
+ if (!strcmp(e->error_type, "Corrected"))
+ __run_aer_trigger(e, &aer_ce_trigger);
+ else if (!strcmp(e->error_type, "Uncorrected (Non-Fatal)"))
+ __run_aer_trigger(e, &aer_ue_trigger);
+ else if (!strcmp(e->error_type, "Uncorrected (Fatal)"))
+ __run_aer_trigger(e, &aer_fatal_trigger);
}
diff --git a/trigger.h b/trigger.h
index 0cc9df5..8d42176 100644
--- a/trigger.h
+++ b/trigger.h
@@ -1,12 +1,23 @@
#ifndef __TRIGGER_H__
#define __TRIGGER_H__
+#include "ras-record.h"
+
struct event_trigger {
- const char *name;
- void (*setup)(void);
+ const char *event_name;
+ const char *env;
+ char *path;
+ char abs_path[256];
+ int timeout;
};
-const char *trigger_check(const char *s);
-void run_trigger(const char *trigger, char *argv[], char **env, const char *reporter);
+int trigger_check(struct event_trigger *t);
+void run_trigger(struct event_trigger *t, char *argv[], char **envr);
+void setup_event_trigger(const char *event);
+
+void run_mc_event_trigger(struct ras_mc_event *e);
+void run_mce_record_trigger(struct mce_event *e);
+void run_mf_event_trigger(struct ras_mf_event *e);
+void run_aer_event_trigger(struct ras_aer_event *e);
#endif
--
2.33.1
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-anolis-os/rasdaemon.git
[email protected]:src-anolis-os/rasdaemon.git
src-anolis-os
rasdaemon
rasdaemon
a8

搜索帮助