8 Star 1 Fork 7

src-anolis-os/rasdaemon

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
1079-rasdaemon-erst-add-erst-mce-erst-dmesg.patch 35.19 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393
From 2a8be760ca0502748f9ee1922942328886eaa745 Mon Sep 17 00:00:00 2001
From: Ruidong Tian <[email protected]>
Date: Thu, 12 Dec 2024 09:37:06 +0800
Subject: [PATCH 79/85] rasdaemon: erst: add erst-mce erst-dmesg
Signed-off-by: Ruidong Tian <[email protected]>
---
Makefile.am | 8 +-
configure.ac | 15 +
misc/rasdaemon.env | 2 +
ras-erst.c | 1082 ++++++++++++++++++++++++++++++++++++++++++++
ras-erst.h | 11 +
ras-mce-handler.c | 2 +-
ras-mce-handler.h | 5 +
ras-record.h | 5 +
ras-report-json.c | 15 +-
rasdaemon.c | 25 +
10 files changed, 1162 insertions(+), 8 deletions(-)
create mode 100644 ras-erst.c
create mode 100644 ras-erst.h
diff --git a/Makefile.am b/Makefile.am
index 024757d..5e87894 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -75,15 +75,19 @@ endif
if WITH_KMSG_MONITOR
rasdaemon_SOURCES += ras-kmsg.c
endif
+if WITH_ERST
+ rasdaemon_SOURCES += ras-erst.c
+endif
-rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a $(PCI_LIBS)
+rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a $(PCI_LIBS) $(ZLIBS)
include_HEADERS = config.h types.h ras-events.h ras-logger.h ras-mc-handler.h \
ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \
non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \
- ras-cpu-isolation.h queue.h non-standard-yitian.h trigger.h ras-kmsg.h ras-time.h
+ ras-cpu-isolation.h queue.h non-standard-yitian.h trigger.h ras-kmsg.h \
+ ras-erst.h ras-time.h
# This rule can't be called with more than one Makefile job (like make -j8)
# I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index 2136739..bf44582 100644
--- a/configure.ac
+++ b/configure.ac
@@ -206,6 +206,20 @@ AS_IF([test "x$enable_kmsg_monitor" = "xyes" || test "x$enable_all" == "xyes"],
AM_CONDITIONAL([WITH_KMSG_MONITOR], [test x$enable_kmsg_monitor = xyes || test x$enable_all == xyes])
AM_COND_IF([WITH_KMSG_MONITOR], [USE_KMSG_MONITOR="yes"], [USE_KMSG_MONITOR="no"])
+AC_ARG_ENABLE([erst],
+ AS_HELP_STRING([--enable-erst], [enable erst (currently experimental)]))
+
+AS_IF([test "x$enable_erst" = "xyes" || test "x$enable_all" == "xyes"], [
+ AC_CHECK_LIB(z, inflate,[echo "found zlib"] , AC_MSG_ERROR([*** Unable to find zlib library]), )
+ ZLIBS="-lz"
+ AC_DEFINE(HAVE_ERST,1,"have ERST")
+ AC_SUBST([WITH_ERST])
+])
+AM_CONDITIONAL([WITH_ERST], [test x$enable_erst = xyes || test x$enable_all == xyes])
+AM_COND_IF([WITH_ERST], [USE_ERST="yes"], [USE_ERST="no"])
+
+AC_SUBST([ZLIBS])
+
test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
@@ -250,4 +264,5 @@ compile time options summary
YITIAN RAS errors : $USE_YITIAN_NS_DECODE
Json exporter : $USE_JSON_REPORT
Kmsg monitor : $USE_KMSG_MONITOR
+ ERST : $USE_ERST
EOF
diff --git a/misc/rasdaemon.env b/misc/rasdaemon.env
index 2232a29..efb109e 100644
--- a/misc/rasdaemon.env
+++ b/misc/rasdaemon.env
@@ -98,6 +98,8 @@ POST_PAGE_OFFLINE_TRIGGER_TIMEOUT=0
KMSG_TRIGGER=
KMSG_TRIGGER_TIMEOUT=0
+ERST_DELETE=1
+
# KMSG MONITOR
KMSG_IGNORE_XID=""
KMSG_LIMIT=0
diff --git a/ras-erst.c b/ras-erst.c
new file mode 100644
index 0000000..ebf6ae4
--- /dev/null
+++ b/ras-erst.c
@@ -0,0 +1,1082 @@
+#include <regex.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <unistd.h>
+#include <zlib.h>
+#include <pci/pci.h>
+#include <utmp.h>
+#include <fcntl.h>
+#include "libtrace/event-parse.h"
+#include "ras-events.h"
+#include "ras-logger.h"
+#include "ras-mce-handler.h"
+#include "ras-aer-handler.h"
+#include "bitfield.h"
+#include "ras-report.h"
+#include "types.h"
+
+#include "ras-erst.h"
+
+/*
+ * Binary layout of one machine-check record as stored in a pstore
+ * "mce-erst*" file. handle_erst_mce_file() fread()s the file straight into
+ * this struct, so field order, sizes and padding must not be changed.
+ * NOTE(review): presumably mirrors the kernel's struct mce -- confirm that
+ * the layout matches the kernel version that wrote the record.
+ */
+struct mce {
+ uint64_t status; /* Bank's MCi_STATUS MSR */
+ uint64_t misc; /* Bank's MCi_MISC MSR */
+ uint64_t addr; /* Bank's MCi_ADDR MSR */
+ uint64_t mcgstatus; /* Machine Check Global Status MSR */
+ uint64_t ip; /* Instruction Pointer when the error happened */
+ uint64_t tsc; /* CPU time stamp counter */
+ uint64_t time; /* Wall time_t when error was detected */
+ uint8_t cpuvendor; /* Kernel's X86_VENDOR enum */
+ uint8_t inject_flags; /* Software inject flags */
+ uint8_t severity; /* Error severity */
+ uint8_t pad;
+ uint32_t cpuid; /* CPUID 1 EAX */
+ uint8_t cs; /* Code segment */
+ uint8_t bank; /* Machine check bank reporting the error */
+ uint8_t cpu; /* CPU number; obsoleted by extcpu */
+ uint8_t finished; /* Entry is valid */
+ uint32_t extcpu; /* Linux CPU number that detected the error */
+ uint32_t socketid; /* CPU socket ID */
+ uint32_t apicid; /* CPU initial APIC ID */
+ uint64_t mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+ uint64_t synd; /* MCA_SYND MSR: only valid on SMCA systems */
+ uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */
+ uint64_t ppin; /* Protected Processor Inventory Number */
+ uint32_t microcode; /* Microcode revision */
+ uint64_t kflags; /* Internal kernel use */
+};
+
+/*
+ * Compiled patterns used to pick APEI fields out of dmesg lines.
+ * Each member is compiled from the matching APEI_* pattern macro in
+ * handle_erst_panic() and consumed by dmesg_erst_line_process().
+ * "severity" and "fru" are compiled but not matched anywhere in this file.
+ */
+struct apei_regex {
+ /* Lines common to every record */
+ regex_t hdr;
+ regex_t severity;
+ regex_t error;
+ regex_t fru;
+ regex_t type;
+
+ /* Memory error section */
+ regex_t addr;
+ regex_t loc;
+ regex_t mem_type;
+ regex_t mem_status;
+
+ /* PCIe error section */
+ regex_t port_type;
+ regex_t port;
+ regex_t id;
+ regex_t status;
+ regex_t aer_sev;
+ regex_t tlp_hdr;
+
+ /* Generic processor error section */
+ regex_t cpu_id;
+
+ /* ARM processor error section */
+ regex_t midr;
+ regex_t mpidr;
+};
+
+/*
+ * Section type of the APEI record being assembled (struct apei.type).
+ * APEI_NONE is 0, so a zeroed struct apei has no type yet; report_apei()
+ * returns without output while the type is APEI_NONE.
+ */
+enum {
+ APEI_NONE,
+ APEI_CPU,
+ APEI_MEM,
+ APEI_PCIE,
+ APEI_ARM,
+};
+
+/*
+ * One APEI hardware-error record collected line by line from a dmesg dump.
+ * dmesg_erst_line_process() fills it in and report_apei() prints it and then
+ * resets it (all zero, err_id = -1).
+ * The char * members are set with strdup() by the line parser.
+ */
+struct apei {
+ int id; /* number inside {N} on the header line; 0 = none seen yet */
+ int sev; /* GHES_SEV_* value derived from the "Error N, type:" line */
+ int err_id; /* N of "Error N"; -1 = none seen yet */
+ char *fru; /* printed by report_apei(); never assigned in this file */
+ int type; /* one of the APEI_* enum values; selects the union member */
+ time_t time; /* last_reboot_time + dmesg seconds, or 0 when unknown */
+ union {
+ struct {
+ uint64_t addr;
+ char *loc;
+ char *status;
+ char *type;
+ } mem;
+ struct {
+ int port_type;
+ char *port;
+ char *vendor_id;
+ char *device_id;
+ char *status;
+ char *mask;
+ char *sev;
+ char *tlp_hdr;
+ } pcie;
+ struct {
+ char *cpu_id;
+ } cpu;
+ struct {
+ char *midr;
+ char *mpidr;
+ } arm;
+ };
+};
+
+/* Switches exported through ras-erst.h; not read inside this file. */
+int erst_mce_enable = 1;
+int erst_panic_enable = 1;
+/* When non-zero, each record file is unlink()ed after it has been handled. */
+int erst_delete = 0;
+/* Cached result of get_last_reboot_time(); 0 means "not known". */
+time_t last_reboot_time = 0;
+
+/* Directory scanned for record files (and its "erst" sub-directory). */
+#define ERST_PATH "/sys/fs/pstore/"
+/* File-name prefix of binary MCE records. */
+#define MCE_ERST_PREFIX "mce-erst"
+/* Event name printed in front of each MCE record. */
+#define ERST_EVENT_NAME "mce_erst_record"
+/* Only referenced from a commented-out line in report_apei(). */
+#define ERST_PANIC_NAME "dmesg_erst_record"
+/* Which "reboot" wtmp record to use, counting back from the end of the file. */
+#define LAST_REBOOT_INDEX 2
+
+/*
+ * Look up the time of the LAST_REBOOT_INDEX-th most recent "reboot" record
+ * in /var/log/wtmp and cache it in last_reboot_time.
+ *
+ * The file is walked backwards one struct utmp at a time. If the start of
+ * the file is reached before enough reboot records were seen,
+ * last_reboot_time is set to 0 ("unknown"). On open/seek/read errors
+ * last_reboot_time is left untouched.
+ */
+static void get_last_reboot_time(void)
+{
+	/*
+	 * Keep the record size in a signed off_t: negating a size_t yields a
+	 * huge unsigned value that only happens to work where off_t and
+	 * size_t have the same width.
+	 */
+	const off_t rec_size = (off_t)sizeof(struct utmp);
+	struct utmp record;
+	time_t reboot_times = 0;
+	int reboots_found = 0;
+	int fd;
+
+	fd = open("/var/log/wtmp", O_RDONLY);
+	if (fd == -1) {
+		log(ALL, LOG_ERR, "Error opening wtmp file");
+		return;
+	}
+
+	/* Position on the last record of the file. */
+	if (lseek(fd, -rec_size, SEEK_END) == -1) {
+		perror("Error seeking in wtmp file");
+		close(fd);
+		return;
+	}
+
+	for (;;) {
+		if (read(fd, &record, sizeof(record)) != (ssize_t)sizeof(record)) {
+			perror("Error reading wtmp file");
+			close(fd);
+			return;
+		}
+
+		if (strncmp(record.ut_line, "~", 1) == 0 &&
+		    strncmp(record.ut_user, "reboot", 6) == 0) {
+			reboot_times = record.ut_tv.tv_sec;
+			/*
+			 * Stop as soon as the wanted record is found so that a
+			 * failing seek below cannot discard a valid result.
+			 */
+			if (++reboots_found >= LAST_REBOOT_INDEX)
+				break;
+		}
+
+		/* Step back over the record just read plus the previous one. */
+		if (lseek(fd, -2 * rec_size, SEEK_CUR) == -1) {
+			reboot_times = 0;
+			break;
+		}
+	}
+
+	close(fd);
+
+	last_reboot_time = reboot_times;
+}
+
+#ifdef HAVE_MCE
+/*
+ * Decode and print one MCE event recovered from ERST.
+ *
+ * @ras: event context; ras->mce_priv->cputype selects the vendor decoder.
+ * @e:   event pre-filled with the raw register values.
+ *
+ * Nothing is printed when the vendor decoder returns non-zero.
+ */
+static void ras_erst_mce_handler(struct ras_events *ras, struct mce_event *e)
+{
+	/* Column width the event name is padded to. */
+	enum { EVENT_NAME_WIDTH = 20 };
+	struct mce_priv *mce = ras->mce_priv;
+	struct trace_seq s;
+	int rc = 0, len;
+#ifdef HAVE_JSON_REPORT
+	struct tm *tm;
+	time_t wall;
+#endif
+
+	switch (mce->cputype) {
+	case CPU_GENERIC:
+		break;
+	case CPU_K8:
+		rc = parse_amd_k8_event(ras, e);
+		break;
+	case CPU_AMD_SMCA:
+	case CPU_DHYANA:
+		rc = parse_amd_smca_event(ras, e);
+		break;
+	default:			/* All other CPU types are Intel */
+		rc = parse_intel_event(ras, e);
+	}
+
+	if (rc)
+		return;
+
+	/*
+	 * NOTE(review): if error_msg/mcastatus_msg are arrays inside
+	 * struct mce_event (its definition is not visible here) this test
+	 * compares addresses and never fires -- confirm whether
+	 * "!*e->error_msg && *e->mcastatus_msg" was intended.
+	 */
+	if (!e->error_msg && e->mcastatus_msg)
+		mce_snprintf(e->error_msg, "%s", e->mcastatus_msg);
+
+	trace_seq_init(&s);
+
+	trace_seq_printf(&s, " %s: ", ERST_EVENT_NAME);
+
+	/*
+	 * Pad with a field width instead of slicing a literal that has to
+	 * contain exactly the right number of blanks.
+	 */
+	len = strlen(ERST_EVENT_NAME);
+	if (len < EVENT_NAME_WIDTH)
+		trace_seq_printf(&s, "%*s", EVENT_NAME_WIDTH - len, "");
+
+	report_mce_event(ras, NULL, &s, e);
+
+#ifdef HAVE_JSON_REPORT
+	/*
+	 * Copy into a real time_t rather than casting the field's address,
+	 * and leave the timestamp untouched when localtime() fails.
+	 */
+	wall = (time_t)e->walltime;
+	tm = localtime(&wall);
+	if (tm)
+		strftime(e->timestamp, sizeof(e->timestamp),
+			 "%Y-%m-%d %H:%M:%S %z", tm);
+
+	report_mce_event_json(&s, e);
+#endif
+
+	trace_seq_terminate(&s);
+
+	trace_seq_do_printf(&s);
+	printf("\n");
+	fflush(stdout);
+	trace_seq_destroy(&s);
+}
+
+/*
+ * Read one binary MCE record file and report it.
+ *
+ * @dir_name: directory containing the file.
+ * @d_name:   file name; ignored unless it starts with MCE_ERST_PREFIX.
+ * @ras:      event context handed on to ras_erst_mce_handler().
+ *
+ * A file that cannot be opened or is shorter than struct mce is logged and
+ * skipped (and not deleted). After a successful report the file is removed
+ * when erst_delete is set.
+ */
+static void handle_erst_mce_file(const char *dir_name, const char *d_name, struct ras_events *ras)
+{
+	char file_path[512];
+	FILE *file;
+	struct mce mce;
+	struct mce_event e = { 0 };
+	size_t nread;
+	int n;
+
+	if (strncmp(d_name, MCE_ERST_PREFIX, strlen(MCE_ERST_PREFIX)))
+		return;
+
+	n = snprintf(file_path, sizeof(file_path), "%s/%s", dir_name, d_name);
+	if (n < 0 || (size_t)n >= sizeof(file_path)) {
+		/* A truncated path would name a different file. */
+		log(ALL, LOG_INFO, "Failed to open file %s\n", d_name);
+		return;
+	}
+
+	file = fopen(file_path, "r");
+	if (!file) {
+		log(ALL, LOG_INFO, "Failed to open file %s\n", file_path);
+		return;
+	}
+
+	nread = fread(&mce, 1, sizeof(mce), file);
+	fclose(file);
+	if (nread < sizeof(mce)) {
+		/* Do not decode a partially filled struct. */
+		log(ALL, LOG_ERR, "Failed to read file");
+		return;
+	}
+
+	e.mcgcap = mce.mcgcap;
+	e.mcgstatus = mce.mcgstatus;
+
+	e.status = mce.status;
+	e.addr = mce.addr;
+	e.misc = mce.misc;
+	e.synd = mce.synd;
+	e.ipid = mce.ipid;
+	e.ip = mce.ip;
+	e.tsc = mce.tsc;
+	e.walltime = mce.time;
+	e.cpu = mce.extcpu;
+	e.cpuid = mce.cpuid;
+	e.apicid = mce.apicid;
+	e.socketid = mce.socketid;
+	e.cs = mce.cs;
+	e.bank = mce.bank;
+	e.cpuvendor = mce.cpuvendor;
+	e.erst = 1;
+
+	ras_erst_mce_handler(ras, &e);
+
+	if (erst_delete && unlink(file_path))
+		log(ALL, LOG_INFO, "Error deleting file %s\n", file_path);
+}
+
+/*
+ * Report every MCE record found under ERST_PATH and in its "erst"
+ * sub-directory.
+ *
+ * Registers an MCE handler on a local ras_events to obtain the CPU type
+ * needed for decoding, then passes each directory entry to
+ * handle_erst_mce_file(), which filters by file-name prefix.
+ */
+void handle_erst_mce(void)
+{
+	struct ras_events ras = { 0 };
+	struct dirent *entry;
+	DIR *dir;
+	int rc;
+
+	if (!last_reboot_time)
+		get_last_reboot_time();
+
+	rc = register_mce_handler(&ras, sysconf(_SC_NPROCESSORS_CONF));
+	if (rc) {
+		log(ALL, LOG_INFO, "Can't register mce handler\n");
+		return;
+	}
+
+	if (!ras.mce_priv) {
+		log(ALL, LOG_INFO, "Register mce handler failed\n");
+		return;
+	}
+
+	dir = opendir(ERST_PATH);
+	if (!dir) {
+		log(ALL, LOG_INFO, "Failed to open directory");
+		return;
+	}
+
+	while ((entry = readdir(dir)) != NULL) {
+		struct stat path_stat;
+		char file_path[MAX_PATH];
+
+		snprintf(file_path, sizeof(file_path), "%s/%s", ERST_PATH, entry->d_name);
+		/* Skip entries that vanished or cannot be examined. */
+		if (stat(file_path, &path_stat) != 0)
+			continue;
+
+		/* sizeof("erst") includes the NUL, so this is an exact match. */
+		if (S_ISDIR(path_stat.st_mode) && !strncmp("erst", entry->d_name, sizeof("erst"))) {
+			DIR *subdir = opendir(file_path);
+			struct dirent *subentry;
+
+			if (!subdir) {
+				log(ALL, LOG_INFO, "Failed to open directory %s\n", strerror(errno));
+				break;
+			}
+			while ((subentry = readdir(subdir)) != NULL)
+				handle_erst_mce_file(file_path, subentry->d_name, &ras);
+
+			closedir(subdir);
+		} else {
+			handle_erst_mce_file(ERST_PATH, entry->d_name, &ras);
+		}
+	}
+
+	closedir(dir);
+}
+#endif
+
+/* File-name prefix of dmesg records and suffix of compressed ones. */
+#define DMESG_ERST_PREFIX "dmesg-erst"
+#define DMESG_ERST_SUFFIX "enc.z"
+
+/*
+ * POSIX extended regular expressions (compiled with REG_EXTENDED in
+ * init_reg()) for the "[Hardware Error]:" lines of a dmesg dump.
+ * In every pattern group 1 is the number inside {N}, except APEI_HEADER
+ * where group 1 is the text after '[' up to the fractional part of the
+ * timestamp and group 2 is the {N} number.
+ * NOTE(review): the '.' before [0-9]+ in APEI_HEADER is unescaped and so
+ * matches any character, not only a literal dot.
+ */
+#define APEI_HEADER ".*\\[(.*).[0-9]+\\] \\{([0-9]+)\\}\\[Hardware Error\\]: Hardware error from APEI Generic Hardware Error Source:.*"
+#define APEI_SEVERITY ".*\\{([0-9]+)\\}\\[Hardware Error\\]: event severity: (.*)"
+#define APEI_ERROR ".*\\{([0-9]+)\\}\\[Hardware Error\\]: Error ([0-9]+), type: (.*)"
+#define APEI_MEM_FRU ".*\\{([0-9]+)\\}\\[Hardware Error\\]: fru_text: (.*)"
+#define APEI_TYPE ".*\\{([0-9]+)\\}\\[Hardware Error\\]: section_type: (.*)"
+
+/* Memory error section */
+#define APEI_MEM_ADDR ".*\\{([0-9]+)\\}\\[Hardware Error\\]: physical_address: (.*)"
+#define APEI_MEM_LOC ".*\\{([0-9]+)\\}\\[Hardware Error\\]: (node:.*)"
+#define APEI_MEM_TYPE ".*\\{([0-9]+)\\}\\[Hardware Error\\]: error_type: [0-9]+, (.*)"
+#define APEI_MEM_STATUS ".*\\{([0-9]+)\\}\\[Hardware Error\\]:.*error_status: (.*) \\(.*\\)"
+
+/* PCIe error section */
+#define APEI_PORT_TYPE ".*\\{([0-9]+)\\}\\[Hardware Error\\]: port_type: ([0-9]+), (.*)"
+#define APEI_PORT ".*\\{([0-9]+)\\}\\[Hardware Error\\]: device_id: (.*)"
+#define APEI_ID ".*\\{([0-9]+)\\}\\[Hardware Error\\]: vendor_id: (.*), device_id: (.*)"
+#define APEI_STATUS ".*\\{([0-9]+)\\}\\[Hardware Error\\]: aer_uncor_status: (.*), aer_uncor_mask: (.*)"
+#define APEI_AER_SEVE ".*\\{([0-9]+)\\}\\[Hardware Error\\]: aer_uncor_severity: (.*)"
+#define APEI_TLP_HDR ".*\\{([0-9]+)\\}\\[Hardware Error\\]: TLP Header: (.*)"
+
+/* Generic processor error section */
+#define APEI_CPU_ID ".*\\{([0-9]+)\\}\\[Hardware Error\\]: processor_id: (.*)"
+
+/* ARM processor error section */
+#define APEI_ARM_MIDR ".*\\{([0-9]+)\\}\\[Hardware Error\\]: MIDR: (.*)"
+#define APEI_ARM_MPIDR ".*\\{([0-9]+)\\}\\[Hardware Error\\]: Multiprocessor Affinity Register \\(MPIDR\\): (.*)"
+
+/* Not referenced anywhere in this file. */
+#define ERST_DELETE_FILE "ERST_DELETE_FILE"
+
+/*
+ * Inflate a complete raw-deflate stream in one call.
+ *
+ * @compressed_data:        input bytes.
+ * @compressed_data_size:   number of input bytes.
+ * @decompressed_data:      output buffer.
+ * @decompressed_data_size: in: capacity of the output buffer in bytes;
+ *                          out (on success only): number of bytes written.
+ * @zstream:                stream already set up with inflateInit2(); it is
+ *                          reset here before use.
+ *
+ * Returns Z_OK (0) on success. Returns the inflateReset2() error if the
+ * reset fails, and Z_DATA_ERROR when the stream does not end cleanly within
+ * the given input and output space.
+ */
+static int decompress_deflate(const char *compressed_data, ssize_t compressed_data_size,
+			      char *decompressed_data, ssize_t *decompressed_data_size, z_stream *zstream)
+{
+	int ret;
+
+	ret = inflateReset2(zstream, -MAX_WBITS);
+	if (ret != Z_OK)
+		return ret;
+
+	zstream->next_in = (Bytef *)compressed_data;
+	zstream->avail_in = compressed_data_size;
+	zstream->next_out = (Bytef *)decompressed_data;
+	zstream->avail_out = *decompressed_data_size;
+
+	ret = inflate(zstream, Z_FINISH);
+	if (ret != Z_STREAM_END)
+		return Z_DATA_ERROR;
+
+	*decompressed_data_size = zstream->total_out;
+
+	/*
+	 * Callers treat any non-zero value as failure, so success must be
+	 * reported as Z_OK and not as Z_STREAM_END (which is 1).
+	 */
+	return Z_OK;
+}
+
+/*
+ * Turn a memory-error APEI record into a ras_mc_event and, when JSON
+ * reporting is compiled in, append its JSON form to @s.
+ *
+ * @s:    sequence buffer receiving the JSON text.
+ * @apei: record of type APEI_MEM; missing location/status strings are
+ *        reported as empty strings.
+ */
+static void apei_report_mem(struct trace_seq *s, struct apei *apei)
+{
+	struct ras_mc_event ev = {0};
+	char msg_buf[400];
+	time_t t;
+	struct tm *tm;
+
+	ev.erst = 1;
+	/* Fall back to "now" when the record carries no usable time. */
+	t = apei->time ? apei->time : time(NULL);
+
+	tm = localtime(&t);
+	if (tm)
+		strftime(ev.timestamp, sizeof(ev.timestamp),
+			 "%Y-%m-%d %H:%M:%S %z", tm);
+
+	ev.error_count = 1;
+	ev.grain = 1;
+	ev.top_layer = -1;
+	ev.middle_layer = -1;
+	ev.lower_layer = -1;
+
+	switch (apei->sev) {
+	case GHES_SEV_CORRECTED:
+		ev.error_type = "Corrected";
+		break;
+	case GHES_SEV_RECOVERABLE:
+		ev.error_type = "Uncorrected";
+		break;
+	case GHES_SEV_PANIC:
+		ev.error_type = "Fatal";
+		break;
+	default:
+		ev.error_type = "Info";
+	}
+	ev.severity = apei->sev;
+
+	/* Passing NULL to %s is undefined, so substitute "" for both strings. */
+	snprintf(msg_buf, sizeof(msg_buf), "APEI location: %s status(0x00000000): %s",
+		 apei->mem.loc ? apei->mem.loc : "",
+		 apei->mem.status ? apei->mem.status : "");
+	ev.driver_detail = msg_buf;
+
+	ev.address = apei->mem.addr;
+	ev.mc_index = 0;
+
+#ifdef HAVE_JSON_REPORT
+	report_mc_event_json(s, &ev);
+#endif
+}
+
+/*
+ * Names for the bits of a correctable-error status word, indexed by bit
+ * number; unnamed bits stay NULL. Passed to bitfield_msg() by
+ * apei_report_pcie() when the record severity is GHES_SEV_CORRECTED.
+ */
+static const char *aer_cor_errors[32] = {
+ /* Correctable errors */
+ [0] = "Receiver Error",
+ [6] = "Bad TLP",
+ [7] = "Bad DLLP",
+ [8] = "RELAY_NUM Rollover",
+ [12] = "Replay Timer Timeout",
+ [13] = "Advisory Non-Fatal",
+ [14] = "Corrected Internal Error",
+};
+
+/*
+ * Names for the bits of an uncorrectable-error status word, indexed by bit
+ * number; unnamed bits stay NULL. Passed to bitfield_msg() by
+ * apei_report_pcie() for every severity other than GHES_SEV_CORRECTED.
+ */
+static const char *aer_uncor_errors[32] = {
+ /* Uncorrectable errors */
+ [4] = "Data Link Protocol",
+ [12] = "Poisoned TLP",
+ [13] = "Flow Control Protocol",
+ [14] = "Completion Timeout",
+ [15] = "Completer Abort",
+ [16] = "Unexpected Completion",
+ [17] = "Receiver Overflow",
+ [18] = "Malformed TLP",
+ [19] = "ECRC",
+ [20] = "Unsupported Request",
+};
+
+/*
+ * Turn a PCIe-error APEI record into a ras_aer_event and, when JSON
+ * reporting is compiled in, append its JSON form to @s.
+ *
+ * @s:    sequence buffer receiving the JSON text.
+ * @apei: record of type APEI_PCIE. Any of its strings may be NULL when the
+ *        corresponding dmesg line was missing; vendor/device id then stay 0
+ *        and the message reads "no status".
+ */
+static void apei_report_pcie(struct trace_seq *s, struct apei *apei)
+{
+	struct ras_aer_event ev = {0};
+	unsigned long long status_val;
+	char buf[1024];
+	size_t used;
+	time_t t;
+	struct tm *tm;
+
+	ev.erst = 1;
+	/* Fall back to "now" when the record carries no usable time. */
+	t = apei->time ? apei->time : time(NULL);
+
+	tm = localtime(&t);
+	if (tm)
+		strftime(ev.timestamp, sizeof(ev.timestamp),
+			 "%Y-%m-%d %H:%M:%S %z", tm);
+
+	ev.dev_name = apei->pcie.port;
+	/* strtoul(NULL, ...) is undefined: only parse ids that were captured. */
+	if (apei->pcie.vendor_id)
+		ev.vendor_id = strtoul(apei->pcie.vendor_id, NULL, 16);
+	if (apei->pcie.device_id)
+		ev.device_id = strtoul(apei->pcie.device_id, NULL, 16);
+
+	if (apei->pcie.status) {
+		status_val = strtoull(apei->pcie.status, NULL, 16);
+
+		if (apei->sev == GHES_SEV_CORRECTED)
+			bitfield_msg(buf, sizeof(buf), aer_cor_errors, 32, 0, 0, status_val);
+		else
+			bitfield_msg(buf, sizeof(buf), aer_uncor_errors, 32, 0, 0, status_val);
+	} else {
+		snprintf(buf, sizeof(buf), "no status");
+	}
+	ev.msg = buf;
+
+	/* Append the TLP header text to the message when one was captured. */
+	ev.tlp_header_valid = (apei->pcie.tlp_hdr != NULL);
+	if (ev.tlp_header_valid) {
+		used = strlen(buf);
+		snprintf(buf + used, sizeof(buf) - used,
+			 " TLP Header: %s", apei->pcie.tlp_hdr);
+	}
+
+	ev.severity = apei->sev;
+	switch (apei->sev) {
+	case GHES_SEV_RECOVERABLE:
+		ev.error_type = "Uncorrected (Non-Fatal)";
+		break;
+	case GHES_SEV_PANIC:
+		ev.error_type = "Uncorrected (Fatal)";
+		break;
+	case GHES_SEV_CORRECTED:
+		ev.error_type = "Corrected";
+		break;
+	default:
+		ev.error_type = "Unknown severity";
+	}
+
+#ifdef HAVE_JSON_REPORT
+	report_aer_event_json(s, &ev);
+#endif
+}
+
+/*
+ * Print the APEI record collected so far and reset @apei for the next one.
+ *
+ * Memory and PCIe records go through apei_report_mem()/apei_report_pcie();
+ * generic and ARM processor records are written here as one JSON object.
+ * Nothing is printed, and @apei is left as is, while no section type is
+ * known. After printing, every string owned by the record is freed and the
+ * record is reset to all zero with err_id = -1.
+ */
+static void report_apei(struct apei *apei)
+{
+	struct trace_seq seq;
+	char timestamp[64] = "";	/* stays empty if localtime() fails */
+	struct tm *tm;
+	time_t t;
+
+	if (!apei->type)
+		return;
+
+	trace_seq_init(&seq);
+
+	switch (apei->type) {
+	case APEI_MEM:
+		apei_report_mem(&seq, apei);
+		break;
+	case APEI_PCIE:
+		apei_report_pcie(&seq, apei);
+		break;
+	case APEI_CPU:
+	case APEI_ARM:
+		/* Fall back to "now" when the record carries no usable time. */
+		t = apei->time ? apei->time : time(NULL);
+
+		tm = localtime(&t);
+		if (tm)
+			strftime(timestamp, sizeof(timestamp),
+				 "%Y-%m-%d %H:%M:%S %z", tm);
+
+		trace_seq_printf(&seq, "{ \"%s\": \"%s\", ", JSON_REPORT_KEY,
+				 apei->type == APEI_CPU ? "erst_cpu" : "erst_arm_cpu");
+		trace_seq_printf(&seq, "\"timestamp\": \"%s\", ", timestamp);
+		trace_seq_printf(&seq, "\"fru\": \"%s\", ", apei->fru ? apei->fru : "");
+		trace_seq_printf(&seq, "\"severity\": \"%s\", ", severity_strs[apei->sev]);
+		if (apei->type == APEI_CPU) {
+			trace_seq_printf(&seq, "\"cpu_id\": \"%s\" ",
+					 apei->cpu.cpu_id ? apei->cpu.cpu_id : "");
+		} else {
+			/* The comma after "midr" is needed for valid JSON. */
+			trace_seq_printf(&seq, "\"midr\": \"%s\", ",
+					 apei->arm.midr ? apei->arm.midr : "");
+			trace_seq_printf(&seq, "\"mpidr\": \"%s\" ",
+					 apei->arm.mpidr ? apei->arm.mpidr : "");
+		}
+		trace_seq_puts(&seq, "}");
+		break;
+	}
+
+	trace_seq_do_printf(&seq);
+	printf("\n");
+	fflush(stdout);
+	trace_seq_destroy(&seq);
+
+	/*
+	 * Release the strdup()'d fields before the pointers are wiped below.
+	 * Only the union member selected by the type is touched.
+	 */
+	free(apei->fru);
+	switch (apei->type) {
+	case APEI_MEM:
+		free(apei->mem.loc);
+		free(apei->mem.status);
+		free(apei->mem.type);
+		break;
+	case APEI_PCIE:
+		free(apei->pcie.port);
+		free(apei->pcie.vendor_id);
+		free(apei->pcie.device_id);
+		free(apei->pcie.status);
+		free(apei->pcie.mask);
+		free(apei->pcie.sev);
+		free(apei->pcie.tlp_hdr);
+		break;
+	case APEI_CPU:
+		free(apei->cpu.cpu_id);
+		break;
+	case APEI_ARM:
+		free(apei->arm.midr);
+		free(apei->arm.mpidr);
+		break;
+	}
+
+	memset(apei, 0, sizeof(*apei));
+	apei->err_id = -1;
+}
+
+/*
+ * Return 1 when @name ends with DMESG_ERST_SUFFIX, 0 otherwise.
+ * Names shorter than the suffix are never a match.
+ */
+static int is_compressed_file(const char *name)
+{
+	size_t name_len = strlen(name);
+	size_t suffix_len = strlen(DMESG_ERST_SUFFIX);
+
+	/* Guard the subtraction below against pointing before @name. */
+	if (name_len < suffix_len)
+		return 0;
+
+	return strcmp(name + name_len - suffix_len, DMESG_ERST_SUFFIX) == 0;
+}
+
+/*
+ * Return 1 when @line starts with "Panic#<n> Part1", 0 otherwise
+ * (including when @line is NULL).
+ */
+static int line_is_panic_part1(char *line)
+{
+	int count;
+	unsigned int part;	/* %u needs an unsigned destination */
+
+	if (!line)
+		return 0;
+
+	if (sscanf(line, "Panic#%d Part%u", &count, &part) != 2)
+		return 0;
+
+	return part == 1;
+}
+
+/*
+ * Inflate just enough of a raw-deflate stream to see its first line and
+ * tell whether that line is a "Panic#<n> Part1" header.
+ *
+ * @buf:     first bytes of the compressed file (binary, not a C string).
+ * @len:     number of valid bytes in @buf.
+ * @zstream: stream set up with inflateInit2(); reset here before use.
+ *
+ * Returns 1 for a part-1 panic dump, 0 otherwise or on inflate errors.
+ */
+static int compressed_file_is_panic_part1(const char *buf, size_t len, z_stream *zstream)
+{
+	char out_buf[128];
+	size_t produced;
+	int ret;
+
+	if (inflateReset2(zstream, -MAX_WBITS) != Z_OK)
+		return 0;
+
+	zstream->next_in = (Bytef *)buf;
+	zstream->avail_in = len;
+	zstream->next_out = (Bytef *)out_buf;
+	zstream->avail_out = sizeof(out_buf) - 1;	/* keep room for the NUL */
+
+	/*
+	 * Only a prefix of the stream is available, so ask for whatever can
+	 * be produced instead of Z_FINISH. Running out of input or output
+	 * space (Z_OK / Z_BUF_ERROR) is expected here.
+	 */
+	ret = inflate(zstream, Z_SYNC_FLUSH);
+	if (ret != Z_OK && ret != Z_STREAM_END && ret != Z_BUF_ERROR)
+		return 0;
+
+	produced = sizeof(out_buf) - 1 - zstream->avail_out;
+	if (!produced)
+		return 0;
+	out_buf[produced] = '\0';
+
+	return line_is_panic_part1(out_buf);
+}
+
+/*
+ * Check whether @file (positioned at its start) is part 1 of a panic dump.
+ *
+ * @file:    open record file; the read position is advanced.
+ * @name:    file name or path, used to detect the compressed suffix.
+ * @zstream: inflate stream used for compressed files.
+ *
+ * Returns 1 when the first line is "Panic#<n> Part1", 0 otherwise.
+ */
+static int file_is_panic_part1(FILE *file, const char *name, z_stream *zstream)
+{
+	char head[512];
+	size_t got;
+
+	/*
+	 * Use fread(), not fgets(): a compressed file is binary and may
+	 * contain NUL or newline bytes anywhere.
+	 */
+	got = fread(head, 1, sizeof(head) - 1, file);
+	if (!got)
+		return 0;
+	head[got] = '\0';
+
+	if (is_compressed_file(name))
+		return compressed_file_is_panic_part1(head, got, zstream);
+
+	return line_is_panic_part1(head);
+}
+
+/* Size of the buffer every caller of regex_group() passes in. */
+#define REGEX_GROUP_BUF_SIZE 128
+
+/*
+ * Copy capture group @i of a regexec() result into @buf as a C string.
+ *
+ * @m:    match array filled by regexec().
+ * @i:    index of the group to copy.
+ * @line: the string that was matched.
+ * @buf:  destination of at least REGEX_GROUP_BUF_SIZE bytes. Longer groups
+ *        are truncated; a group that did not take part in the match yields
+ *        an empty string.
+ */
+static void regex_group(regmatch_t *m, int i, const char *line, char *buf)
+{
+	regoff_t s = m[i].rm_so;
+	regoff_t e = m[i].rm_eo;
+	size_t len;
+
+	if (s < 0 || e < s) {
+		/* Never leave the previous group's text behind. */
+		buf[0] = '\0';
+		return;
+	}
+
+	len = (size_t)(e - s);
+	if (len >= REGEX_GROUP_BUF_SIZE)
+		len = REGEX_GROUP_BUF_SIZE - 1;
+
+	memcpy(buf, line + s, len);
+	buf[len] = '\0';
+}
+
+/*
+ * Feed one dmesg line into the APEI record being assembled in @apei.
+ *
+ * The line is tried against the patterns in @regex in a fixed order:
+ *   1. the "Hardware error from APEI ..." header (starts a new record),
+ *   2. the "Error N, type: ..." line (starts a new error section),
+ *   3. the "section_type:" line, while no type is known yet,
+ *   4. the detail lines of the current section type.
+ * When a header or error line carries a different id than the one stored,
+ * the record collected so far is emitted through report_apei() first.
+ *
+ * Returns 0 when the line was consumed or simply did not match, and the
+ * regexec() error code (after printing it) on a real regex failure.
+ */
+static int dmesg_erst_line_process(const char *line, struct apei_regex *regex, struct apei *apei)
+{
+	int ret, err_id = 0, apei_id = 0;
+	regmatch_t matches[4];
+	char buf[128];
+	regex_t *re = &regex->hdr;	/* pattern last tried, for regerror() */
+	time_t t;
+
+	ret = regexec(re = &regex->hdr, line, 4, matches, 0);
+	if (ret > REG_NOMATCH) {
+		goto error;
+	} else if (!ret) {
+		regex_group(matches, 2, line, buf);
+		apei_id = atoi(buf);
+
+		if (apei->id && apei_id != apei->id)
+			report_apei(apei);
+		apei->id = apei_id;
+
+		/* Group 1 is the seconds part of the dmesg timestamp. */
+		regex_group(matches, 1, line, buf);
+		t = atoll(buf);
+
+		if (last_reboot_time)
+			apei->time = last_reboot_time + t;
+		else
+			apei->time = 0;
+
+		return 0;
+	}
+
+	ret = regexec(re = &regex->error, line, 4, matches, 0);
+	if (ret > REG_NOMATCH) {
+		goto error;
+	} else if (!ret) {
+		regex_group(matches, 2, line, buf);
+		err_id = atoi(buf);
+
+		if (apei->err_id != -1 && err_id != apei->err_id)
+			report_apei(apei);
+
+		apei->err_id = err_id;
+
+		regex_group(matches, 3, line, buf);
+		if (!strcmp("corrected", buf))
+			apei->sev = GHES_SEV_CORRECTED;
+		else if (!strcmp("recoverable", buf))
+			apei->sev = GHES_SEV_RECOVERABLE;
+		else if (!strcmp("fatal", buf))
+			apei->sev = GHES_SEV_PANIC;
+		else
+			apei->sev = GHES_SEV_NO;
+		return 0;
+	}
+
+	if (!apei->type) {
+		ret = regexec(re = &regex->type, line, 4, matches, 0);
+		if (ret)
+			goto error;
+
+		regex_group(matches, 2, line, buf);
+		if (!strcmp("general processor error", buf))
+			apei->type = APEI_CPU;
+		else if (!strcmp("memory error", buf))
+			apei->type = APEI_MEM;
+		else if (!strcmp("PCIe error", buf))
+			apei->type = APEI_PCIE;
+		else if (!strcmp("ARM processor error", buf))
+			apei->type = APEI_ARM;
+		else
+			apei->type = APEI_NONE;
+
+		return 0;
+	}
+
+	switch (apei->type) {
+	case APEI_CPU:
+		ret = regexec(re = &regex->cpu_id, line, 4, matches, 0);
+		if (ret)
+			goto error;
+		regex_group(matches, 2, line, buf);
+		/* A later processor_id line replaces the earlier value. */
+		free(apei->cpu.cpu_id);
+		apei->cpu.cpu_id = strdup(buf);
+
+		return 0;
+	case APEI_ARM:
+		if (!apei->arm.midr) {
+			ret = regexec(re = &regex->midr, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->arm.midr = strdup(buf);
+
+				return 0;
+			}
+		}
+
+		if (!apei->arm.mpidr) {
+			ret = regexec(re = &regex->mpidr, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->arm.mpidr = strdup(buf);
+
+				return 0;
+			}
+		}
+
+		return 0;
+	case APEI_MEM:
+		if (!apei->mem.addr) {
+			ret = regexec(re = &regex->addr, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->mem.addr = strtoull(buf, NULL, 16);
+				return 0;
+			}
+		}
+
+		if (!apei->mem.loc) {
+			ret = regexec(re = &regex->loc, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->mem.loc = strdup(buf);
+				return 0;
+			}
+		}
+
+		if (!apei->mem.type) {
+			ret = regexec(re = &regex->mem_type, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->mem.type = strdup(buf);
+				return 0;
+			}
+		}
+
+		if (!apei->mem.status) {
+			ret = regexec(re = &regex->mem_status, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->mem.status = strdup(buf);
+				return 0;
+			}
+		}
+
+		/*
+		 * Stop here: running on into the PCIe case would store PCIe
+		 * fields through the union on top of the memory fields.
+		 */
+		return 0;
+	case APEI_PCIE:
+		/* port type */
+		ret = regexec(re = &regex->port_type, line, 4, matches, 0);
+		if (ret > REG_NOMATCH) {
+			goto error;
+		} else if (!ret) {
+			regex_group(matches, 2, line, buf);
+			apei->pcie.port_type = atoi(buf);
+
+			return 0;
+		}
+
+		/* port */
+		if (!apei->pcie.port) {
+			ret = regexec(re = &regex->port, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->pcie.port = strdup(buf);
+				return 0;
+			}
+		}
+
+		/* vendor id and device id */
+		if (!apei->pcie.vendor_id) {
+			ret = regexec(re = &regex->id, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->pcie.vendor_id = strdup(buf);
+				regex_group(matches, 3, line, buf);
+				apei->pcie.device_id = strdup(buf);
+
+				return 0;
+			}
+		}
+
+		/* uncorrectable status and mask */
+		if (!apei->pcie.status) {
+			ret = regexec(re = &regex->status, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->pcie.status = strdup(buf);
+				regex_group(matches, 3, line, buf);
+				apei->pcie.mask = strdup(buf);
+
+				return 0;
+			}
+		}
+
+		/* AER severity */
+		if (!apei->pcie.sev) {
+			ret = regexec(re = &regex->aer_sev, line, 4, matches, 0);
+			if (ret > REG_NOMATCH) {
+				goto error;
+			} else if (!ret) {
+				regex_group(matches, 2, line, buf);
+				apei->pcie.sev = strdup(buf);
+
+				return 0;
+			}
+		}
+
+		/* TLP header */
+		if (!apei->pcie.tlp_hdr) {
+			ret = regexec(re = &regex->tlp_hdr, line, 4, matches, 0);
+			if (ret)
+				goto error;
+
+			regex_group(matches, 2, line, buf);
+			apei->pcie.tlp_hdr = strdup(buf);
+
+			return 0;
+		}
+	}
+
+error:
+	/* A plain "no match" is not an error for the caller. */
+	if (ret == REG_NOMATCH)
+		return 0;
+	regerror(ret, re, buf, sizeof(buf));
+	printf("Regex execution error: %s\n", buf);
+	return ret;
+}
+
+/*
+ * Parse one dmesg record file and report the APEI records found in it.
+ *
+ * @file:    open record file, positioned at its start.
+ * @name:    file name or path, used to detect the compressed suffix.
+ * @zstream: inflate stream set up by the caller.
+ * @regex:   compiled APEI patterns.
+ *
+ * Only part 1 of a panic dump is processed. Compressed files are inflated
+ * into a buffer of three times the file size.
+ *
+ * Returns 0 on success and -1 when the file is not a part-1 dump or cannot
+ * be read, allocated for, or decompressed.
+ */
+static int handle_erst_dmesg(FILE *file, const char *name, z_stream *zstream, struct apei_regex *regex)
+{
+	char *file_buf, *out_data = NULL;
+	char *text, *line, *saveptr = NULL;
+	ssize_t out_data_size, bytesRead;
+	struct apei apei = {0};
+	long fileSize;
+	int ret = 0;
+
+	apei.err_id = -1;
+
+	if (!file_is_panic_part1(file, name, zstream))
+		return -1;
+
+	if (fseek(file, 0, SEEK_END) != 0)
+		return -1;
+
+	fileSize = ftell(file);
+	if (fileSize == -1)
+		return -1;
+
+	file_buf = malloc(fileSize + 1);
+	if (!file_buf)
+		return -1;
+
+	rewind(file);
+	bytesRead = fread(file_buf, 1, fileSize, file);
+	if (bytesRead != fileSize) {
+		ret = -1;
+		goto free_file;
+	}
+	file_buf[fileSize] = '\0';
+	text = file_buf;
+
+	if (is_compressed_file(name)) {
+		/* Pass the real capacity in; one extra byte holds the NUL. */
+		out_data_size = fileSize * 3;
+		out_data = malloc(out_data_size + 1);
+		if (!out_data) {
+			ret = -1;
+			goto free_file;
+		}
+
+		/* zlib failures are negative; Z_OK and Z_STREAM_END are not. */
+		if (decompress_deflate(file_buf, fileSize, out_data, &out_data_size, zstream) < 0) {
+			ret = -1;
+			goto free_out;
+		}
+		out_data[out_data_size] = '\0';
+
+		/*
+		 * Parse the inflated text through a separate pointer so that
+		 * each buffer is still freed exactly once below.
+		 */
+		text = out_data;
+	}
+
+	for (line = strtok_r(text, "\n", &saveptr); line;
+	     line = strtok_r(NULL, "\n", &saveptr))
+		dmesg_erst_line_process(line, regex, &apei);
+
+	/* Flush the record that was still being assembled. */
+	report_apei(&apei);
+
+free_out:
+	free(out_data);
+free_file:
+	free(file_buf);
+
+	return ret;
+}
+
+/*
+ * Compile @str into @re as a POSIX extended regular expression.
+ *
+ * Returns 0 on success; on failure prints the regcomp() error text and
+ * returns the regcomp() error code.
+ */
+static int init_reg(regex_t *re, const char *str)
+{
+	char buf[128];
+	int ret;
+
+	ret = regcomp(re, str, REG_EXTENDED);
+	if (ret) {
+		regerror(ret, re, buf, sizeof(buf));
+		/* This is a compile failure, not an execution failure. */
+		printf("Regex compilation error: %s\n", buf);
+	}
+
+	return ret;
+}
+
+/*
+ * Process one directory entry as a dmesg record file.
+ *
+ * Entries whose name does not start with DMESG_ERST_PREFIX are ignored.
+ * The file is opened, handed to handle_erst_dmesg() and closed; when
+ * erst_delete is set it is removed afterwards whatever the parse result.
+ */
+static void handle_erst_dmesg_file(const char *dir_name, const char *d_name, z_stream *zstream, struct apei_regex *regex)
+{
+	const size_t prefix_len = strlen(DMESG_ERST_PREFIX);
+	char path[512];
+	FILE *fp;
+
+	if (strncmp(d_name, DMESG_ERST_PREFIX, prefix_len) != 0)
+		return;
+
+	snprintf(path, sizeof(path), "%s/%s", dir_name, d_name);
+
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		log(ALL, LOG_INFO, "Failed to open file %s\n", path);
+		return;
+	}
+
+	handle_erst_dmesg(fp, path, zstream, regex);
+	fclose(fp);
+
+	if (!erst_delete)
+		return;
+
+	if (unlink(path) != 0)
+		log(ALL, LOG_INFO, "Error deleting file %s\n", path);
+}
+
+/*
+ * Report the APEI records contained in every dmesg record found under
+ * ERST_PATH and in its "erst" sub-directory.
+ *
+ * Compiles the APEI patterns, sets up a raw-deflate inflate stream and
+ * passes each directory entry to handle_erst_dmesg_file(), which filters by
+ * file-name prefix. Everything acquired here is released again on every
+ * exit path.
+ */
+void handle_erst_panic(void)
+{
+	struct apei_regex regex;
+	/* Each compiled pattern paired with its source expression. */
+	const struct {
+		regex_t *re;
+		const char *pattern;
+	} patterns[] = {
+		{ &regex.hdr, APEI_HEADER },
+		{ &regex.severity, APEI_SEVERITY },
+		{ &regex.error, APEI_ERROR },
+		{ &regex.fru, APEI_MEM_FRU },
+		{ &regex.type, APEI_TYPE },
+		{ &regex.addr, APEI_MEM_ADDR },
+		{ &regex.loc, APEI_MEM_LOC },
+		{ &regex.mem_type, APEI_MEM_TYPE },
+		{ &regex.mem_status, APEI_MEM_STATUS },
+		{ &regex.port_type, APEI_PORT_TYPE },
+		{ &regex.port, APEI_PORT },
+		{ &regex.id, APEI_ID },
+		{ &regex.status, APEI_STATUS },
+		{ &regex.aer_sev, APEI_AER_SEVE },
+		{ &regex.tlp_hdr, APEI_TLP_HDR },
+		{ &regex.cpu_id, APEI_CPU_ID },
+		{ &regex.midr, APEI_ARM_MIDR },
+		{ &regex.mpidr, APEI_ARM_MPIDR },
+	};
+	const size_t npatterns = sizeof(patterns) / sizeof(patterns[0]);
+	size_t ncompiled, i;
+	z_stream zstream = { 0 };
+	struct dirent *entry;
+	DIR *dir;
+	int rc;
+
+	if (!last_reboot_time)
+		get_last_reboot_time();
+
+	/* Stop at the first failure; only the first ncompiled need freeing. */
+	for (ncompiled = 0; ncompiled < npatterns; ncompiled++)
+		if (init_reg(patterns[ncompiled].re, patterns[ncompiled].pattern))
+			goto free_regex;
+
+	dir = opendir(ERST_PATH);
+	if (!dir) {
+		log(ALL, LOG_INFO, "%s Failed to open directory %s\n", ERST_PATH, strerror(errno));
+		goto free_regex;
+	}
+
+	/* Store the result: the check below is meaningless without it. */
+	rc = inflateInit2(&zstream, -MAX_WBITS);
+	if (rc != Z_OK) {
+		log(ALL, LOG_INFO, "Failed to open init inflate %d\n", rc);
+		goto close_dir;
+	}
+
+	while ((entry = readdir(dir)) != NULL) {
+		struct stat path_stat;
+		char file_path[MAX_PATH];
+
+		snprintf(file_path, sizeof(file_path), "%s/%s", ERST_PATH, entry->d_name);
+		/* Skip entries that vanished or cannot be examined. */
+		if (stat(file_path, &path_stat) != 0)
+			continue;
+
+		/* sizeof("erst") includes the NUL, so this is an exact match. */
+		if (S_ISDIR(path_stat.st_mode) && !strncmp("erst", entry->d_name, sizeof("erst"))) {
+			DIR *subdir = opendir(file_path);
+			struct dirent *subentry;
+
+			if (!subdir) {
+				log(ALL, LOG_INFO, "Failed to open directory %s\n", strerror(errno));
+				break;
+			}
+			while ((subentry = readdir(subdir)) != NULL)
+				handle_erst_dmesg_file(file_path, subentry->d_name, &zstream, &regex);
+
+			closedir(subdir);
+		} else {
+			handle_erst_dmesg_file(ERST_PATH, entry->d_name, &zstream, &regex);
+		}
+	}
+
+	inflateEnd(&zstream);
+close_dir:
+	closedir(dir);
+free_regex:
+	for (i = 0; i < ncompiled; i++)
+		regfree(patterns[i].re);
+}
diff --git a/ras-erst.h b/ras-erst.h
new file mode 100644
index 0000000..96ab58a
--- /dev/null
+++ b/ras-erst.h
@@ -0,0 +1,11 @@
+#define ERST_DELETE "ERST_DELETE" /* environment variable name; main() in rasdaemon.c stores atoi() of its value in erst_delete */
+/* Flags declared here and defined outside this header; main() in rasdaemon.c updates them when built with HAVE_ERST. */
+extern int erst_mce_enable; /* set to 0 by main() when choices_disable contains "erst_mce"; gates handle_erst_mce() */
+extern int erst_panic_enable; /* set to 0 by main() when choices_disable contains "erst_panic"; gates handle_erst_panic() */
+extern int erst_delete; /* NOTE(review): presumably asks for processed ERST records to be removed - confirm against the implementation */
+
+#ifdef HAVE_MCE /* the prototype below is only declared when HAVE_MCE is defined */
+void handle_erst_mce(void); /* called from main() when erst_mce_enable is non-zero, before handle_ras_events() */
+#endif /* HAVE_MCE */
+
+void handle_erst_panic(void); /* called from main() when erst_panic_enable is non-zero, before handle_ras_events() */
diff --git a/ras-mce-handler.c b/ras-mce-handler.c
index ecc6468..686c308 100644
--- a/ras-mce-handler.c
+++ b/ras-mce-handler.c
@@ -275,7 +275,7 @@ int register_mce_handler(struct ras_events *ras, unsigned int ncpus)
* End of mcelog's code
*/
-static void report_mce_event(struct ras_events *ras,
+void report_mce_event(struct ras_events *ras,
struct pevent_record *record,
struct trace_seq *s, struct mce_event *e)
{
diff --git a/ras-mce-handler.h b/ras-mce-handler.h
index f0dbdab..df24be9 100644
--- a/ras-mce-handler.h
+++ b/ras-mce-handler.h
@@ -77,6 +77,7 @@ struct mce_event {
uint64_t ipid; /* MCA_IPID MSR: only valid on SMCA systems */
int severity;
+ int erst;
/* Parsed data */
char timestamp[64];
char bank_name[64];
@@ -178,4 +179,8 @@ int parse_amd_k8_event(struct ras_events *ras, struct mce_event *e);
int parse_amd_smca_event(struct ras_events *ras, struct mce_event *e);
+void report_mce_event(struct ras_events *ras,
+ struct pevent_record *record,
+ struct trace_seq *s, struct mce_event *e);
+
#endif
diff --git a/ras-record.h b/ras-record.h
index f48fe37..17cc981 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -45,6 +45,7 @@ struct ras_mc_event {
signed char top_layer, middle_layer, lower_layer;
unsigned long long address, grain, syndrome;
const char *driver_detail;
+ int erst;
};
struct ras_mc_offline_event {
@@ -64,6 +65,9 @@ struct ras_aer_event {
uint8_t tlp_header_valid;
uint32_t *tlp_header;
const char *msg;
+ int erst;
+ uint16_t vendor_id;
+ uint16_t device_id;
};
struct ras_extlog_event {
@@ -101,6 +105,7 @@ struct ras_arm_event {
uint32_t ctx_len;
const uint8_t *vsei_error;
uint32_t oem_len;
+ int erst;
};
struct devlink_event {
diff --git a/ras-report-json.c b/ras-report-json.c
index b175723..1c3b571 100644
--- a/ras-report-json.c
+++ b/ras-report-json.c
@@ -37,7 +37,7 @@ void report_mc_event_json(struct trace_seq *s, struct ras_mc_event *ev)
return;
trace_seq_printf(s,
- "\n{ \"%s\": \"mc_event\", " \
+ "\n{ \"%s\": \"%s\", " \
"\"timestamp\": \"%s\", " \
"\"severity\": \"%s\", " \
"\"error_count\": %d, " \
@@ -50,6 +50,7 @@ void report_mc_event_json(struct trace_seq *s, struct ras_mc_event *ev)
"\"syndrome\": \"%#llx\", " \
"\"driver_detail\": \"%s\" }",
JSON_REPORT_KEY,
+ ev->erst ? "erst_mc_event" : "mc_event",
(*ev->timestamp) ? ev->timestamp : NONE,
severity_strs[ev->severity],
ev->error_count,
@@ -121,7 +122,7 @@ void report_aer_event_json(struct trace_seq *s, struct ras_aer_event *ev)
get_pci_dev_name(ev->dev_name, pci_name, 128, &vendor, &device);
trace_seq_printf(s,
- "\n{ \"%s\": \"aer_event\", " \
+ "\n{ \"%s\": \"%s\", " \
"\"timestamp\": \"%s\", " \
"\"severity\": \"%s\", " \
"\"error_type\": \"%s\", " \
@@ -131,12 +132,14 @@ void report_aer_event_json(struct trace_seq *s, struct ras_aer_event *ev)
"\"device_id\": \"%#x\", " \
"\"msg\": \"%s\" }",
JSON_REPORT_KEY,
+ ev->erst ? "erst_aer_event" : "aer_event",
(*ev->timestamp) ? ev->timestamp : NONE,
severity_strs[ev->severity],
(ev->error_type) ? ev->error_type : NONE,
(ev->dev_name) ? ev->dev_name : NONE,
(*pci_name) ? pci_name : NONE,
- vendor, device,
+ ev->vendor_id ? ev->vendor_id : vendor,
+ ev->device_id ? ev->device_id: device,
(ev->msg) ? ev->msg : NONE);
}
@@ -146,7 +149,7 @@ void report_arm_event_json(struct trace_seq *s, struct ras_arm_event *ev)
return;
trace_seq_printf(s,
- "\n{ \"%s\": \"arm_event\", " \
+ "\n{ \"%s\": \"%s\", " \
"\"timestamp\": \"%s\", " \
"\"error_count\": %d, " \
"\"affinity\": %d, " \
@@ -155,6 +158,7 @@ void report_arm_event_json(struct trace_seq *s, struct ras_arm_event *ev)
"\"running_state\": %d, " \
"\"psci_state\": %d }",
JSON_REPORT_KEY,
+ ev->erst ? "erst_arm_event" : "arm_event",
(*ev->timestamp) ? ev->timestamp : NONE,
ev->error_count,
ev->affinity,
@@ -193,7 +197,7 @@ void report_mce_event_json(struct trace_seq *s, struct mce_event *ev)
ev->severity = GHES_SEV_CORRECTED;
trace_seq_printf(s,
- "\n{ \"%s\": \"mce_record\", " \
+ "\n{ \"%s\": \"%s\", " \
"\"timestamp\": \"%s\", " \
"\"severity\": \"%s\", " \
"\"bank\": %d, " \
@@ -218,6 +222,7 @@ void report_mce_event_json(struct trace_seq *s, struct mce_event *ev)
"\"mcgcap\": \"%#lx\", " \
"\"apicid\": \"%#x\" }",
JSON_REPORT_KEY,
+ ev->erst ? "erst_mce_record" : "mce_record",
(*ev->timestamp) ? ev->timestamp : NONE,
severity_strs[ev->severity],
ev->bank,
diff --git a/rasdaemon.c b/rasdaemon.c
index 02e219a..987c544 100644
--- a/rasdaemon.c
+++ b/rasdaemon.c
@@ -29,6 +29,7 @@
#include "ras-record.h"
#include "ras-report.h"
#include "ras-kmsg.h"
+#include "ras-erst.h"
/*
* Arguments(argp) handling logic and main
@@ -150,6 +151,21 @@ int main(int argc, char *argv[])
kmsg_monitor = 0;
#endif
+#ifdef HAVE_ERST
+ if (choices_disable != NULL &&
+ strlen(choices_disable) != 0 &&
+ strstr(choices_disable, "erst_mce"))
+ erst_mce_enable = 0;
+
+ if (choices_disable != NULL &&
+ strlen(choices_disable) != 0 &&
+ strstr(choices_disable, "erst_panic"))
+ erst_panic_enable = 0;
+
+ if (getenv(ERST_DELETE))
+ erst_delete = atoi(getenv(ERST_DELETE));
+#endif
+
#ifdef HAVE_MCE
const struct argp_option offline_options[] = {
{"smca", SMCA, 0, 0, "AMD SMCA Error Decoding"},
@@ -233,6 +249,15 @@ int main(int argc, char *argv[])
get_boot_time(&boot_time);
suspended_time = get_suspended_time();
+#ifdef HAVE_ERST
+ if (erst_panic_enable)
+ handle_erst_panic();
+#ifdef HAVE_MCE
+ if (erst_mce_enable)
+ handle_erst_mce();
+#endif
+#endif
+
handle_ras_events(args.record_events);
return 0;
--
2.33.1
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-anolis-os/rasdaemon.git
[email protected]:src-anolis-os/rasdaemon.git
src-anolis-os
rasdaemon
rasdaemon
a8

搜索帮助