代码拉取完成,页面将自动刷新
同步操作将从 src-openEuler/busybox 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
From aec213c228426fbad3cd9d4038dffecaf92947bf Mon Sep 17 00:00:00 2001
From: Ron Yorston <[email protected]>
Date: Wed, 27 Jan 2021 11:19:14 +0000
Subject: [PATCH 01/61] awk: allow printf('%c') to output NUL, closes 13486
Treat the output of printf as binary rather than a null-terminated
string so that NUL characters can be output.
This is considered to be a GNU extension, though it's also available
in mawk and FreeBSD's awk.
function old new delta
evaluate 3487 3504 +17
awk_printf 504 519 +15
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 32/0) Total: 32 bytes
Signed-off-by: Ron Yorston <[email protected]>
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 18 +++++++++++++++---
testsuite/awk.tests | 5 +++++
2 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 2c15f9e4e..b4f6a3741 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2155,7 +2155,10 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i
}
/* formatted output into an allocated buffer, return ptr to buffer */
-static char *awk_printf(node *n)
+#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
+# define awk_printf(a, b) awk_printf(a)
+#endif
+static char *awk_printf(node *n, int *len)
{
char *b = NULL;
char *fmt, *s, *f;
@@ -2209,6 +2212,10 @@ static char *awk_printf(node *n)
nvfree(v);
b = xrealloc(b, i + 1);
b[i] = '\0';
+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
+ if (len)
+ *len = i;
+#endif
return b;
}
@@ -2666,6 +2673,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_PRINT ):
case XC( OC_PRINTF ): {
FILE *F = stdout;
+ IF_FEATURE_AWK_GNU_EXTENSIONS(int len;)
if (op->r.n) {
rstream *rsm = newfile(R.s);
@@ -2703,8 +2711,12 @@ static var *evaluate(node *op, var *res)
fputs(getvar_s(intvar[ORS]), F);
} else { /* OC_PRINTF */
- char *s = awk_printf(op1);
+ char *s = awk_printf(op1, &len);
+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
+ fwrite(s, len, 1, F);
+#else
fputs(s, F);
+#endif
free(s);
}
fflush(F);
@@ -2978,7 +2990,7 @@ static var *evaluate(node *op, var *res)
break;
case XC( OC_SPRINTF ):
- setvar_p(res, awk_printf(op1));
+ setvar_p(res, awk_printf(op1, NULL));
break;
case XC( OC_UNARY ): {
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 92c83d719..cf9b722dc 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -383,6 +383,11 @@ testing "awk errors on missing delete arg" \
"awk -e '{delete}' 2>&1" "awk: cmd. line:1: Too few arguments\n" "" ""
SKIP=
+optional FEATURE_AWK_GNU_EXTENSIONS
+testing "awk printf('%c') can output NUL" \
+ "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
+SKIP=
+
# testing "description" "command" "result" "infile" "stdin"
testing 'awk negative field access' \
'awk 2>&1 -- '\''{ $(-1) }'\' \
--
2.27.0
From 9dcd2d5cc91bde2d6cdd038ed23408057d6f6429 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 16 Jun 2021 09:18:08 +0200
Subject: [PATCH 02/61] awk: fix use-after-free in "$BIGNUM1 $BIGGERNUM2"
concat op
Second reference to a field reallocs/moves Fields[] array, but first ref
still tries to use the element where it was before move.
function old new delta
fsrealloc 94 106 +12
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 85 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 71 insertions(+), 14 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index b4f6a3741..48836298c 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1745,12 +1745,22 @@ static char* qrealloc(char *b, int n, int *size)
/* resize field storage space */
static void fsrealloc(int size)
{
- int i;
+ int i, newsize;
if (size >= maxfields) {
+ /* Sanity cap, easier than catering for overflows */
+ if (size > 0xffffff)
+ bb_die_memory_exhausted();
+
i = maxfields;
maxfields = size + 16;
- Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
+
+ newsize = maxfields * sizeof(Fields[0]);
+ debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
+ Fields = xrealloc(Fields, newsize);
+ debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
+ /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
+
for (; i < maxfields; i++) {
Fields[i].type = VF_SPECIAL;
Fields[i].string = NULL;
@@ -2614,20 +2624,30 @@ static var *evaluate(node *op, var *res)
/* execute inevitable things */
if (opinfo & OF_RES1)
L.v = evaluate(op1, v1);
- if (opinfo & OF_RES2)
- R.v = evaluate(op->r.n, v1+1);
if (opinfo & OF_STR1) {
L.s = getvar_s(L.v);
debug_printf_eval("L.s:'%s'\n", L.s);
}
- if (opinfo & OF_STR2) {
- R.s = getvar_s(R.v);
- debug_printf_eval("R.s:'%s'\n", R.s);
- }
if (opinfo & OF_NUM1) {
L_d = getvar_i(L.v);
debug_printf_eval("L_d:%f\n", L_d);
}
+ /* NB: Must get string/numeric values of L (done above)
+ * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
+ * and right one is large, then L.v points to Fields[NNN1],
+ * second evaluate() reallocates and moves (!) Fields[],
+ * R.v points to Fields[NNN2] but L.v now points to freed mem!
+ * (Seen trying to evaluate "$444 $44444")
+ */
+ if (opinfo & OF_RES2) {
+ R.v = evaluate(op->r.n, v1+1);
+ //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
+ //L.v = NULL;
+ }
+ if (opinfo & OF_STR2) {
+ R.s = getvar_s(R.v);
+ debug_printf_eval("R.s:'%s'\n", R.s);
+ }
debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
switch (XC(opinfo & OPCLSMASK)) {
@@ -2636,6 +2656,7 @@ static var *evaluate(node *op, var *res)
/* test pattern */
case XC( OC_TEST ):
+ debug_printf_eval("TEST\n");
if ((op1->info & OPCLSMASK) == OC_COMMA) {
/* it's range pattern */
if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
@@ -2653,25 +2674,32 @@ static var *evaluate(node *op, var *res)
/* just evaluate an expression, also used as unconditional jump */
case XC( OC_EXEC ):
+ debug_printf_eval("EXEC\n");
break;
/* branch, used in if-else and various loops */
case XC( OC_BR ):
+ debug_printf_eval("BR\n");
op = istrue(L.v) ? op->a.n : op->r.n;
break;
/* initialize for-in loop */
case XC( OC_WALKINIT ):
+ debug_printf_eval("WALKINIT\n");
hashwalk_init(L.v, iamarray(R.v));
break;
/* get next array item */
case XC( OC_WALKNEXT ):
+ debug_printf_eval("WALKNEXT\n");
op = hashwalk_next(L.v) ? op->a.n : op->r.n;
break;
case XC( OC_PRINT ):
- case XC( OC_PRINTF ): {
+ debug_printf_eval("PRINT /\n");
+ case XC( OC_PRINTF ):
+ debug_printf_eval("PRINTF\n");
+ {
FILE *F = stdout;
IF_FEATURE_AWK_GNU_EXTENSIONS(int len;)
@@ -2726,22 +2754,28 @@ static var *evaluate(node *op, var *res)
/* case XC( OC_DELETE ): - moved to happen before arg evaluation */
case XC( OC_NEWSOURCE ):
+ debug_printf_eval("NEWSOURCE\n");
g_progname = op->l.new_progname;
break;
case XC( OC_RETURN ):
+ debug_printf_eval("RETURN\n");
copyvar(res, L.v);
break;
case XC( OC_NEXTFILE ):
+ debug_printf_eval("NEXTFILE\n");
nextfile = TRUE;
case XC( OC_NEXT ):
+ debug_printf_eval("NEXT\n");
nextrec = TRUE;
case XC( OC_DONE ):
+ debug_printf_eval("DONE\n");
clrvar(res);
break;
case XC( OC_EXIT ):
+ debug_printf_eval("EXIT\n");
awk_exit(L_d);
/* -- recursive node type -- */
@@ -2761,15 +2795,18 @@ static var *evaluate(node *op, var *res)
break;
case XC( OC_IN ):
+ debug_printf_eval("IN\n");
setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
break;
case XC( OC_REGEXP ):
+ debug_printf_eval("REGEXP\n");
op1 = op;
L.s = getvar_s(intvar[F0]);
goto re_cont;
case XC( OC_MATCH ):
+ debug_printf_eval("MATCH\n");
op1 = op->r.n;
re_cont:
{
@@ -2795,6 +2832,7 @@ static var *evaluate(node *op, var *res)
break;
case XC( OC_TERNARY ):
+ debug_printf_eval("TERNARY\n");
if ((op->r.n->info & OPCLSMASK) != OC_COLON)
syntax_error(EMSG_POSSIBLE_ERROR);
res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
@@ -2803,6 +2841,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_FUNC ): {
var *vbeg, *v;
const char *sv_progname;
+ debug_printf_eval("FUNC\n");
/* The body might be empty, still has to eval the args */
if (!op->r.n->info && !op->r.f->body.first)
@@ -2832,7 +2871,10 @@ static var *evaluate(node *op, var *res)
}
case XC( OC_GETLINE ):
- case XC( OC_PGETLINE ): {
+ debug_printf_eval("GETLINE /\n");
+ case XC( OC_PGETLINE ):
+ debug_printf_eval("PGETLINE\n");
+ {
rstream *rsm;
int i;
@@ -2873,6 +2915,7 @@ static var *evaluate(node *op, var *res)
/* simple builtins */
case XC( OC_FBLTIN ): {
double R_d = R_d; /* for compiler */
+ debug_printf_eval("FBLTIN\n");
switch (opn) {
case F_in:
@@ -2986,14 +3029,18 @@ static var *evaluate(node *op, var *res)
}
case XC( OC_BUILTIN ):
+ debug_printf_eval("BUILTIN\n");
res = exec_builtin(op, res);
break;
case XC( OC_SPRINTF ):
+ debug_printf_eval("SPRINTF\n");
setvar_p(res, awk_printf(op1, NULL));
break;
- case XC( OC_UNARY ): {
+ case XC( OC_UNARY ):
+ debug_printf_eval("UNARY\n");
+ {
double Ld, R_d;
Ld = R_d = getvar_i(R.v);
@@ -3023,7 +3070,9 @@ static var *evaluate(node *op, var *res)
break;
}
- case XC( OC_FIELD ): {
+ case XC( OC_FIELD ):
+ debug_printf_eval("FIELD\n");
+ {
int i = (int)getvar_i(R.v);
if (i < 0)
syntax_error(EMSG_NEGATIVE_FIELD);
@@ -3040,8 +3089,10 @@ static var *evaluate(node *op, var *res)
/* concatenation (" ") and index joining (",") */
case XC( OC_CONCAT ):
+ debug_printf_eval("CONCAT /\n");
case XC( OC_COMMA ): {
const char *sep = "";
+ debug_printf_eval("COMMA\n");
if ((opinfo & OPCLSMASK) == OC_COMMA)
sep = getvar_s(intvar[SUBSEP]);
setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
@@ -3049,17 +3100,22 @@ static var *evaluate(node *op, var *res)
}
case XC( OC_LAND ):
+ debug_printf_eval("LAND\n");
setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
break;
case XC( OC_LOR ):
+ debug_printf_eval("LOR\n");
setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
break;
case XC( OC_BINARY ):
- case XC( OC_REPLACE ): {
+ debug_printf_eval("BINARY /\n");
+ case XC( OC_REPLACE ):
+ debug_printf_eval("REPLACE\n");
+ {
double R_d = getvar_i(R.v);
- debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
+ debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
switch (opn) {
case '+':
L_d += R_d;
@@ -3095,6 +3151,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_COMPARE ): {
int i = i; /* for compiler */
double Ld;
+ debug_printf_eval("COMPARE\n");
if (is_numeric(L.v) && is_numeric(R.v)) {
Ld = getvar_i(L.v) - getvar_i(R.v);
--
2.27.0
From 1d5e5492dd8368ee3870bcd390754aa7c3f8956c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 18 Jun 2021 16:35:27 +0200
Subject: [PATCH 03/61] awk: after preinc/dec, only allow variable, field ref,
array ref, or another preinc/dec
Accepting nonsense like "--4", and even "-- -4" is confusing.
function old new delta
parse_expr 917 938 +21
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 87 ++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 69 insertions(+), 18 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 48836298c..2563722f9 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -66,6 +66,8 @@
#endif
#ifndef debug_printf_parse
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
+#else
+# define debug_parse_print_tc(...) ((void)0)
#endif
@@ -210,13 +212,13 @@ typedef struct tsplitter_s {
#define TC_SEQTERM (1 << 1) /* ) */
#define TC_REGEXP (1 << 2) /* /.../ */
#define TC_OUTRDR (1 << 3) /* | > >> */
-#define TC_UOPPOST (1 << 4) /* unary postfix operator */
-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
#define TC_BINOPX (1 << 6) /* two-opnd operator */
#define TC_IN (1 << 7)
#define TC_COMMA (1 << 8)
#define TC_PIPE (1 << 9) /* input redirection pipe */
-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
#define TC_ARRTERM (1 << 11) /* ] */
#define TC_GRPSTART (1 << 12) /* { */
#define TC_GRPTERM (1 << 13) /* } */
@@ -243,14 +245,51 @@ typedef struct tsplitter_s {
#define TC_STRING (1 << 29)
#define TC_NUMBER (1 << 30)
-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
+#ifndef debug_parse_print_tc
+#define debug_parse_print_tc(n) do { \
+if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \
+if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \
+if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
+if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
+if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
+if ((n) & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); \
+if ((n) & TC_BINOPX ) debug_printf_parse(" BINOPX" ); \
+if ((n) & TC_IN ) debug_printf_parse(" IN" ); \
+if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
+if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
+if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
+if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
+if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \
+if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \
+if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
+if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
+if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
+if ((n) & TC_WHILE ) debug_printf_parse(" WHILE" ); \
+if ((n) & TC_ELSE ) debug_printf_parse(" ELSE" ); \
+if ((n) & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); \
+if ((n) & TC_LENGTH ) debug_printf_parse(" LENGTH" ); \
+if ((n) & TC_GETLINE ) debug_printf_parse(" GETLINE" ); \
+if ((n) & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); \
+if ((n) & TC_BEGIN ) debug_printf_parse(" BEGIN" ); \
+if ((n) & TC_END ) debug_printf_parse(" END" ); \
+if ((n) & TC_EOF ) debug_printf_parse(" EOF" ); \
+if ((n) & TC_VARIABLE) debug_printf_parse(" VARIABLE"); \
+if ((n) & TC_ARRAY ) debug_printf_parse(" ARRAY" ); \
+if ((n) & TC_FUNCTION) debug_printf_parse(" FUNCTION"); \
+if ((n) & TC_STRING ) debug_printf_parse(" STRING" ); \
+if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
+} while (0)
+#endif
/* combined token classes */
+#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
+
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
| TC_BUILTIN | TC_LENGTH | TC_GETLINE \
| TC_SEQSTART | TC_STRING | TC_NUMBER)
+#define TC_LVALUE (TC_VARIABLE | TC_ARRAY)
#define TC_STATEMNT (TC_STATX | TC_WHILE)
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
@@ -284,7 +323,6 @@ typedef struct tsplitter_s {
#define OF_CHECKED 0x200000
#define OF_REQUIRED 0x400000
-
/* combined operator flags */
#define xx 0
#define xV OF_RES2
@@ -313,10 +351,8 @@ typedef struct tsplitter_s {
#define PRIMASK2 0x7E000000
/* Operation classes */
-
#define SHIFT_TIL_THIS 0x0600
#define RECUR_FROM_THIS 0x1000
-
enum {
OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
@@ -411,7 +447,9 @@ static const uint32_t tokeninfo[] ALIGN4 = {
OC_REGEXP,
xS|'a', xS|'w', xS|'|',
OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
+#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
+#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
+ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
@@ -1070,6 +1108,10 @@ static uint32_t next_token(uint32_t expected)
uint32_t tc;
const uint32_t *ti;
+ debug_printf_parse("%s() expected(%x):", __func__, expected);
+ debug_parse_print_tc(expected);
+ debug_printf_parse("\n");
+
if (t_rollback) {
debug_printf_parse("%s: using rolled-back token\n", __func__);
t_rollback = FALSE;
@@ -1226,7 +1268,9 @@ static uint32_t next_token(uint32_t expected)
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
}
- debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double);
+ debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double);
+ debug_parse_print_tc(ltclass);
+ debug_printf_parse("\n");
return ltclass;
#undef concat_inserted
#undef save_tclass
@@ -1266,7 +1310,7 @@ static node *condition(void)
/* parse expression terminated by given argument, return ptr
* to built subtree. Terminator is eaten by parse_expr */
-static node *parse_expr(uint32_t iexp)
+static node *parse_expr(uint32_t term_tc)
{
node sn;
node *cn = &sn;
@@ -1274,13 +1318,15 @@ static node *parse_expr(uint32_t iexp)
uint32_t tc, xtc;
var *v;
- debug_printf_parse("%s(%x)\n", __func__, iexp);
+ debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
+ debug_parse_print_tc(term_tc);
+ debug_printf_parse("\n");
sn.info = PRIMASK;
sn.r.n = sn.a.n = glptr = NULL;
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
+ xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc;
- while (!((tc = next_token(xtc)) & iexp)) {
+ while (!((tc = next_token(xtc)) & term_tc)) {
if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
/* input redirection (<) attached to glptr node */
@@ -1313,25 +1359,28 @@ static node *parse_expr(uint32_t iexp)
next_token(TC_GETLINE);
/* give maximum priority to this pipe */
cn->info &= ~PRIMASK;
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
}
} else {
cn->r.n = vn;
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
}
vn->a.n = cn;
} else {
- debug_printf_parse("%s: other\n", __func__);
+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
/* for operands and prefix-unary operators, attach them
* to last node */
vn = cn;
cn = vn->r.n = new_node(t_info);
cn->a.n = vn;
+
xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
+ if (t_info == TI_PREINC || t_info == TI_PREDEC)
+ xtc = TC_LVALUE | TC_UOPPRE1;
if (tc & (TC_OPERAND | TC_REGEXP)) {
debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
+ xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc;
/* one should be very careful with switch on tclass -
* only simple tclasses should be used! */
switch (tc) {
@@ -1388,7 +1437,7 @@ static node *parse_expr(uint32_t iexp)
case TC_GETLINE:
debug_printf_parse("%s: TC_GETLINE\n", __func__);
glptr = cn;
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
break;
case TC_BUILTIN:
@@ -1603,6 +1652,8 @@ static void parse_program(char *p)
func *f;
var *v;
+ debug_printf_parse("%s()\n", __func__);
+
g_pos = p;
t_lineno = 1;
while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
--
2.27.0
From 3d0acb8934f496021a63471ef9e29c87520612a0 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 20 Jun 2021 22:52:29 +0200
Subject: [PATCH 04/61] awk: make code clearer, no actual code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 2563722f9..5f1d670a4 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -455,7 +455,8 @@ static const uint32_t tokeninfo[] ALIGN4 = {
OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
+#define TI_LESS (OC_COMPARE|VV|P(39)|2)
+ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
OC_IN|SV|P(49), /* TC_IN */
OC_COMMA|SS|P(80),
@@ -1328,7 +1329,7 @@ static node *parse_expr(uint32_t term_tc)
while (!((tc = next_token(xtc)) & term_tc)) {
- if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
+ if (glptr && (t_info == TI_LESS)) {
/* input redirection (<) attached to glptr node */
debug_printf_parse("%s: input redir\n", __func__);
cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
--
2.27.0
From 3c18df6595f8efc0229d7afc948b8ef38fb6f1aa Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 25 Jun 2021 19:38:27 +0200
Subject: [PATCH 05/61] awk: more efficient -f FILE, document what "some trick
in next_token" is
function old new delta
awk_main 890 898 +8
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 33 ++++++++++++++++++++++++---------
1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 5f1d670a4..1b23c17d2 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1217,6 +1217,8 @@ static uint32_t next_token(uint32_t expected)
if (!isalnum_(*p))
syntax_error(EMSG_UNEXP_TOKEN); /* no */
/* yes */
+/* "move name one char back" trick: we need a byte for NUL terminator */
+/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */
t_string = --p;
while (isalnum_(*++p)) {
p[-1] = *p;
@@ -3345,7 +3347,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
llist_t *list_e = NULL;
#endif
- int i, j;
+ int i;
var *v;
var tv;
char **envp;
@@ -3417,30 +3419,43 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
bb_show_usage();
}
while (list_f) {
- char *s = NULL;
- FILE *from_file;
+ int fd;
+ char *s;
g_progname = llist_pop(&list_f);
- from_file = xfopen_stdin(g_progname);
- /* one byte is reserved for some trick in next_token */
- for (i = j = 1; j > 0; i += j) {
- s = xrealloc(s, i + 4096);
- j = fread(s + i, 1, 4094, from_file);
+ fd = xopen_stdin(g_progname);
+ /* 1st byte is reserved for "move name one char back" trick in next_token */
+ i = 1;
+ s = NULL;
+ for (;;) {
+ int sz;
+ s = xrealloc(s, i + 1000);
+ sz = safe_read(fd, s + i, 1000);
+ if (sz <= 0)
+ break;
+ i += sz;
}
+ s = xrealloc(s, i + 1); /* trim unused 999 bytes */
s[i] = '\0';
- fclose(from_file);
+ close(fd);
parse_program(s + 1);
free(s);
}
g_progname = "cmd. line";
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
while (list_e) {
+ /* NB: "move name one char back" trick in next_token
+ * can use argv[i][-1] here.
+ */
parse_program(llist_pop(&list_e));
}
#endif
if (!(opt & (OPT_f | OPT_e))) {
if (!*argv)
bb_show_usage();
+ /* NB: "move name one char back" trick in next_token
+ * can use argv[i][-1] here.
+ */
parse_program(*argv++);
}
--
2.27.0
From f8243879801f8d9d9fffbde592aee4264aa30d71 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 25 Jun 2021 19:41:05 +0200
Subject: [PATCH 06/61] awk: move locals deeper into scopes where they are
used, no logic changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 62 ++++++++++++++++++++++++++-------------------------
1 file changed, 32 insertions(+), 30 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 1b23c17d2..86076d7b6 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -3254,20 +3254,19 @@ static var *evaluate(node *op, var *res)
static int awk_exit(int r)
{
- var tv;
unsigned i;
- hash_item *hi;
-
- zero_out_var(&tv);
if (!exiting) {
+ var tv;
exiting = TRUE;
nextrec = FALSE;
+ zero_out_var(&tv);
evaluate(endseq.first, &tv);
}
/* waiting for children */
for (i = 0; i < fdhash->csize; i++) {
+ hash_item *hi;
hi = fdhash->items[i];
while (hi) {
if (hi->data.rs.F && hi->data.rs.is_pipe)
@@ -3348,11 +3347,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
llist_t *list_e = NULL;
#endif
int i;
- var *v;
var tv;
- char **envp;
- char *vnames = (char *)vNames; /* cheat */
- char *vvalues = (char *)vValues;
INIT_G();
@@ -3361,8 +3356,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
if (ENABLE_LOCALE_SUPPORT)
setlocale(LC_NUMERIC, "C");
- zero_out_var(&tv);
-
/* allocate global buffer */
g_buf = xmalloc(MAXVARFMT + 1);
@@ -3372,16 +3365,21 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
fnhash = hash_init();
/* initialize variables */
- for (i = 0; *vnames; i++) {
- intvar[i] = v = newvar(nextword(&vnames));
- if (*vvalues != '\377')
- setvar_s(v, nextword(&vvalues));
- else
- setvar_i(v, 0);
-
- if (*vnames == '*') {
- v->type |= VF_SPECIAL;
- vnames++;
+ {
+ char *vnames = (char *)vNames; /* cheat */
+ char *vvalues = (char *)vValues;
+ for (i = 0; *vnames; i++) {
+ var *v;
+ intvar[i] = v = newvar(nextword(&vnames));
+ if (*vvalues != '\377')
+ setvar_s(v, nextword(&vvalues));
+ else
+ setvar_i(v, 0);
+
+ if (*vnames == '*') {
+ v->type |= VF_SPECIAL;
+ vnames++;
+ }
}
}
@@ -3393,16 +3391,19 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
newfile("/dev/stderr")->F = stderr;
/* Huh, people report that sometimes environ is NULL. Oh well. */
- if (environ) for (envp = environ; *envp; envp++) {
- /* environ is writable, thus we don't strdup it needlessly */
- char *s = *envp;
- char *s1 = strchr(s, '=');
- if (s1) {
- *s1 = '\0';
- /* Both findvar and setvar_u take const char*
- * as 2nd arg -> environment is not trashed */
- setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
- *s1 = '=';
+ if (environ) {
+ char **envp;
+ for (envp = environ; *envp; envp++) {
+ /* environ is writable, thus we don't strdup it needlessly */
+ char *s = *envp;
+ char *s1 = strchr(s, '=');
+ if (s1) {
+ *s1 = '\0';
+ /* Both findvar and setvar_u take const char*
+ * as 2nd arg -> environment is not trashed */
+ setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
+ *s1 = '=';
+ }
}
}
opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
@@ -3466,6 +3467,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
setari_u(intvar[ARGV], ++i, *argv++);
setvar_i(intvar[ARGC], i + 1);
+ zero_out_var(&tv);
evaluate(beginseq.first, &tv);
if (!mainseq.first && !endseq.first)
awk_exit(EXIT_SUCCESS);
--
2.27.0
From b52a50128d64e1f601e17507ffc118c180ef7b3d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 01:03:42 +0200
Subject: [PATCH 07/61] awk: remove redundant check
function old new delta
next_token 785 784 -1
parse_program 337 328 -9
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-10) Total: -10 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 32 ++++++++++++++++++--------------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 86076d7b6..9826a57c6 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1093,8 +1093,9 @@ static void nvfree(var *v)
/* ------- awk program text parsing ------- */
-/* Parse next token pointed by global pos, place results into global ttt.
- * If token isn't expected, give away. Return token class
+/* Parse next token pointed by global pos, place results into global t_XYZ variables.
+ * If token isn't expected, print error message and die.
+ * Return token class (also store it in t_tclass).
*/
static uint32_t next_token(uint32_t expected)
{
@@ -1248,33 +1249,35 @@ static uint32_t next_token(uint32_t expected)
goto readnext;
/* insert concatenation operator when needed */
- debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__,
- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP));
+ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
+ (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
+ !(ltclass == TC_LENGTH && tc == TC_SEQSTART));
if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
&& !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
) {
concat_inserted = TRUE;
save_tclass = tc;
save_info = t_info;
- tc = TC_BINOP;
+ tc = TC_BINOPX;
t_info = OC_CONCAT | SS | P(35);
}
- debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass);
t_tclass = tc;
+ debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
}
- ltclass = t_tclass;
-
/* Are we ready for this? */
- if (!(ltclass & expected)) {
+ if (!(t_tclass & expected)) {
syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
}
- debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double);
- debug_parse_print_tc(ltclass);
+ debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
+ debug_parse_print_tc(t_tclass);
debug_printf_parse("\n");
- return ltclass;
+
+ ltclass = t_tclass;
+
+ return t_tclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
@@ -1700,8 +1703,9 @@ static void parse_program(char *p)
/* Arg followed either by end of arg list or 1 comma */
if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
break;
- if (t_tclass != TC_COMMA)
- syntax_error(EMSG_UNEXP_TOKEN);
+//Impossible: next_token() above would error out and die
+// if (t_tclass != TC_COMMA)
+// syntax_error(EMSG_UNEXP_TOKEN);
}
seq = &f->body;
chain_group();
--
2.27.0
From 96368c3613c1b01c42b7b382d01142a07c919f60 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 01:09:08 +0200
Subject: [PATCH 08/61] awk: make ltclass ("last token class") local to
next_token()
function old new delta
next_token 784 790 +6
next_input_file 219 216 -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 6/-3) Total: 3 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 9826a57c6..418bda160 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -556,7 +556,6 @@ struct globals2 {
uint32_t next_token__save_tclass;
uint32_t next_token__save_info;
- uint32_t next_token__ltclass;
smallint next_token__concat_inserted;
smallint next_input_file__files_happen;
@@ -615,7 +614,7 @@ struct globals2 {
#define rsplitter (G.rsplitter )
#define INIT_G() do { \
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
- G.next_token__ltclass = TC_OPTERM; \
+ t_tclass = TC_OPTERM; \
G.evaluate__seed = 1; \
} while (0)
@@ -1102,13 +1101,13 @@ static uint32_t next_token(uint32_t expected)
#define concat_inserted (G.next_token__concat_inserted)
#define save_tclass (G.next_token__save_tclass)
#define save_info (G.next_token__save_info)
-/* Initialized to TC_OPTERM: */
-#define ltclass (G.next_token__ltclass)
char *p, *s;
const char *tl;
- uint32_t tc;
const uint32_t *ti;
+ uint32_t tc, last_token_class;
+
+ last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */
debug_printf_parse("%s() expected(%x):", __func__, expected);
debug_parse_print_tc(expected);
@@ -1245,15 +1244,15 @@ static uint32_t next_token(uint32_t expected)
g_pos = p;
/* skipping newlines in some cases */
- if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
+ if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE))
goto readnext;
/* insert concatenation operator when needed */
debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
- !(ltclass == TC_LENGTH && tc == TC_SEQSTART));
- if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
- && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
+ (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
+ !(last_token_class == TC_LENGTH && tc == TC_SEQSTART));
+ if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
+ && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
) {
concat_inserted = TRUE;
save_tclass = tc;
@@ -1267,7 +1266,7 @@ static uint32_t next_token(uint32_t expected)
}
/* Are we ready for this? */
if (!(t_tclass & expected)) {
- syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
+ syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
}
@@ -1275,13 +1274,10 @@ static uint32_t next_token(uint32_t expected)
debug_parse_print_tc(t_tclass);
debug_printf_parse("\n");
- ltclass = t_tclass;
-
return t_tclass;
#undef concat_inserted
#undef save_tclass
#undef save_info
-#undef ltclass
}
static void rollback_token(void)
--
2.27.0
From 8b51ddd054a3454171440035ed7f125483e9697c Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 01:23:37 +0200
Subject: [PATCH 09/61] awk: use TS_foo for combined token classes. No code
changes
Confusion with "simple" token classes was the cause of the bug fixed by the previous commit.
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 128 +++++++++++++++++++++++++-------------------------
1 file changed, 64 insertions(+), 64 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 418bda160..764a3dd49 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -281,39 +281,39 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
} while (0)
#endif
-/* combined token classes */
-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
+/* combined token classes ("token [class] sets") */
+#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
-#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
-//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
-#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
- | TC_SEQSTART | TC_STRING | TC_NUMBER)
-#define TC_LVALUE (TC_VARIABLE | TC_ARRAY)
+#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
+//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
+#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
+ | TC_SEQSTART | TC_STRING | TC_NUMBER)
-#define TC_STATEMNT (TC_STATX | TC_WHILE)
-#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
+#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
+#define TS_STATEMNT (TC_STATX | TC_WHILE)
+#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE)
/* word tokens, cannot mean something else if not expected */
-#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
- | TC_FUNCDECL | TC_BEGIN | TC_END)
+#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
+ | TC_FUNCDECL | TC_BEGIN | TC_END)
/* discard newlines after these */
-#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
- | TC_BINOP | TC_OPTERM)
+#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
+ | TS_BINOP | TS_OPTERM)
/* what can expression begin with */
-#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
+#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
/* what can group begin with */
-#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
+#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART)
-/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
+/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
/* operator is inserted between them */
-#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
| TC_STRING | TC_NUMBER | TC_UOPPOST \
| TC_LENGTH)
-#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
+#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
#define OF_RES1 0x010000
#define OF_RES2 0x020000
@@ -614,7 +614,7 @@ struct globals2 {
#define rsplitter (G.rsplitter )
#define INIT_G() do { \
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
- t_tclass = TC_OPTERM; \
+ t_tclass = TS_OPTERM; \
G.evaluate__seed = 1; \
} while (0)
@@ -1107,7 +1107,7 @@ static uint32_t next_token(uint32_t expected)
const uint32_t *ti;
uint32_t tc, last_token_class;
- last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */
+ last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */
debug_printf_parse("%s() expected(%x):", __func__, expected);
debug_parse_print_tc(expected);
@@ -1198,9 +1198,9 @@ static uint32_t next_token(uint32_t expected)
* token matches,
* and it's not a longer word,
*/
- if ((tc & (expected | TC_WORD | TC_NEWLINE))
+ if ((tc & (expected | TS_WORD | TC_NEWLINE))
&& strncmp(p, tl, l) == 0
- && !((tc & TC_WORD) && isalnum_(p[l]))
+ && !((tc & TS_WORD) && isalnum_(p[l]))
) {
/* then this is what we are looking for */
t_info = *ti;
@@ -1244,14 +1244,14 @@ static uint32_t next_token(uint32_t expected)
g_pos = p;
/* skipping newlines in some cases */
- if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE))
+ if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
goto readnext;
/* insert concatenation operator when needed */
debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
- (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
+ (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
!(last_token_class == TC_LENGTH && tc == TC_SEQSTART));
- if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
+ if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
&& !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
) {
concat_inserted = TRUE;
@@ -1317,7 +1317,7 @@ static node *parse_expr(uint32_t term_tc)
node sn;
node *cn = &sn;
node *vn, *glptr;
- uint32_t tc, xtc;
+ uint32_t tc, expected_tc;
var *v;
debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
@@ -1326,20 +1326,20 @@ static node *parse_expr(uint32_t term_tc)
sn.info = PRIMASK;
sn.r.n = sn.a.n = glptr = NULL;
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
- while (!((tc = next_token(xtc)) & term_tc)) {
+ while (!((tc = next_token(expected_tc)) & term_tc)) {
if (glptr && (t_info == TI_LESS)) {
/* input redirection (<) attached to glptr node */
debug_printf_parse("%s: input redir\n", __func__);
cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
cn->a.n = glptr;
- xtc = TC_OPERAND | TC_UOPPRE;
+ expected_tc = TS_OPERAND | TS_UOPPRE;
glptr = NULL;
- } else if (tc & (TC_BINOP | TC_UOPPOST)) {
- debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
+ } else if (tc & (TS_BINOP | TC_UOPPOST)) {
+ debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
/* for binary and postfix-unary operators, jump back over
* previous operators with higher priority */
vn = cn;
@@ -1353,19 +1353,19 @@ static node *parse_expr(uint32_t term_tc)
t_info += P(6);
cn = vn->a.n->r.n = new_node(t_info);
cn->a.n = vn->a.n;
- if (tc & TC_BINOP) {
+ if (tc & TS_BINOP) {
cn->l.n = vn;
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
if ((t_info & OPCLSMASK) == OC_PGETLINE) {
/* it's a pipe */
next_token(TC_GETLINE);
/* give maximum priority to this pipe */
cn->info &= ~PRIMASK;
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
}
} else {
cn->r.n = vn;
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
}
vn->a.n = cn;
@@ -1377,14 +1377,14 @@ static node *parse_expr(uint32_t term_tc)
cn = vn->r.n = new_node(t_info);
cn->a.n = vn;
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
if (t_info == TI_PREINC || t_info == TI_PREDEC)
- xtc = TC_LVALUE | TC_UOPPRE1;
- if (tc & (TC_OPERAND | TC_REGEXP)) {
- debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc;
+ expected_tc = TS_LVALUE | TC_UOPPRE1;
+ if (tc & (TS_OPERAND | TC_REGEXP)) {
+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
/* one should be very careful with switch on tclass -
- * only simple tclasses should be used! */
+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */
switch (tc) {
case TC_VARIABLE:
case TC_ARRAY:
@@ -1412,7 +1412,7 @@ static node *parse_expr(uint32_t term_tc)
setvar_i(v, t_double);
else {
setvar_s(v, t_string);
- xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */
+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
}
break;
@@ -1439,7 +1439,7 @@ static node *parse_expr(uint32_t term_tc)
case TC_GETLINE:
debug_printf_parse("%s: TC_GETLINE\n", __func__);
glptr = cn;
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
break;
case TC_BUILTIN:
@@ -1450,7 +1450,7 @@ static node *parse_expr(uint32_t term_tc)
case TC_LENGTH:
debug_printf_parse("%s: TC_LENGTH\n", __func__);
next_token(TC_SEQSTART /* length(...) */
- | TC_OPTERM /* length; (or newline)*/
+ | TS_OPTERM /* length; (or newline)*/
| TC_GRPTERM /* length } */
| TC_BINOPX /* length <op> NUM */
| TC_COMMA /* print length, 1 */
@@ -1464,7 +1464,7 @@ static node *parse_expr(uint32_t term_tc)
}
}
}
- }
+ } /* while() */
debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
return sn.r.n;
@@ -1497,7 +1497,7 @@ static void chain_expr(uint32_t info)
n = chain_node(info);
- n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
+ n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM);
if ((info & OF_REQUIRED) && !n->l.n)
syntax_error(EMSG_TOO_FEW_ARGS);
@@ -1535,12 +1535,12 @@ static void chain_group(void)
node *n, *n2, *n3;
do {
- c = next_token(TC_GRPSEQ);
+ c = next_token(TS_GRPSEQ);
} while (c & TC_NEWLINE);
if (c & TC_GRPSTART) {
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
- while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
+ while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
if (t_tclass & TC_NEWLINE)
continue;
@@ -1548,13 +1548,13 @@ static void chain_group(void)
chain_group();
}
debug_printf_parse("%s: TC_GRPTERM\n", __func__);
- } else if (c & (TC_OPSEQ | TC_OPTERM)) {
- debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
+ } else if (c & (TS_OPSEQ | TS_OPTERM)) {
+ debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
} else {
- /* TC_STATEMNT */
- debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
+ /* TS_STATEMNT */
+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
switch (t_info & OPCLSMASK) {
case ST_IF:
debug_printf_parse("%s: ST_IF\n", __func__);
@@ -1563,7 +1563,7 @@ static void chain_group(void)
chain_group();
n2 = chain_node(OC_EXEC);
n->r.n = seq->last;
- if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
+ if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
chain_group();
n2->a.n = seq->last;
} else {
@@ -1616,10 +1616,10 @@ static void chain_group(void)
case OC_PRINTF:
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
n = chain_node(t_info);
- n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
if (t_tclass & TC_OUTRDR) {
n->info |= t_info;
- n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
+ n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
}
if (t_tclass & TC_GRPTERM)
rollback_token();
@@ -1658,11 +1658,11 @@ static void parse_program(char *p)
g_pos = p;
t_lineno = 1;
- while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
- TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
+ while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART |
+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
- if (tclass & TC_OPTERM) {
- debug_printf_parse("%s: TC_OPTERM\n", __func__);
+ if (tclass & TS_OPTERM) {
+ debug_printf_parse("%s: TS_OPTERM\n", __func__);
continue;
}
@@ -1706,11 +1706,11 @@ static void parse_program(char *p)
seq = &f->body;
chain_group();
clear_array(ahash);
- } else if (tclass & TC_OPSEQ) {
- debug_printf_parse("%s: TC_OPSEQ\n", __func__);
+ } else if (tclass & TS_OPSEQ) {
+ debug_printf_parse("%s: TS_OPSEQ\n", __func__);
rollback_token();
cn = chain_node(OC_TEST);
- cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
+ cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART);
if (t_tclass & TC_GRPSTART) {
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
rollback_token();
--
2.27.0
From 01cbacb45972e14aa3072bf539c391dd03ed3955 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 01:30:49 +0200
Subject: [PATCH 10/61] awk: deindent code block, no code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 177 +++++++++++++++++++++++++-------------------------
1 file changed, 90 insertions(+), 87 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 764a3dd49..9a3b63df6 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1337,8 +1337,9 @@ static node *parse_expr(uint32_t term_tc)
cn->a.n = glptr;
expected_tc = TS_OPERAND | TS_UOPPRE;
glptr = NULL;
-
- } else if (tc & (TS_BINOP | TC_UOPPOST)) {
+ continue;
+ }
+ if (tc & (TS_BINOP | TC_UOPPOST)) {
debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
/* for binary and postfix-unary operators, jump back over
* previous operators with higher priority */
@@ -1368,101 +1369,103 @@ static node *parse_expr(uint32_t term_tc)
expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
}
vn->a.n = cn;
+ continue;
+ }
- } else {
- debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
- /* for operands and prefix-unary operators, attach them
- * to last node */
- vn = cn;
- cn = vn->r.n = new_node(t_info);
- cn->a.n = vn;
+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
+ /* for operands and prefix-unary operators, attach them
+ * to last node */
+ vn = cn;
+ cn = vn->r.n = new_node(t_info);
+ cn->a.n = vn;
- expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
- if (t_info == TI_PREINC || t_info == TI_PREDEC)
- expected_tc = TS_LVALUE | TC_UOPPRE1;
- if (tc & (TS_OPERAND | TC_REGEXP)) {
- debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
- expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
- /* one should be very careful with switch on tclass -
- * only simple tclasses should be used (TC_xyz, not TS_xyz) */
- switch (tc) {
- case TC_VARIABLE:
- case TC_ARRAY:
- debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
- cn->info = OC_VAR;
- v = hash_search(ahash, t_string);
- if (v != NULL) {
- cn->info = OC_FNARG;
- cn->l.aidx = v->x.aidx;
- } else {
- cn->l.v = newvar(t_string);
- }
- if (tc & TC_ARRAY) {
- cn->info |= xS;
- cn->r.n = parse_expr(TC_ARRTERM);
- }
- break;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
+ if (t_info == TI_PREINC || t_info == TI_PREDEC)
+ expected_tc = TS_LVALUE | TC_UOPPRE1;
- case TC_NUMBER:
- case TC_STRING:
- debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
- cn->info = OC_VAR;
- v = cn->l.v = xzalloc(sizeof(var));
- if (tc & TC_NUMBER)
- setvar_i(v, t_double);
- else {
- setvar_s(v, t_string);
- expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
- }
- break;
+ if (!(tc & (TS_OPERAND | TC_REGEXP)))
+ continue;
- case TC_REGEXP:
- debug_printf_parse("%s: TC_REGEXP\n", __func__);
- mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
- break;
+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
+ /* one should be very careful with switch on tclass -
+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */
+ switch (tc) {
+ case TC_VARIABLE:
+ case TC_ARRAY:
+ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
+ cn->info = OC_VAR;
+ v = hash_search(ahash, t_string);
+ if (v != NULL) {
+ cn->info = OC_FNARG;
+ cn->l.aidx = v->x.aidx;
+ } else {
+ cn->l.v = newvar(t_string);
+ }
+ if (tc & TC_ARRAY) {
+ cn->info |= xS;
+ cn->r.n = parse_expr(TC_ARRTERM);
+ }
+ break;
- case TC_FUNCTION:
- debug_printf_parse("%s: TC_FUNCTION\n", __func__);
- cn->info = OC_FUNC;
- cn->r.f = newfunc(t_string);
- cn->l.n = condition();
- break;
+ case TC_NUMBER:
+ case TC_STRING:
+ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
+ cn->info = OC_VAR;
+ v = cn->l.v = xzalloc(sizeof(var));
+ if (tc & TC_NUMBER)
+ setvar_i(v, t_double);
+ else {
+ setvar_s(v, t_string);
+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
+ }
+ break;
- case TC_SEQSTART:
- debug_printf_parse("%s: TC_SEQSTART\n", __func__);
- cn = vn->r.n = parse_expr(TC_SEQTERM);
- if (!cn)
- syntax_error("Empty sequence");
- cn->a.n = vn;
- break;
+ case TC_REGEXP:
+ debug_printf_parse("%s: TC_REGEXP\n", __func__);
+ mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
+ break;
- case TC_GETLINE:
- debug_printf_parse("%s: TC_GETLINE\n", __func__);
- glptr = cn;
- expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
- break;
+ case TC_FUNCTION:
+ debug_printf_parse("%s: TC_FUNCTION\n", __func__);
+ cn->info = OC_FUNC;
+ cn->r.f = newfunc(t_string);
+ cn->l.n = condition();
+ break;
- case TC_BUILTIN:
- debug_printf_parse("%s: TC_BUILTIN\n", __func__);
- cn->l.n = condition();
- break;
+ case TC_SEQSTART:
+ debug_printf_parse("%s: TC_SEQSTART\n", __func__);
+ cn = vn->r.n = parse_expr(TC_SEQTERM);
+ if (!cn)
+ syntax_error("Empty sequence");
+ cn->a.n = vn;
+ break;
- case TC_LENGTH:
- debug_printf_parse("%s: TC_LENGTH\n", __func__);
- next_token(TC_SEQSTART /* length(...) */
- | TS_OPTERM /* length; (or newline)*/
- | TC_GRPTERM /* length } */
- | TC_BINOPX /* length <op> NUM */
- | TC_COMMA /* print length, 1 */
- );
- rollback_token();
- if (t_tclass & TC_SEQSTART) {
- /* It was a "(" token. Handle just like TC_BUILTIN */
- cn->l.n = condition();
- }
- break;
- }
+ case TC_GETLINE:
+ debug_printf_parse("%s: TC_GETLINE\n", __func__);
+ glptr = cn;
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
+ break;
+
+ case TC_BUILTIN:
+ debug_printf_parse("%s: TC_BUILTIN\n", __func__);
+ cn->l.n = condition();
+ break;
+
+ case TC_LENGTH:
+ debug_printf_parse("%s: TC_LENGTH\n", __func__);
+ next_token(TC_SEQSTART /* length(...) */
+ | TS_OPTERM /* length; (or newline)*/
+ | TC_GRPTERM /* length } */
+ | TC_BINOPX /* length <op> NUM */
+ | TC_COMMA /* print length, 1 */
+ );
+ rollback_token();
+ if (t_tclass & TC_SEQSTART) {
+ /* It was a "(" token. Handle just like TC_BUILTIN */
+ cn->l.n = condition();
}
+ break;
}
} /* while() */
--
2.27.0
From acea2fffaa696b855d5189a8a1cd7591fac8891d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 01:50:47 +0200
Subject: [PATCH 11/61] awk: rename TC_SEQSTART/END to L/RPAREN, no code
changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 94 +++++++++++++++++++++++++--------------------------
1 file changed, 47 insertions(+), 47 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 9a3b63df6..d31b97d86 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -207,48 +207,48 @@ typedef struct tsplitter_s {
} tsplitter;
/* simple token classes */
-/* Order and hex values are very important!!! See next_token() */
-#define TC_SEQSTART (1 << 0) /* ( */
-#define TC_SEQTERM (1 << 1) /* ) */
+/* order and hex values are very important!!! See next_token() */
+#define TC_LPAREN (1 << 0) /* ( */
+#define TC_RPAREN (1 << 1) /* ) */
#define TC_REGEXP (1 << 2) /* /.../ */
#define TC_OUTRDR (1 << 3) /* | > >> */
#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
#define TC_BINOPX (1 << 6) /* two-opnd operator */
-#define TC_IN (1 << 7)
-#define TC_COMMA (1 << 8)
-#define TC_PIPE (1 << 9) /* input redirection pipe */
+#define TC_IN (1 << 7) /* 'in' */
+#define TC_COMMA (1 << 8) /* , */
+#define TC_PIPE (1 << 9) /* input redirection pipe | */
#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
#define TC_ARRTERM (1 << 11) /* ] */
#define TC_GRPSTART (1 << 12) /* { */
#define TC_GRPTERM (1 << 13) /* } */
-#define TC_SEMICOL (1 << 14)
+#define TC_SEMICOL (1 << 14) /* ; */
#define TC_NEWLINE (1 << 15)
#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
-#define TC_WHILE (1 << 17)
-#define TC_ELSE (1 << 18)
+#define TC_WHILE (1 << 17) /* 'while' */
+#define TC_ELSE (1 << 18) /* 'else' */
#define TC_BUILTIN (1 << 19)
/* This costs ~50 bytes of code.
* A separate class to support deprecated "length" form. If we don't need that
* (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
* can be merged with TC_BUILTIN:
*/
-#define TC_LENGTH (1 << 20)
-#define TC_GETLINE (1 << 21)
+#define TC_LENGTH (1 << 20) /* 'length' */
+#define TC_GETLINE (1 << 21) /* 'getline' */
#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
-#define TC_BEGIN (1 << 23)
-#define TC_END (1 << 24)
+#define TC_BEGIN (1 << 23) /* 'BEGIN' */
+#define TC_END (1 << 24) /* 'END' */
#define TC_EOF (1 << 25)
-#define TC_VARIABLE (1 << 26)
-#define TC_ARRAY (1 << 27)
-#define TC_FUNCTION (1 << 28)
-#define TC_STRING (1 << 29)
+#define TC_VARIABLE (1 << 26) /* name */
+#define TC_ARRAY (1 << 27) /* name[ */
+#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */
+#define TC_STRING (1 << 29) /* "..." */
#define TC_NUMBER (1 << 30)
#ifndef debug_parse_print_tc
#define debug_parse_print_tc(n) do { \
-if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \
-if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \
+if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \
+if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \
if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
@@ -288,7 +288,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
| TC_BUILTIN | TC_LENGTH | TC_GETLINE \
- | TC_SEQSTART | TC_STRING | TC_NUMBER)
+ | TC_LPAREN | TC_STRING | TC_NUMBER)
#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
#define TS_STATEMNT (TC_STATX | TC_WHILE)
@@ -310,7 +310,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
/* operator is inserted between them */
-#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
| TC_STRING | TC_NUMBER | TC_UOPPOST \
| TC_LENGTH)
#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
@@ -394,8 +394,8 @@ enum {
#define NTCC '\377'
static const char tokenlist[] ALIGN1 =
- "\1(" NTC /* TC_SEQSTART */
- "\1)" NTC /* TC_SEQTERM */
+ "\1(" NTC /* TC_LPAREN */
+ "\1)" NTC /* TC_RPAREN */
"\1/" NTC /* TC_REGEXP */
"\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
"\2++" "\2--" NTC /* TC_UOPPOST */
@@ -1250,9 +1250,9 @@ static uint32_t next_token(uint32_t expected)
/* insert concatenation operator when needed */
debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
(last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
- !(last_token_class == TC_LENGTH && tc == TC_SEQSTART));
+ !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
- && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
+ && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
) {
concat_inserted = TRUE;
save_tclass = tc;
@@ -1304,10 +1304,10 @@ static void mk_re_node(const char *s, node *n, regex_t *re)
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
}
-static node *condition(void)
+static node *parse_lrparen_list(void)
{
- next_token(TC_SEQSTART);
- return parse_expr(TC_SEQTERM);
+ next_token(TC_LPAREN);
+ return parse_expr(TC_RPAREN);
}
/* parse expression terminated by given argument, return ptr
@@ -1430,12 +1430,12 @@ static node *parse_expr(uint32_t term_tc)
debug_printf_parse("%s: TC_FUNCTION\n", __func__);
cn->info = OC_FUNC;
cn->r.f = newfunc(t_string);
- cn->l.n = condition();
+ cn->l.n = parse_lrparen_list();
break;
- case TC_SEQSTART:
- debug_printf_parse("%s: TC_SEQSTART\n", __func__);
- cn = vn->r.n = parse_expr(TC_SEQTERM);
+ case TC_LPAREN:
+ debug_printf_parse("%s: TC_LPAREN\n", __func__);
+ cn = vn->r.n = parse_expr(TC_RPAREN);
if (!cn)
syntax_error("Empty sequence");
cn->a.n = vn;
@@ -1449,21 +1449,21 @@ static node *parse_expr(uint32_t term_tc)
case TC_BUILTIN:
debug_printf_parse("%s: TC_BUILTIN\n", __func__);
- cn->l.n = condition();
+ cn->l.n = parse_lrparen_list();
break;
case TC_LENGTH:
debug_printf_parse("%s: TC_LENGTH\n", __func__);
- next_token(TC_SEQSTART /* length(...) */
+ next_token(TC_LPAREN /* length(...) */
| TS_OPTERM /* length; (or newline)*/
| TC_GRPTERM /* length } */
| TC_BINOPX /* length <op> NUM */
| TC_COMMA /* print length, 1 */
);
rollback_token();
- if (t_tclass & TC_SEQSTART) {
+ if (t_tclass & TC_LPAREN) {
/* It was a "(" token. Handle just like TC_BUILTIN */
- cn->l.n = condition();
+ cn->l.n = parse_lrparen_list();
}
break;
}
@@ -1562,7 +1562,7 @@ static void chain_group(void)
case ST_IF:
debug_printf_parse("%s: ST_IF\n", __func__);
n = chain_node(OC_BR | Vx);
- n->l.n = condition();
+ n->l.n = parse_lrparen_list();
chain_group();
n2 = chain_node(OC_EXEC);
n->r.n = seq->last;
@@ -1576,7 +1576,7 @@ static void chain_group(void)
case ST_WHILE:
debug_printf_parse("%s: ST_WHILE\n", __func__);
- n2 = condition();
+ n2 = parse_lrparen_list();
n = chain_loop(NULL);
n->l.n = n2;
break;
@@ -1587,14 +1587,14 @@ static void chain_group(void)
n = chain_loop(NULL);
n2->a.n = n->a.n;
next_token(TC_WHILE);
- n->l.n = condition();
+ n->l.n = parse_lrparen_list();
break;
case ST_FOR:
debug_printf_parse("%s: ST_FOR\n", __func__);
- next_token(TC_SEQSTART);
- n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
- if (t_tclass & TC_SEQTERM) { /* for-in */
+ next_token(TC_LPAREN);
+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
+ if (t_tclass & TC_RPAREN) { /* for-in */
if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
syntax_error(EMSG_UNEXP_TOKEN);
n = chain_node(OC_WALKINIT | VV);
@@ -1607,7 +1607,7 @@ static void chain_group(void)
n = chain_node(OC_EXEC | Vx);
n->l.n = n2;
n2 = parse_expr(TC_SEMICOL);
- n3 = parse_expr(TC_SEQTERM);
+ n3 = parse_expr(TC_RPAREN);
n = chain_loop(n3);
n->l.n = n2;
if (!n2)
@@ -1686,13 +1686,13 @@ static void parse_program(char *p)
f->body.first = NULL;
f->nargs = 0;
/* Match func arg list: a comma sep list of >= 0 args, and a close paren */
- while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
+ while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) {
/* Either an empty arg list, or trailing comma from prev iter
* must be followed by an arg */
- if (f->nargs == 0 && t_tclass == TC_SEQTERM)
+ if (f->nargs == 0 && t_tclass == TC_RPAREN)
break;
- /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
+ /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */
if (t_tclass != TC_VARIABLE)
syntax_error(EMSG_UNEXP_TOKEN);
@@ -1700,7 +1700,7 @@ static void parse_program(char *p)
v->x.aidx = f->nargs++;
/* Arg followed either by end of arg list or 1 comma */
- if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
+ if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN)
break;
//Impossible: next_token() above would error out and die
// if (t_tclass != TC_COMMA)
--
2.27.0
From 100c649a6d5b8085be19fdcbf02218cf2bcb3cae Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 02:32:32 +0200
Subject: [PATCH 12/61] awk: simplify parsing of function declaration
function old new delta
parse_program 328 313 -15
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 26 ++++++++++----------------
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index d31b97d86..08ff02adb 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -769,7 +769,7 @@ static void hash_remove(xhash *hash, const char *name)
static char *skip_spaces(char *p)
{
- while (1) {
+ for (;;) {
if (*p == '\\' && p[1] == '\n') {
p++;
t_lineno++;
@@ -1685,26 +1685,20 @@ static void parse_program(char *p)
f = newfunc(t_string);
f->body.first = NULL;
f->nargs = 0;
- /* Match func arg list: a comma sep list of >= 0 args, and a close paren */
- while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) {
- /* Either an empty arg list, or trailing comma from prev iter
- * must be followed by an arg */
- if (f->nargs == 0 && t_tclass == TC_RPAREN)
- break;
-
- /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */
- if (t_tclass != TC_VARIABLE)
+ /* func arg list: comma sep list of args, and a close paren */
+ for (;;) {
+ if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
+ if (f->nargs == 0)
+ break; /* func() is ok */
+ /* func(a,) is not ok */
syntax_error(EMSG_UNEXP_TOKEN);
-
+ }
v = findvar(ahash, t_string);
v->x.aidx = f->nargs++;
-
/* Arg followed either by end of arg list or 1 comma */
- if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN)
+ if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
break;
-//Impossible: next_token() above would error out and die
-// if (t_tclass != TC_COMMA)
-// syntax_error(EMSG_UNEXP_TOKEN);
+ /* it was a comma, we ate it */
}
seq = &f->body;
chain_group();
--
2.27.0
From 38cbb39458b554d5bcfb5d326dd235f81e3c9b9d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 02:43:02 +0200
Subject: [PATCH 13/61] awk: g_buf[] does not need a separate allocation
function old new delta
exec_builtin 1400 1414 +14
evaluate 3132 3141 +9
getvar_s 121 125 +4
awk_main 902 886 -16
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 27/-16) Total: 11 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 08ff02adb..7e4f0d142 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -535,7 +535,6 @@ struct globals {
var *Fields;
nvblock *g_cb;
char *g_pos;
- char *g_buf;
smallint icase;
smallint exiting;
smallint nextrec;
@@ -571,6 +570,8 @@ struct globals2 {
/* biggest and least used members go last */
tsplitter fsplitter, rsplitter;
+
+ char g_buf[MAXVARFMT + 1];
};
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
@@ -598,7 +599,6 @@ struct globals2 {
#define Fields (G1.Fields )
#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
-#define g_buf (G1.g_buf )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
@@ -612,6 +612,7 @@ struct globals2 {
#define intvar (G.intvar )
#define fsplitter (G.fsplitter )
#define rsplitter (G.rsplitter )
+#define g_buf (G.g_buf )
#define INIT_G() do { \
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
t_tclass = TS_OPTERM; \
@@ -3353,9 +3354,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
if (ENABLE_LOCALE_SUPPORT)
setlocale(LC_NUMERIC, "C");
- /* allocate global buffer */
- g_buf = xmalloc(MAXVARFMT + 1);
-
vhash = hash_init();
ahash = hash_init();
fdhash = hash_init();
--
2.27.0
From 743b012550834fe032bdc71257e646e202eac2b2 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 03:02:21 +0200
Subject: [PATCH 14/61] awk: when parsing TC_FUNCTION token, eat its opening
'('
...like we do for array references.
function old new delta
parse_expr 938 948 +10
next_token 788 791 +3
parse_program 313 310 -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 13/-3) Total: 10 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 7e4f0d142..1a4468a53 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -241,7 +241,7 @@ typedef struct tsplitter_s {
#define TC_EOF (1 << 25)
#define TC_VARIABLE (1 << 26) /* name */
#define TC_ARRAY (1 << 27) /* name[ */
-#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */
+#define TC_FUNCTION (1 << 28) /* name( */
#define TC_STRING (1 << 29) /* "..." */
#define TC_NUMBER (1 << 30)
@@ -959,6 +959,7 @@ static double getvar_i(var *v)
v->number = my_strtod(&s);
debug_printf_eval("%f (s:'%s')\n", v->number, s);
if (v->type & VF_USER) {
+//TODO: skip_spaces() also skips backslash+newline, is it intended here?
s = skip_spaces(s);
if (*s != '\0')
v->type &= ~VF_USER;
@@ -1103,7 +1104,7 @@ static uint32_t next_token(uint32_t expected)
#define save_tclass (G.next_token__save_tclass)
#define save_info (G.next_token__save_info)
- char *p, *s;
+ char *p;
const char *tl;
const uint32_t *ti;
uint32_t tc, last_token_class;
@@ -1131,15 +1132,12 @@ static uint32_t next_token(uint32_t expected)
while (*p != '\n' && *p != '\0')
p++;
- if (*p == '\n')
- t_lineno++;
-
if (*p == '\0') {
tc = TC_EOF;
debug_printf_parse("%s: token found: TC_EOF\n", __func__);
} else if (*p == '\"') {
/* it's a string */
- t_string = s = ++p;
+ char *s = t_string = ++p;
while (*p != '\"') {
char *pp;
if (*p == '\0' || *p == '\n')
@@ -1154,7 +1152,7 @@ static uint32_t next_token(uint32_t expected)
debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
} else if ((expected & TC_REGEXP) && *p == '/') {
/* it's regexp */
- t_string = s = ++p;
+ char *s = t_string = ++p;
while (*p != '/') {
if (*p == '\0' || *p == '\n')
syntax_error(EMSG_UNEXP_EOS);
@@ -1185,6 +1183,9 @@ static uint32_t next_token(uint32_t expected)
tc = TC_NUMBER;
debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
} else {
+ if (*p == '\n')
+ t_lineno++;
+
/* search for something known */
tl = tokenlist;
tc = 0x00000001;
@@ -1230,15 +1231,15 @@ static uint32_t next_token(uint32_t expected)
if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
p = skip_spaces(p);
if (*p == '(') {
+ p++;
tc = TC_FUNCTION;
debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
+ } else if (*p == '[') {
+ p++;
+ tc = TC_ARRAY;
+ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
} else {
- if (*p == '[') {
- p++;
- tc = TC_ARRAY;
- debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
- } else
- debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
+ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
}
}
token_found:
@@ -1431,7 +1432,7 @@ static node *parse_expr(uint32_t term_tc)
debug_printf_parse("%s: TC_FUNCTION\n", __func__);
cn->info = OC_FUNC;
cn->r.f = newfunc(t_string);
- cn->l.n = parse_lrparen_list();
+ cn->l.n = parse_expr(TC_RPAREN);
break;
case TC_LPAREN:
@@ -1682,7 +1683,6 @@ static void parse_program(char *p)
} else if (tclass & TC_FUNCDECL) {
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION);
- g_pos++;
f = newfunc(t_string);
f->body.first = NULL;
f->nargs = 0;
--
2.27.0
From f80dfb802b4a0984293d50f80cd41519b109b524 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 03:27:07 +0200
Subject: [PATCH 15/61] awk: get rid of "move name one char back" trick in
next_token()
function old new delta
next_token 791 812 +21
awk_main 886 831 -55
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55) Total: -34 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 54 +++++++++++++++++++++++++--------------------------
1 file changed, 27 insertions(+), 27 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 1a4468a53..fb1e5d59b 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -535,6 +535,7 @@ struct globals {
var *Fields;
nvblock *g_cb;
char *g_pos;
+ char g_saved_ch;
smallint icase;
smallint exiting;
smallint nextrec;
@@ -599,6 +600,7 @@ struct globals2 {
#define Fields (G1.Fields )
#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
+#define g_saved_ch (G1.g_saved_ch )
#define icase (G1.icase )
#define exiting (G1.exiting )
#define nextrec (G1.nextrec )
@@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected)
t_info = save_info;
} else {
p = g_pos;
+ if (g_saved_ch != '\0') {
+ *p = g_saved_ch;
+ g_saved_ch = '\0';
+ }
readnext:
p = skip_spaces(p);
g_lineno = t_lineno;
@@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected)
tc = TC_NUMBER;
debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
} else {
+ char *end_of_name;
+
if (*p == '\n')
t_lineno++;
@@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected)
if (!isalnum_(*p))
syntax_error(EMSG_UNEXP_TOKEN); /* no */
/* yes */
-/* "move name one char back" trick: we need a byte for NUL terminator */
-/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */
- t_string = --p;
- while (isalnum_(*++p)) {
- p[-1] = *p;
- }
- p[-1] = '\0';
+ t_string = p;
+ while (isalnum_(*p))
+ p++;
+ end_of_name = p;
tc = TC_VARIABLE;
/* also consume whitespace between functionname and bracket */
if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
+//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters?
p = skip_spaces(p);
if (*p == '(') {
p++;
@@ -1240,7 +1246,19 @@ static uint32_t next_token(uint32_t expected)
debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
} else {
debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
+ if (end_of_name == p) {
+ /* there is no space for trailing NUL in t_string!
+ * We need to save the char we are going to NUL.
+ * (we'll use it in future call to next_token())
+ */
+ g_saved_ch = *end_of_name;
+// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
+// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
+// '.' and analyze it later: we also have to *store it back* in next
+// next_token(), in order to give my_strtod() the undamaged ".2" string.
+ }
}
+ *end_of_name = '\0'; /* terminate t_string */
}
token_found:
g_pos = p;
@@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
g_progname = llist_pop(&list_f);
fd = xopen_stdin(g_progname);
- /* 1st byte is reserved for "move name one char back" trick in next_token */
- i = 1;
- s = NULL;
- for (;;) {
- int sz;
- s = xrealloc(s, i + 1000);
- sz = safe_read(fd, s + i, 1000);
- if (sz <= 0)
- break;
- i += sz;
- }
- s = xrealloc(s, i + 1); /* trim unused 999 bytes */
- s[i] = '\0';
+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
close(fd);
- parse_program(s + 1);
+ parse_program(s);
free(s);
}
g_progname = "cmd. line";
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
while (list_e) {
- /* NB: "move name one char back" trick in next_token
- * can use argv[i][-1] here.
- */
parse_program(llist_pop(&list_e));
}
#endif
if (!(opt & (OPT_f | OPT_e))) {
if (!*argv)
bb_show_usage();
- /* NB: "move name one char back" trick in next_token
- * can use argv[i][-1] here.
- */
parse_program(*argv++);
}
--
2.27.0
From 7fbe3864b057dd6c1ba39d7b5071502c09280767 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 03:44:56 +0200
Subject: [PATCH 16/61] awk: code shrink
function old new delta
parse_expr 948 945 -3
chain_expr 65 62 -3
chain_group 655 649 -6
parse_program 310 303 -7
rollback_token 10 - -10
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-29) Total: -29 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index fb1e5d59b..3d1c04a32 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1300,7 +1300,7 @@ static uint32_t next_token(uint32_t expected)
#undef save_info
}
-static void rollback_token(void)
+static ALWAYS_INLINE void rollback_token(void)
{
t_rollback = TRUE;
}
@@ -1474,14 +1474,14 @@ static node *parse_expr(uint32_t term_tc)
case TC_LENGTH:
debug_printf_parse("%s: TC_LENGTH\n", __func__);
- next_token(TC_LPAREN /* length(...) */
+ tc = next_token(TC_LPAREN /* length(...) */
| TS_OPTERM /* length; (or newline)*/
| TC_GRPTERM /* length } */
| TC_BINOPX /* length <op> NUM */
| TC_COMMA /* print length, 1 */
);
rollback_token();
- if (t_tclass & TC_LPAREN) {
+ if (tc & TC_LPAREN) {
/* It was a "(" token. Handle just like TC_BUILTIN */
cn->l.n = parse_lrparen_list();
}
@@ -1563,19 +1563,23 @@ static void chain_group(void)
if (c & TC_GRPSTART) {
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
- while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
+ while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) {
debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
- if (t_tclass & TC_NEWLINE)
+ if (c & TC_NEWLINE)
continue;
rollback_token();
chain_group();
}
debug_printf_parse("%s: TC_GRPTERM\n", __func__);
- } else if (c & (TS_OPSEQ | TS_OPTERM)) {
+ return;
+ }
+ if (c & (TS_OPSEQ | TS_OPTERM)) {
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
- } else {
+ return;
+ }
+ {
/* TS_STATEMNT */
debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
switch (t_info & OPCLSMASK) {
--
2.27.0
From 9dba9fae14ec415943d1fda31b6b48d56d5cb0d0 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 03:47:46 +0200
Subject: [PATCH 17/61] awk: deindent a block, no code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 167 +++++++++++++++++++++++++-------------------------
1 file changed, 83 insertions(+), 84 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 3d1c04a32..34bcc1798 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1579,98 +1579,97 @@ static void chain_group(void)
chain_expr(OC_EXEC | Vx);
return;
}
- {
- /* TS_STATEMNT */
- debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
- switch (t_info & OPCLSMASK) {
- case ST_IF:
- debug_printf_parse("%s: ST_IF\n", __func__);
- n = chain_node(OC_BR | Vx);
- n->l.n = parse_lrparen_list();
+
+ /* TS_STATEMNT */
+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
+ switch (t_info & OPCLSMASK) {
+ case ST_IF:
+ debug_printf_parse("%s: ST_IF\n", __func__);
+ n = chain_node(OC_BR | Vx);
+ n->l.n = parse_lrparen_list();
+ chain_group();
+ n2 = chain_node(OC_EXEC);
+ n->r.n = seq->last;
+ if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
chain_group();
- n2 = chain_node(OC_EXEC);
- n->r.n = seq->last;
- if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
- chain_group();
- n2->a.n = seq->last;
- } else {
- rollback_token();
- }
- break;
+ n2->a.n = seq->last;
+ } else {
+ rollback_token();
+ }
+ break;
- case ST_WHILE:
- debug_printf_parse("%s: ST_WHILE\n", __func__);
- n2 = parse_lrparen_list();
- n = chain_loop(NULL);
- n->l.n = n2;
- break;
+ case ST_WHILE:
+ debug_printf_parse("%s: ST_WHILE\n", __func__);
+ n2 = parse_lrparen_list();
+ n = chain_loop(NULL);
+ n->l.n = n2;
+ break;
- case ST_DO:
- debug_printf_parse("%s: ST_DO\n", __func__);
- n2 = chain_node(OC_EXEC);
- n = chain_loop(NULL);
- n2->a.n = n->a.n;
- next_token(TC_WHILE);
- n->l.n = parse_lrparen_list();
- break;
+ case ST_DO:
+ debug_printf_parse("%s: ST_DO\n", __func__);
+ n2 = chain_node(OC_EXEC);
+ n = chain_loop(NULL);
+ n2->a.n = n->a.n;
+ next_token(TC_WHILE);
+ n->l.n = parse_lrparen_list();
+ break;
- case ST_FOR:
- debug_printf_parse("%s: ST_FOR\n", __func__);
- next_token(TC_LPAREN);
- n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
- if (t_tclass & TC_RPAREN) { /* for-in */
- if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
- syntax_error(EMSG_UNEXP_TOKEN);
- n = chain_node(OC_WALKINIT | VV);
- n->l.n = n2->l.n;
- n->r.n = n2->r.n;
- n = chain_loop(NULL);
- n->info = OC_WALKNEXT | Vx;
- n->l.n = n2->l.n;
- } else { /* for (;;) */
- n = chain_node(OC_EXEC | Vx);
- n->l.n = n2;
- n2 = parse_expr(TC_SEMICOL);
- n3 = parse_expr(TC_RPAREN);
- n = chain_loop(n3);
- n->l.n = n2;
- if (!n2)
- n->info = OC_EXEC;
- }
- break;
+ case ST_FOR:
+ debug_printf_parse("%s: ST_FOR\n", __func__);
+ next_token(TC_LPAREN);
+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
+ if (t_tclass & TC_RPAREN) { /* for-in */
+ if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
+ syntax_error(EMSG_UNEXP_TOKEN);
+ n = chain_node(OC_WALKINIT | VV);
+ n->l.n = n2->l.n;
+ n->r.n = n2->r.n;
+ n = chain_loop(NULL);
+ n->info = OC_WALKNEXT | Vx;
+ n->l.n = n2->l.n;
+ } else { /* for (;;) */
+ n = chain_node(OC_EXEC | Vx);
+ n->l.n = n2;
+ n2 = parse_expr(TC_SEMICOL);
+ n3 = parse_expr(TC_RPAREN);
+ n = chain_loop(n3);
+ n->l.n = n2;
+ if (!n2)
+ n->info = OC_EXEC;
+ }
+ break;
- case OC_PRINT:
- case OC_PRINTF:
- debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
- n = chain_node(t_info);
- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
- if (t_tclass & TC_OUTRDR) {
- n->info |= t_info;
- n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
- }
- if (t_tclass & TC_GRPTERM)
- rollback_token();
- break;
+ case OC_PRINT:
+ case OC_PRINTF:
+ debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
+ n = chain_node(t_info);
+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
+ if (t_tclass & TC_OUTRDR) {
+ n->info |= t_info;
+ n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
+ }
+ if (t_tclass & TC_GRPTERM)
+ rollback_token();
+ break;
- case OC_BREAK:
- debug_printf_parse("%s: OC_BREAK\n", __func__);
- n = chain_node(OC_EXEC);
- n->a.n = break_ptr;
- chain_expr(t_info);
- break;
+ case OC_BREAK:
+ debug_printf_parse("%s: OC_BREAK\n", __func__);
+ n = chain_node(OC_EXEC);
+ n->a.n = break_ptr;
+ chain_expr(t_info);
+ break;
- case OC_CONTINUE:
- debug_printf_parse("%s: OC_CONTINUE\n", __func__);
- n = chain_node(OC_EXEC);
- n->a.n = continue_ptr;
- chain_expr(t_info);
- break;
+ case OC_CONTINUE:
+ debug_printf_parse("%s: OC_CONTINUE\n", __func__);
+ n = chain_node(OC_EXEC);
+ n->a.n = continue_ptr;
+ chain_expr(t_info);
+ break;
- /* delete, next, nextfile, return, exit */
- default:
- debug_printf_parse("%s: default\n", __func__);
- chain_expr(t_info);
- }
+ /* delete, next, nextfile, return, exit */
+ default:
+ debug_printf_parse("%s: default\n", __func__);
+ chain_expr(t_info);
}
}
--
2.27.0
From bc9e60546c860c130ed9c312517fbbaf0ad51871 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 12:16:36 +0200
Subject: [PATCH 18/61] awk: fix parsing of expressions such as "v (a)"
function old new delta
next_token 812 825 +13
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 22 ++++++++++++++++++----
testsuite/awk.tests | 11 +++++++++++
2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 34bcc1798..ce860dc04 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1231,11 +1231,24 @@ static uint32_t next_token(uint32_t expected)
while (isalnum_(*p))
p++;
end_of_name = p;
- tc = TC_VARIABLE;
- /* also consume whitespace between functionname and bracket */
- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
-//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters?
+
+ if (last_token_class == TC_FUNCDECL)
+ /* eat space in "function FUNC (...) {...}" declaration */
p = skip_spaces(p);
+ else if (expected & TC_ARRAY) {
+ /* eat space between array name and [ */
+ char *s = skip_spaces(p);
+ if (*s == '[') /* array ref, not just a name? */
+ p = s;
+ }
+ /* else: do NOT consume whitespace after variable name!
+ * gawk allows definition "function FUNC (p) {...}" - note space,
+ * but disallows the call "FUNC (p)" because it isn't one -
+ * expression "v (a)" should NOT be parsed as TC_FUNCTION:
+ * it is a valid concatenation if "v" is a variable,
+ * not a function name (and type of name is not known at parse time).
+ */
+
if (*p == '(') {
p++;
tc = TC_FUNCTION;
@@ -1245,6 +1258,7 @@ static uint32_t next_token(uint32_t expected)
tc = TC_ARRAY;
debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
} else {
+ tc = TC_VARIABLE;
debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
if (end_of_name == p) {
/* there is no space for trailing NUL in t_string!
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index cf9b722dc..6e35d33dd 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -71,6 +71,17 @@ testing "awk properly handles undefined function" \
"L1\n\nawk: cmd. line:5: Call to undefined function\n" \
"" ""
+prg='
+BEGIN {
+ v=1
+ a=2
+ print v (a)
+}'
+testing "'v (a)' is not a function call, it is a concatenation" \
+ "awk '$prg' 2>&1" \
+ "12\n" \
+ "" ""
+
optional DESKTOP
testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
--
2.27.0
From 08444111ee05f6514bcf6a8c8898ab4e4b827982 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 14:33:04 +0200
Subject: [PATCH 19/61] awk: document which hashes are used at what state
(parse/execute)
We can free them after they are no longer needed.
(Currently, being a NOEXEC applet is a much larger waste of memory
for the case of a long-running awk script).
function old new delta
awk_main 831 827 -4
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index ce860dc04..6142144bb 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -527,7 +527,10 @@ struct globals {
chain *seq;
node *break_ptr, *continue_ptr;
rstream *iF;
- xhash *vhash, *ahash, *fdhash, *fnhash;
+ xhash *ahash; /* argument names, used only while parsing function bodies */
+ xhash *fnhash; /* function names, used only in parsing stage */
+ xhash *vhash; /* variables and arrays */
+ xhash *fdhash; /* file objects, used only in execution stage */
const char *g_progname;
int g_lineno;
int nfields;
@@ -1719,6 +1722,7 @@ static void parse_program(char *p)
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION);
f = newfunc(t_string);
+//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}'
f->body.first = NULL;
f->nargs = 0;
/* func arg list: comma sep list of args, and a close paren */
@@ -3389,12 +3393,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
if (ENABLE_LOCALE_SUPPORT)
setlocale(LC_NUMERIC, "C");
- vhash = hash_init();
- ahash = hash_init();
- fdhash = hash_init();
- fnhash = hash_init();
-
/* initialize variables */
+ vhash = hash_init();
{
char *vnames = (char *)vNames; /* cheat */
char *vvalues = (char *)vValues;
@@ -3416,10 +3416,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
handle_special(intvar[FS]);
handle_special(intvar[RS]);
- newfile("/dev/stdin")->F = stdin;
- newfile("/dev/stdout")->F = stdout;
- newfile("/dev/stderr")->F = stderr;
-
/* Huh, people report that sometimes environ is NULL. Oh well. */
if (environ) {
char **envp;
@@ -3449,6 +3445,10 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
if (!is_assignment(llist_pop(&list_v)))
bb_show_usage();
}
+
+ /* Parse all supplied programs */
+ fnhash = hash_init();
+ ahash = hash_init();
while (list_f) {
int fd;
char *s;
@@ -3471,6 +3471,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
bb_show_usage();
parse_program(*argv++);
}
+ //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies
+ //ahash = NULL; // debug
+ //free_hash(fnhash) // ~250 bytes, used only for function names
+ //fnhash = NULL; // debug
+ /* parsing done, on to executing */
/* fill in ARGV array */
setari_u(intvar[ARGV], 0, "awk");
@@ -3479,6 +3484,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
setari_u(intvar[ARGV], ++i, *argv++);
setvar_i(intvar[ARGC], i + 1);
+ fdhash = hash_init();
+ newfile("/dev/stdin")->F = stdin;
+ newfile("/dev/stdout")->F = stdout;
+ newfile("/dev/stderr")->F = stderr;
+
zero_out_var(&tv);
evaluate(beginseq.first, &tv);
if (!mainseq.first && !endseq.first)
--
2.27.0
From ce151c62189985344d90fc554f8780c7305112f8 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 18:33:25 +0200
Subject: [PATCH 20/61] awk: free unused parsing structures after parse is done
function old new delta
hash_clear - 90 +90
awk_main 827 849 +22
clear_array 90 - -90
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 1/0 up/down: 112/-90) Total: 22 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 74 ++++++++++++++++++++++++++++++++-------------------
1 file changed, 47 insertions(+), 27 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 6142144bb..4e29b28cf 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -530,7 +530,8 @@ struct globals {
xhash *ahash; /* argument names, used only while parsing function bodies */
xhash *fnhash; /* function names, used only in parsing stage */
xhash *vhash; /* variables and arrays */
- xhash *fdhash; /* file objects, used only in execution stage */
+ //xhash *fdhash; /* file objects, used only in execution stage */
+ //we are reusing ahash as fdhash, via define (see later)
const char *g_progname;
int g_lineno;
int nfields;
@@ -592,10 +593,13 @@ struct globals2 {
#define break_ptr (G1.break_ptr )
#define continue_ptr (G1.continue_ptr)
#define iF (G1.iF )
-#define vhash (G1.vhash )
#define ahash (G1.ahash )
-#define fdhash (G1.fdhash )
#define fnhash (G1.fnhash )
+#define vhash (G1.vhash )
+#define fdhash ahash
+//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
+// and ends up empty after parsing phase. Thus, we can simply reuse it
+// for fdhash in execution stage.
#define g_progname (G1.g_progname )
#define g_lineno (G1.g_lineno )
#define nfields (G1.nfields )
@@ -682,6 +686,33 @@ static xhash *hash_init(void)
return newhash;
}
+static void hash_clear(xhash *hash)
+{
+ unsigned i;
+ hash_item *hi, *thi;
+
+ for (i = 0; i < hash->csize; i++) {
+ hi = hash->items[i];
+ while (hi) {
+ thi = hi;
+ hi = hi->next;
+ free(thi->data.v.string);
+ free(thi);
+ }
+ hash->items[i] = NULL;
+ }
+ hash->glen = hash->nel = 0;
+}
+
+#if 0 //UNUSED
+static void hash_free(xhash *hash)
+{
+ hash_clear(hash);
+ free(hash->items);
+ free(hash);
+}
+#endif
+
/* find item in hash, return ptr to data, NULL if not found */
static void *hash_search(xhash *hash, const char *name)
{
@@ -869,23 +900,7 @@ static xhash *iamarray(var *v)
return a->x.array;
}
-static void clear_array(xhash *array)
-{
- unsigned i;
- hash_item *hi, *thi;
-
- for (i = 0; i < array->csize; i++) {
- hi = array->items[i];
- while (hi) {
- thi = hi;
- hi = hi->next;
- free(thi->data.v.string);
- free(thi);
- }
- array->items[i] = NULL;
- }
- array->glen = array->nel = 0;
-}
+#define clear_array(array) hash_clear(array)
/* clear a variable */
static var *clrvar(var *v)
@@ -1742,7 +1757,7 @@ static void parse_program(char *p)
}
seq = &f->body;
chain_group();
- clear_array(ahash);
+ hash_clear(ahash);
} else if (tclass & TS_OPSEQ) {
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
rollback_token();
@@ -3471,11 +3486,16 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
bb_show_usage();
parse_program(*argv++);
}
- //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies
- //ahash = NULL; // debug
- //free_hash(fnhash) // ~250 bytes, used only for function names
- //fnhash = NULL; // debug
- /* parsing done, on to executing */
+ /* Free unused parse structures */
+ //hash_free(fnhash); // ~250 bytes when empty, used only for function names
+ //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
+ // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
+ free(fnhash->items);
+ free(fnhash);
+ fnhash = NULL; // debug
+ //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
+
+ /* Parsing done, on to executing */
/* fill in ARGV array */
setari_u(intvar[ARGV], 0, "awk");
@@ -3484,7 +3504,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
setari_u(intvar[ARGV], ++i, *argv++);
setvar_i(intvar[ARGC], i + 1);
- fdhash = hash_init();
+ //fdhash = ahash - done via define
newfile("/dev/stdin")->F = stdin;
newfile("/dev/stdout")->F = stdout;
newfile("/dev/stderr")->F = stderr;
--
2.27.0
From 465eba0f032c96966d2547f116784fb0d8751943 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Tue, 29 Jun 2021 19:07:36 +0200
Subject: [PATCH 21/61] awk: assorted optimizations
hash_find(): do not calculate hash twice. Do not divide - can use
cheap multiply-by-8 shift.
nextword(): do not repeatedly increment in-memory value, do it in register,
then store final result.
hashwalk_init(): do not strlen() twice.
function old new delta
hash_search3 - 49 +49
hash_find 259 281 +22
nextword 19 16 -3
evaluate 3141 3137 -4
hash_search 54 28 -26
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/3 up/down: 71/-33) Total: 38 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 4e29b28cf..a4cd3cf93 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -696,6 +696,7 @@ static void hash_clear(xhash *hash)
while (hi) {
thi = hi;
hi = hi->next;
+//FIXME: this assumes that it's a hash of *variables*:
free(thi->data.v.string);
free(thi);
}
@@ -714,11 +715,11 @@ static void hash_free(xhash *hash)
#endif
/* find item in hash, return ptr to data, NULL if not found */
-static void *hash_search(xhash *hash, const char *name)
+static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
{
hash_item *hi;
- hi = hash->items[hashidx(name) % hash->csize];
+ hi = hash->items[idx % hash->csize];
while (hi) {
if (strcmp(hi->name, name) == 0)
return &hi->data;
@@ -727,6 +728,11 @@ static void *hash_search(xhash *hash, const char *name)
return NULL;
}
+static void *hash_search(xhash *hash, const char *name)
+{
+ return hash_search3(hash, name, hashidx(name));
+}
+
/* grow hash if it becomes too big */
static void hash_rebuild(xhash *hash)
{
@@ -762,16 +768,17 @@ static void *hash_find(xhash *hash, const char *name)
unsigned idx;
int l;
- hi = hash_search(hash, name);
+ idx = hashidx(name);
+ hi = hash_search3(hash, name, idx);
if (!hi) {
- if (++hash->nel / hash->csize > 10)
+ if (++hash->nel > hash->csize * 8)
hash_rebuild(hash);
l = strlen(name) + 1;
hi = xzalloc(sizeof(*hi) + l);
strcpy(hi->name, name);
- idx = hashidx(name) % hash->csize;
+ idx = idx % hash->csize;
hi->next = hash->items[idx];
hash->items[idx] = hi;
hash->glen += l;
@@ -822,8 +829,10 @@ static char *skip_spaces(char *p)
static char *nextword(char **s)
{
char *p = *s;
- while (*(*s)++ != '\0')
+ char *q = p;
+ while (*q++ != '\0')
continue;
+ *s = q;
return p;
}
@@ -2116,8 +2125,7 @@ static void hashwalk_init(var *v, xhash *array)
for (i = 0; i < array->csize; i++) {
hi = array->items[i];
while (hi) {
- strcpy(w->end, hi->name);
- nextword(&w->end);
+ w->end = stpcpy(w->end, hi->name) + 1;
hi = hi->next;
}
}
@@ -3504,7 +3512,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
setari_u(intvar[ARGV], ++i, *argv++);
setvar_i(intvar[ARGC], i + 1);
- //fdhash = ahash - done via define
+ //fdhash = ahash; // done via define
newfile("/dev/stdin")->F = stdin;
newfile("/dev/stdout")->F = stdout;
newfile("/dev/stderr")->F = stderr;
--
2.27.0
From 467708ee9c852a4535d554214bb70b916743335a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 30 Jun 2021 02:12:27 +0200
Subject: [PATCH 22/61] awk: remove custom pool allocator for temporary awk
variables
It seems to be designed to reduce overhead of malloc's auxiliary data,
by allocating at least 64 variables as a block.
With "struct var" being about 20-32 bytes long (32/64 bits),
malloc overhead for one temporary indeed is high, ~33% more memory used
than needed.
function old new delta
evaluate 3137 3145 +8
modprobe_main 798 803 +5
exec_builtin 1414 1419 +5
awk_printf 476 481 +5
as_regex 132 137 +5
EMSG_INTERNAL_ERROR 15 - -15
nvfree 169 116 -53
nvalloc 145 - -145
------------------------------------------------------------------------------
(add/remove: 0/2 grow/shrink: 5/1 up/down: 28/-213) Total: -185 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 164 +++++++++++++++++++-------------------------------
1 file changed, 61 insertions(+), 103 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index a4cd3cf93..35c11ec58 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -93,7 +93,6 @@ enum {
};
#define MAXVARFMT 240
-#define MINNVBLOCK 64
/* variable flags */
#define VF_NUMBER 0x0001 /* 1 = primary type is number */
@@ -120,8 +119,8 @@ typedef struct walker_list {
/* Variable */
typedef struct var_s {
unsigned type; /* flags */
- double number;
char *string;
+ double number;
union {
int aidx; /* func arg idx (for compilation stage) */
struct xhash_s *array; /* array ptr */
@@ -192,15 +191,6 @@ typedef struct node_s {
} a;
} node;
-/* Block of temporary variables */
-typedef struct nvblock_s {
- int size;
- var *pos;
- struct nvblock_s *prev;
- struct nvblock_s *next;
- var nv[];
-} nvblock;
-
typedef struct tsplitter_s {
node n;
regex_t re[2];
@@ -537,7 +527,6 @@ struct globals {
int nfields;
int maxfields; /* used in fsrealloc() only */
var *Fields;
- nvblock *g_cb;
char *g_pos;
char g_saved_ch;
smallint icase;
@@ -605,7 +594,6 @@ struct globals2 {
#define nfields (G1.nfields )
#define maxfields (G1.maxfields )
#define Fields (G1.Fields )
-#define g_cb (G1.g_cb )
#define g_pos (G1.g_pos )
#define g_saved_ch (G1.g_saved_ch )
#define icase (G1.icase )
@@ -640,7 +628,6 @@ static int awk_exit(int) NORETURN;
/* ---- error handling ---- */
-static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
@@ -1050,77 +1037,6 @@ static int istrue(var *v)
return (v->string && v->string[0]);
}
-/* temporary variables allocator. Last allocated should be first freed */
-static var *nvalloc(int n)
-{
- nvblock *pb = NULL;
- var *v, *r;
- int size;
-
- while (g_cb) {
- pb = g_cb;
- if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
- break;
- g_cb = g_cb->next;
- }
-
- if (!g_cb) {
- size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
- g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
- g_cb->size = size;
- g_cb->pos = g_cb->nv;
- g_cb->prev = pb;
- /*g_cb->next = NULL; - xzalloc did it */
- if (pb)
- pb->next = g_cb;
- }
-
- v = r = g_cb->pos;
- g_cb->pos += n;
-
- while (v < g_cb->pos) {
- v->type = 0;
- v->string = NULL;
- v++;
- }
-
- return r;
-}
-
-static void nvfree(var *v)
-{
- var *p;
-
- if (v < g_cb->nv || v >= g_cb->pos)
- syntax_error(EMSG_INTERNAL_ERROR);
-
- for (p = v; p < g_cb->pos; p++) {
- if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
- clear_array(iamarray(p));
- free(p->x.array->items);
- free(p->x.array);
- }
- if (p->type & VF_WALK) {
- walker_list *n;
- walker_list *w = p->x.walker;
- debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
- p->x.walker = NULL;
- while (w) {
- n = w->prev;
- debug_printf_walker(" free(%p)\n", w);
- free(w);
- w = n;
- }
- }
- clrvar(p);
- }
-
- g_cb->pos = v;
- while (g_cb->prev && g_cb->pos == g_cb->nv) {
- g_cb = g_cb->prev;
- }
-}
-
/* ------- awk program text parsing ------- */
/* Parse next token pointed by global pos, place results into global t_XYZ variables.
@@ -1793,6 +1709,41 @@ static void parse_program(char *p)
/* -------- program execution part -------- */
+/* temporary variables allocator */
+static var *nvalloc(int sz)
+{
+ return xzalloc(sz * sizeof(var));
+}
+
+static void nvfree(var *v, int sz)
+{
+ var *p = v;
+
+ while (--sz >= 0) {
+ if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
+ clear_array(iamarray(p));
+ free(p->x.array->items);
+ free(p->x.array);
+ }
+ if (p->type & VF_WALK) {
+ walker_list *n;
+ walker_list *w = p->x.walker;
+ debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
+ p->x.walker = NULL;
+ while (w) {
+ n = w->prev;
+ debug_printf_walker(" free(%p)\n", w);
+ free(w);
+ w = n;
+ }
+ }
+ clrvar(p);
+ p++;
+ }
+
+ free(v);
+}
+
static node *mk_splitter(const char *s, tsplitter *spl)
{
regex_t *re, *ire;
@@ -1814,9 +1765,9 @@ static node *mk_splitter(const char *s, tsplitter *spl)
return n;
}
-/* use node as a regular expression. Supplied with node ptr and regex_t
+/* Use node as a regular expression. Supplied with node ptr and regex_t
* storage space. Return ptr to regex (if result points to preg, it should
- * be later regfree'd manually
+ * be later regfree'd manually).
*/
static regex_t *as_regex(node *op, regex_t *preg)
{
@@ -1840,7 +1791,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
cflags &= ~REG_EXTENDED;
xregcomp(preg, s, cflags);
}
- nvfree(v);
+ nvfree(v, 1);
return preg;
}
@@ -2292,6 +2243,8 @@ static char *awk_printf(node *n, int *len)
var *v, *arg;
v = nvalloc(1);
+//TODO: above, to avoid allocating a single temporary var, take a pointer
+//to a temporary that our caller (evaluate()) already has?
fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
i = 0;
@@ -2333,7 +2286,7 @@ static char *awk_printf(node *n, int *len)
}
free(fmt);
- nvfree(v);
+ nvfree(v, 1);
b = xrealloc(b, i + 1);
b[i] = '\0';
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
@@ -2661,14 +2614,14 @@ static NOINLINE var *exec_builtin(node *op, var *res)
break;
}
- nvfree(tv);
+ nvfree(tv, 4);
return res;
#undef tspl
}
/*
* Evaluate node - the heart of the program. Supplied with subtree
- * and place where to store result. returns ptr to result.
+ * and place where to store result. Returns ptr to result.
*/
#define XC(n) ((n) >> 8)
@@ -2953,33 +2906,38 @@ static var *evaluate(node *op, var *res)
break;
case XC( OC_FUNC ): {
- var *vbeg, *v;
+ var *tv, *sv_fnargs;
const char *sv_progname;
+ int nargs1, i;
+
debug_printf_eval("FUNC\n");
- /* The body might be empty, still has to eval the args */
if (!op->r.n->info && !op->r.f->body.first)
syntax_error(EMSG_UNDEF_FUNC);
- vbeg = v = nvalloc(op->r.f->nargs + 1);
+ /* The body might be empty, still has to eval the args */
+ nargs1 = op->r.f->nargs + 1;
+ tv = nvalloc(nargs1);
+ i = 0;
while (op1) {
+//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0
var *arg = evaluate(nextarg(&op1), v1);
- copyvar(v, arg);
- v->type |= VF_CHILD;
- v->x.parent = arg;
- if (++v - vbeg >= op->r.f->nargs)
+ copyvar(&tv[i], arg);
+ tv[i].type |= VF_CHILD;
+ tv[i].x.parent = arg;
+ if (++i >= op->r.f->nargs)
break;
}
- v = fnargs;
- fnargs = vbeg;
+ sv_fnargs = fnargs;
sv_progname = g_progname;
+ fnargs = tv;
res = evaluate(op->r.f->body.first, res);
+ nvfree(fnargs, nargs1);
g_progname = sv_progname;
- nvfree(fnargs);
- fnargs = v;
+ fnargs = sv_fnargs;
break;
}
@@ -3301,7 +3259,7 @@ static var *evaluate(node *op, var *res)
break;
} /* while (op) */
- nvfree(v1);
+ nvfree(v1, 2);
debug_printf_eval("returning from %s(): %p\n", __func__, res);
return res;
#undef fnargs
--
2.27.0
From c5ddfb36e34c93d63546bc3a7f458b946fa64825 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 30 Jun 2021 12:12:20 +0200
Subject: [PATCH 23/61] awk: replace incorrect use of union in undefined
function check (no code changes)
...which reveals that it's buggy: it thinks "func f(){}" is an undefined function!
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/editors/awk.c b/editors/awk.c
index 35c11ec58..1115085da 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2912,7 +2912,7 @@ static var *evaluate(node *op, var *res)
debug_printf_eval("FUNC\n");
- if (!op->r.n->info && !op->r.f->body.first)
+ if (op->r.f->nargs == 0 && !op->r.f->body.first)
syntax_error(EMSG_UNDEF_FUNC);
/* The body might be empty, still has to eval the args */
--
2.27.0
From 1295da1db50adb2b6db53c6d057fdcc952b0bc78 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 30 Jun 2021 12:23:51 +0200
Subject: [PATCH 24/61] awk: allow empty functions with no arguments, disallow
function redefinitions
function old new delta
.rodata 103681 103700 +19
parse_program 303 307 +4
evaluate 3145 3141 -4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 23/-4) Total: 19 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 11 +++++++----
testsuite/awk.tests | 10 ++++++++++
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 1115085da..c05d5d651 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -139,6 +139,7 @@ typedef struct chain_s {
/* Function */
typedef struct func_s {
unsigned nargs;
+ smallint defined;
struct chain_s body;
} func;
@@ -1662,9 +1663,11 @@ static void parse_program(char *p)
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION);
f = newfunc(t_string);
-//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}'
- f->body.first = NULL;
- f->nargs = 0;
+ if (f->defined)
+ syntax_error("Duplicate function");
+ f->defined = 1;
+ //f->body.first = NULL; - already is
+ //f->nargs = 0; - already is
/* func arg list: comma sep list of args, and a close paren */
for (;;) {
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
@@ -2912,7 +2915,7 @@ static var *evaluate(node *op, var *res)
debug_printf_eval("FUNC\n");
- if (op->r.f->nargs == 0 && !op->r.f->body.first)
+ if (!op->r.f->defined)
syntax_error(EMSG_UNDEF_FUNC);
/* The body might be empty, still has to eval the args */
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 6e35d33dd..873cc3680 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -44,6 +44,16 @@ testing "awk handles empty function f(arg){}" \
"L1\n\nL2\n\n" \
"" ""
+prg='
+function empty_fun(){}
+END {empty_fun()
+ print "Ok"
+}'
+testing "awk handles empty function f(){}" \
+ "awk '$prg'" \
+ "Ok\n" \
+ "" ""
+
prg='
function outer_fun() {
return 1
--
2.27.0
From d88539017ebe731ba507fda8def65969bd14e582 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 30 Jun 2021 12:42:39 +0200
Subject: [PATCH 25/61] awk: rewrite "print" logic a bit to make it clearer
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index c05d5d651..0fbca0433 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2792,7 +2792,7 @@ static var *evaluate(node *op, var *res)
if (!op1) {
fputs(getvar_s(intvar[F0]), F);
} else {
- while (op1) {
+ for (;;) {
var *v = evaluate(nextarg(&op1), v1);
if (v->type & VF_NUMBER) {
fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
@@ -2801,13 +2801,12 @@ static var *evaluate(node *op, var *res)
} else {
fputs(getvar_s(v), F);
}
-
- if (op1)
- fputs(getvar_s(intvar[OFS]), F);
+ if (!op1)
+ break;
+ fputs(getvar_s(intvar[OFS]), F);
}
}
fputs(getvar_s(intvar[ORS]), F);
-
} else { /* OC_PRINTF */
char *s = awk_printf(op1, &len);
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
--
2.27.0
From 04a90dbf88727415f4bcd3d1125d463255557d55 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 30 Jun 2021 12:52:51 +0200
Subject: [PATCH 26/61] awk: evaluate all, even superfluous function args
function old new delta
evaluate 3128 3135 +7
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 19 ++++++++++++-------
testsuite/awk.tests | 8 +++++++-
2 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 0fbca0433..47bbc10a6 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2910,7 +2910,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_FUNC ): {
var *tv, *sv_fnargs;
const char *sv_progname;
- int nargs1, i;
+ int nargs, i;
debug_printf_eval("FUNC\n");
@@ -2918,17 +2918,22 @@ static var *evaluate(node *op, var *res)
syntax_error(EMSG_UNDEF_FUNC);
/* The body might be empty, still has to eval the args */
- nargs1 = op->r.f->nargs + 1;
- tv = nvalloc(nargs1);
+ nargs = op->r.f->nargs;
+ tv = nvalloc(nargs);
i = 0;
while (op1) {
-//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0
var *arg = evaluate(nextarg(&op1), v1);
+ if (i == nargs) {
+ /* call with more arguments than function takes.
+ * (gawk warns: "warning: function 'f' called with more arguments than declared").
+ * They are still evaluated, but discarded: */
+ clrvar(arg);
+ continue;
+ }
copyvar(&tv[i], arg);
tv[i].type |= VF_CHILD;
tv[i].x.parent = arg;
- if (++i >= op->r.f->nargs)
- break;
+ i++;
}
sv_fnargs = fnargs;
@@ -2936,7 +2941,7 @@ static var *evaluate(node *op, var *res)
fnargs = tv;
res = evaluate(op->r.f->body.first, res);
- nvfree(fnargs, nargs1);
+ nvfree(fnargs, nargs);
g_progname = sv_progname;
fnargs = sv_fnargs;
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 873cc3680..3c230393f 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -87,11 +87,17 @@ BEGIN {
a=2
print v (a)
}'
-testing "'v (a)' is not a function call, it is a concatenation" \
+testing "awk 'v (a)' is not a function call, it is a concatenation" \
"awk '$prg' 2>&1" \
"12\n" \
"" ""
+prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}'
+testing "awk unused function args are evaluated" \
+ "awk '$prg' 2>&1" \
+ "G\nG\nF\n" \
+ "" ""
+
optional DESKTOP
testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
--
2.27.0
From fd5451c7894cd617a812d095a5d4d3cdc215b218 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Thu, 1 Jul 2021 16:02:16 +0200
Subject: [PATCH 27/61] awk: rename temp variables, no code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 76 +++++++++++++++++++++++++++++++--------------------
1 file changed, 46 insertions(+), 30 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 47bbc10a6..2c2cb74d7 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1775,14 +1775,14 @@ static node *mk_splitter(const char *s, tsplitter *spl)
static regex_t *as_regex(node *op, regex_t *preg)
{
int cflags;
- var *v;
+ var *tmpvar;
const char *s;
if ((op->info & OPCLSMASK) == OC_REGEXP) {
return icase ? op->r.ire : op->l.re;
}
- v = nvalloc(1);
- s = getvar_s(evaluate(op, v));
+ tmpvar = nvalloc(1);
+ s = getvar_s(evaluate(op, tmpvar));
cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
/* Testcase where REG_EXTENDED fails (unpaired '{'):
@@ -1794,7 +1794,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
cflags &= ~REG_EXTENDED;
xregcomp(preg, s, cflags);
}
- nvfree(v, 1);
+ nvfree(tmpvar, 1);
return preg;
}
@@ -2243,12 +2243,12 @@ static char *awk_printf(node *n, int *len)
const char *s1;
int i, j, incr, bsize;
char c, c1;
- var *v, *arg;
+ var *tmpvar, *arg;
- v = nvalloc(1);
+ tmpvar = nvalloc(1);
//TODO: above, to avoid allocating a single temporary var, take a pointer
//to a temporary that our caller (evaluate()) already has?
- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar)));
i = 0;
while (*f) {
@@ -2268,7 +2268,7 @@ static char *awk_printf(node *n, int *len)
f++;
c1 = *f;
*f = '\0';
- arg = evaluate(nextarg(&n), v);
+ arg = evaluate(nextarg(&n), tmpvar);
j = i;
if (c == 'c' || !c) {
@@ -2289,7 +2289,7 @@ static char *awk_printf(node *n, int *len)
}
free(fmt);
- nvfree(v, 1);
+ nvfree(tmpvar, 1);
b = xrealloc(b, i + 1);
b[i] = '\0';
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
@@ -2429,7 +2429,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)
- var *tv;
+ var *tmpvars;
node *an[4];
var *av[4];
const char *as[4];
@@ -2441,7 +2441,12 @@ static NOINLINE var *exec_builtin(node *op, var *res)
time_t tt;
int i, l, ll, n;
- tv = nvalloc(4);
+ tmpvars = nvalloc(4);
+#define TMPVAR0 (tmpvars)
+#define TMPVAR1 (tmpvars + 1)
+#define TMPVAR2 (tmpvars + 2)
+#define TMPVAR3 (tmpvars + 3)
+#define TMPVAR(i) (tmpvars + (i))
isr = info = op->info;
op = op->l.n;
@@ -2449,7 +2454,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
for (i = 0; i < 4 && op; i++) {
an[i] = nextarg(&op);
if (isr & 0x09000000)
- av[i] = evaluate(an[i], &tv[i]);
+ av[i] = evaluate(an[i], TMPVAR(i));
if (isr & 0x08000000)
as[i] = getvar_s(av[i]);
isr >>= 1;
@@ -2474,7 +2479,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
if (nargs > 2) {
spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
- an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
+ an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
} else {
spl = &fsplitter.n;
}
@@ -2617,7 +2622,13 @@ static NOINLINE var *exec_builtin(node *op, var *res)
break;
}
- nvfree(tv, 4);
+ nvfree(tmpvars, 4);
+#undef TMPVAR0
+#undef TMPVAR1
+#undef TMPVAR2
+#undef TMPVAR3
+#undef TMPVAR
+
return res;
#undef tspl
}
@@ -2636,14 +2647,16 @@ static var *evaluate(node *op, var *res)
#define seed (G.evaluate__seed)
#define sreg (G.evaluate__sreg)
- var *v1;
+ var *tmpvars;
+#define TMPVAR0 (tmpvars)
+#define TMPVAR1 (tmpvars + 1)
if (!op)
return setvar_s(res, NULL);
debug_printf_eval("entered %s()\n", __func__);
- v1 = nvalloc(2);
+ tmpvars = nvalloc(2);
while (op) {
struct {
@@ -2683,7 +2696,7 @@ static var *evaluate(node *op, var *res)
}
if (op1->r.n) { /* array ref? */
const char *s;
- s = getvar_s(evaluate(op1->r.n, v1));
+ s = getvar_s(evaluate(op1->r.n, TMPVAR0));
hash_remove(iamarray(v), s);
} else {
clear_array(iamarray(v));
@@ -2693,7 +2706,7 @@ static var *evaluate(node *op, var *res)
/* execute inevitable things */
if (opinfo & OF_RES1)
- L.v = evaluate(op1, v1);
+ L.v = evaluate(op1, TMPVAR0);
if (opinfo & OF_STR1) {
L.s = getvar_s(L.v);
debug_printf_eval("L.s:'%s'\n", L.s);
@@ -2710,7 +2723,7 @@ static var *evaluate(node *op, var *res)
* (Seen trying to evaluate "$444 $44444")
*/
if (opinfo & OF_RES2) {
- R.v = evaluate(op->r.n, v1+1);
+ R.v = evaluate(op->r.n, TMPVAR1);
//TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
//L.v = NULL;
}
@@ -2793,7 +2806,7 @@ static var *evaluate(node *op, var *res)
fputs(getvar_s(intvar[F0]), F);
} else {
for (;;) {
- var *v = evaluate(nextarg(&op1), v1);
+ var *v = evaluate(nextarg(&op1), TMPVAR0);
if (v->type & VF_NUMBER) {
fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
getvar_i(v), TRUE);
@@ -2892,7 +2905,7 @@ static var *evaluate(node *op, var *res)
/* if source is a temporary string, jusk relink it to dest */
//Disabled: if R.v is numeric but happens to have cached R.v->string,
//then L.v ends up being a string, which is wrong
-// if (R.v == v1+1 && R.v->string) {
+// if (R.v == TMPVAR1 && R.v->string) {
// res = setvar_p(L.v, R.v->string);
// R.v->string = NULL;
// } else {
@@ -2908,7 +2921,7 @@ static var *evaluate(node *op, var *res)
break;
case XC( OC_FUNC ): {
- var *tv, *sv_fnargs;
+ var *argvars, *sv_fnargs;
const char *sv_progname;
int nargs, i;
@@ -2919,10 +2932,10 @@ static var *evaluate(node *op, var *res)
/* The body might be empty, still has to eval the args */
nargs = op->r.f->nargs;
- tv = nvalloc(nargs);
+ argvars = nvalloc(nargs);
i = 0;
while (op1) {
- var *arg = evaluate(nextarg(&op1), v1);
+ var *arg = evaluate(nextarg(&op1), TMPVAR0);
if (i == nargs) {
/* call with more arguments than function takes.
* (gawk warns: "warning: function 'f' called with more arguments than declared").
@@ -2930,18 +2943,18 @@ static var *evaluate(node *op, var *res)
clrvar(arg);
continue;
}
- copyvar(&tv[i], arg);
- tv[i].type |= VF_CHILD;
- tv[i].x.parent = arg;
+ copyvar(&argvars[i], arg);
+ argvars[i].type |= VF_CHILD;
+ argvars[i].x.parent = arg;
i++;
}
sv_fnargs = fnargs;
sv_progname = g_progname;
- fnargs = tv;
+ fnargs = argvars;
res = evaluate(op->r.f->body.first, res);
- nvfree(fnargs, nargs);
+ nvfree(argvars, nargs);
g_progname = sv_progname;
fnargs = sv_fnargs;
@@ -3266,7 +3279,10 @@ static var *evaluate(node *op, var *res)
break;
} /* while (op) */
- nvfree(v1, 2);
+ nvfree(tmpvars, 2);
+#undef TMPVAR0
+#undef TMPVAR1
+
debug_printf_eval("returning from %s(): %p\n", __func__, res);
return res;
#undef fnargs
--
2.27.0
From b1abb8374ff4bd36d9e850a92ab7a3a7668615d2 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Thu, 1 Jul 2021 17:50:26 +0200
Subject: [PATCH 28/61] awk: use static tmpvars instead of nvalloc(1)ed ones
ptest() was using this idea already.
As far as I can see, this is safe. Testsuite passes.
One downside is that a temporary from e.g. printf invocation
won't be freed until the next printf call.
function old new delta
awk_printf 481 468 -13
as_regex 137 111 -26
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-39) Total: -39 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 49 ++++++++++++++++++++++++++++++++++---------------
1 file changed, 34 insertions(+), 15 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 2c2cb74d7..0be044eef 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -559,7 +559,9 @@ struct globals2 {
unsigned evaluate__seed;
regex_t evaluate__sreg;
- var ptest__v;
+ var ptest__tmpvar;
+ var awk_printf__tmpvar;
+ var as_regex__tmpvar;
tsplitter exec_builtin__tspl;
@@ -1775,14 +1777,19 @@ static node *mk_splitter(const char *s, tsplitter *spl)
static regex_t *as_regex(node *op, regex_t *preg)
{
int cflags;
- var *tmpvar;
const char *s;
if ((op->info & OPCLSMASK) == OC_REGEXP) {
return icase ? op->r.ire : op->l.re;
}
- tmpvar = nvalloc(1);
- s = getvar_s(evaluate(op, tmpvar));
+
+#define TMPVAR (&G.as_regex__tmpvar)
+ //tmpvar = nvalloc(1);
+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
+ // to decrease memory consumption in deeply-recursive awk programs.
+ // The rule to work safely is to never call evaluate() while our static
+ // TMPVAR's value is still needed.
+ s = getvar_s(evaluate(op, TMPVAR));
cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
/* Testcase where REG_EXTENDED fails (unpaired '{'):
@@ -1794,7 +1801,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
cflags &= ~REG_EXTENDED;
xregcomp(preg, s, cflags);
}
- nvfree(tmpvar, 1);
+ //nvfree(tmpvar, 1);
+#undef TMPVAR
return preg;
}
@@ -2105,8 +2113,11 @@ static int hashwalk_next(var *v)
/* evaluate node, return 1 when result is true, 0 otherwise */
static int ptest(node *pattern)
{
- /* ptest__v is "static": to save stack space? */
- return istrue(evaluate(pattern, &G.ptest__v));
+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
+ // to decrease memory consumption in deeply-recursive awk programs.
+ // The rule to work safely is to never call evaluate() while our static
+ // TMPVAR's value is still needed.
+ return istrue(evaluate(pattern, &G.ptest__tmpvar));
}
/* read next record from stream rsm into a variable v */
@@ -2243,12 +2254,18 @@ static char *awk_printf(node *n, int *len)
const char *s1;
int i, j, incr, bsize;
char c, c1;
- var *tmpvar, *arg;
-
- tmpvar = nvalloc(1);
-//TODO: above, to avoid allocating a single temporary var, take a pointer
-//to a temporary that our caller (evaluate()) already has?
- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar)));
+ var *arg;
+
+ //tmpvar = nvalloc(1);
+#define TMPVAR (&G.awk_printf__tmpvar)
+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
+ // to decrease memory consumption in deeply-recursive awk programs.
+ // The rule to work safely is to never call evaluate() while our static
+ // TMPVAR's value is still needed.
+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
+ // ^^^^^^^^^ here we immediately strdup() the value, so the later call
+ // to evaluate() potentially recursing into another awk_printf() can't
+ // mangle the value.
i = 0;
while (*f) {
@@ -2268,7 +2285,7 @@ static char *awk_printf(node *n, int *len)
f++;
c1 = *f;
*f = '\0';
- arg = evaluate(nextarg(&n), tmpvar);
+ arg = evaluate(nextarg(&n), TMPVAR);
j = i;
if (c == 'c' || !c) {
@@ -2289,7 +2306,9 @@ static char *awk_printf(node *n, int *len)
}
free(fmt);
- nvfree(tmpvar, 1);
+// nvfree(tmpvar, 1);
+#undef TMPVAR
+
b = xrealloc(b, i + 1);
b[i] = '\0';
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
--
2.27.0
From de5007b20bc226273fb50130f2cb0fcaf7abfd3b Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 14:27:40 +0200
Subject: [PATCH 29/61] awk: shuffle functions to reduce forward declarations,
no code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 192 ++++++++++++++++++++++++--------------------------
1 file changed, 94 insertions(+), 98 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 0be044eef..6833c2f0d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -619,18 +619,6 @@ struct globals2 {
G.evaluate__seed = 1; \
} while (0)
-
-/* function prototypes */
-static void handle_special(var *);
-static node *parse_expr(uint32_t);
-static void chain_group(void);
-static var *evaluate(node *, var *);
-static rstream *next_input_file(void);
-static int fmt_num(char *, int, const char *, double, int);
-static int awk_exit(int) NORETURN;
-
-/* ---- error handling ---- */
-
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
@@ -642,10 +630,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
-static void zero_out_var(var *vp)
-{
- memset(vp, 0, sizeof(*vp));
-}
+static int awk_exit(int) NORETURN;
static void syntax_error(const char *message) NORETURN;
static void syntax_error(const char *message)
@@ -653,6 +638,11 @@ static void syntax_error(const char *message)
bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
}
+static void zero_out_var(var *vp)
+{
+ memset(vp, 0, sizeof(*vp));
+}
+
/* ---- hash stuff ---- */
static unsigned hashidx(const char *name)
@@ -885,10 +875,29 @@ static double my_strtod(char **pp)
/* -------- working with variables (set/get/copy/etc) -------- */
-static xhash *iamarray(var *v)
+static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
{
- var *a = v;
+ int r = 0;
+ char c;
+ const char *s = format;
+
+ if (int_as_int && n == (long long)n) {
+ r = snprintf(b, size, "%lld", (long long)n);
+ } else {
+ do { c = *s; } while (c && *++s);
+ if (strchr("diouxX", c)) {
+ r = snprintf(b, size, format, (int)n);
+ } else if (strchr("eEfgG", c)) {
+ r = snprintf(b, size, format, n);
+ } else {
+ syntax_error(EMSG_INV_FMT);
+ }
+ }
+ return r;
+}
+static xhash *iamarray(var *a)
+{
while (a->type & VF_CHILD)
a = a->x.parent;
@@ -913,6 +922,8 @@ static var *clrvar(var *v)
return v;
}
+static void handle_special(var *);
+
/* assign string value to variable */
static var *setvar_p(var *v, char *value)
{
@@ -1284,6 +1295,8 @@ static void mk_re_node(const char *s, node *n, regex_t *re)
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
}
+static node *parse_expr(uint32_t);
+
static node *parse_lrparen_list(void)
{
next_token(TC_LPAREN);
@@ -1488,6 +1501,8 @@ static void chain_expr(uint32_t info)
rollback_token();
}
+static void chain_group(void);
+
static node *chain_loop(node *nn)
{
node *n, *n2, *save_brk, *save_cont;
@@ -1770,6 +1785,8 @@ static node *mk_splitter(const char *s, tsplitter *spl)
return n;
}
+static var *evaluate(node *, var *);
+
/* Use node as a regular expression. Supplied with node ptr and regex_t
* storage space. Return ptr to regex (if result points to preg, it should
* be later regfree'd manually).
@@ -2222,27 +2239,6 @@ static int awk_getline(rstream *rsm, var *v)
return r;
}
-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
-{
- int r = 0;
- char c;
- const char *s = format;
-
- if (int_as_int && n == (long long)n) {
- r = snprintf(b, size, "%lld", (long long)n);
- } else {
- do { c = *s; } while (c && *++s);
- if (strchr("diouxX", c)) {
- r = snprintf(b, size, format, (int)n);
- } else if (strchr("eEfgG", c)) {
- r = snprintf(b, size, format, n);
- } else {
- syntax_error(EMSG_INV_FMT);
- }
- }
- return r;
-}
-
/* formatted output into an allocated buffer, return ptr to buffer */
#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
# define awk_printf(a, b) awk_printf(a)
@@ -2306,7 +2302,7 @@ static char *awk_printf(node *n, int *len)
}
free(fmt);
-// nvfree(tmpvar, 1);
+ //nvfree(tmpvar, 1);
#undef TMPVAR
b = xrealloc(b, i + 1);
@@ -2652,6 +2648,64 @@ static NOINLINE var *exec_builtin(node *op, var *res)
#undef tspl
}
+/* if expr looks like "var=value", perform assignment and return 1,
+ * otherwise return 0 */
+static int is_assignment(const char *expr)
+{
+ char *exprc, *val;
+
+ if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
+ return FALSE;
+ }
+
+ exprc = xstrdup(expr);
+ val = exprc + (val - expr);
+ *val++ = '\0';
+
+ unescape_string_in_place(val);
+ setvar_u(newvar(exprc), val);
+ free(exprc);
+ return TRUE;
+}
+
+/* switch to next input file */
+static rstream *next_input_file(void)
+{
+#define rsm (G.next_input_file__rsm)
+#define files_happen (G.next_input_file__files_happen)
+
+ FILE *F;
+ const char *fname, *ind;
+
+ if (rsm.F)
+ fclose(rsm.F);
+ rsm.F = NULL;
+ rsm.pos = rsm.adv = 0;
+
+ for (;;) {
+ if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
+ if (files_happen)
+ return NULL;
+ fname = "-";
+ F = stdin;
+ break;
+ }
+ ind = getvar_s(incvar(intvar[ARGIND]));
+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
+ if (fname && *fname && !is_assignment(fname)) {
+ F = xfopen_stdin(fname);
+ break;
+ }
+ }
+
+ files_happen = TRUE;
+ setvar_s(intvar[FILENAME], fname);
+ rsm.F = F;
+ return &rsm;
+#undef rsm
+#undef files_happen
+}
+
/*
* Evaluate node - the heart of the program. Supplied with subtree
* and place where to store result. Returns ptr to result.
@@ -3338,64 +3392,6 @@ static int awk_exit(int r)
exit(r);
}
-/* if expr looks like "var=value", perform assignment and return 1,
- * otherwise return 0 */
-static int is_assignment(const char *expr)
-{
- char *exprc, *val;
-
- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
- return FALSE;
- }
-
- exprc = xstrdup(expr);
- val = exprc + (val - expr);
- *val++ = '\0';
-
- unescape_string_in_place(val);
- setvar_u(newvar(exprc), val);
- free(exprc);
- return TRUE;
-}
-
-/* switch to next input file */
-static rstream *next_input_file(void)
-{
-#define rsm (G.next_input_file__rsm)
-#define files_happen (G.next_input_file__files_happen)
-
- FILE *F;
- const char *fname, *ind;
-
- if (rsm.F)
- fclose(rsm.F);
- rsm.F = NULL;
- rsm.pos = rsm.adv = 0;
-
- for (;;) {
- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
- if (files_happen)
- return NULL;
- fname = "-";
- F = stdin;
- break;
- }
- ind = getvar_s(incvar(intvar[ARGIND]));
- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
- if (fname && *fname && !is_assignment(fname)) {
- F = xfopen_stdin(fname);
- break;
- }
- }
-
- files_happen = TRUE;
- setvar_s(intvar[FILENAME], fname);
- rsm.F = F;
- return &rsm;
-#undef rsm
-#undef files_happen
-}
-
int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int awk_main(int argc UNUSED_PARAM, char **argv)
{
--
2.27.0
From c14ab33f2d8eb07dbf27570be30121cc9734ba04 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 14:29:01 +0200
Subject: [PATCH 30/61] awk: when parsing length(), simplify eating of LPAREN
function old new delta
parse_expr 945 948 +3
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 6833c2f0d..f65449a09 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1453,10 +1453,11 @@ static node *parse_expr(uint32_t term_tc)
| TC_BINOPX /* length <op> NUM */
| TC_COMMA /* print length, 1 */
);
- rollback_token();
- if (tc & TC_LPAREN) {
+ if (tc != TC_LPAREN)
+ rollback_token();
+ else {
/* It was a "(" token. Handle just like TC_BUILTIN */
- cn->l.n = parse_lrparen_list();
+ cn->l.n = parse_expr(TC_RPAREN);
}
break;
}
--
2.27.0
From 8be97151d5ba9f98f27f58068416c203565708d0 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 14:33:13 +0200
Subject: [PATCH 31/61] awk: use "static" tmpvars in main and exit
function old new delta
awk_exit 103 93 -10
awk_main 850 832 -18
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-28) Total: -28 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index f65449a09..9f5a94037 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -562,6 +562,8 @@ struct globals2 {
var ptest__tmpvar;
var awk_printf__tmpvar;
var as_regex__tmpvar;
+ var exit__tmpvar;
+ var main__tmpvar;
tsplitter exec_builtin__tspl;
@@ -638,11 +640,6 @@ static void syntax_error(const char *message)
bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
}
-static void zero_out_var(var *vp)
-{
- memset(vp, 0, sizeof(*vp));
-}
-
/* ---- hash stuff ---- */
static unsigned hashidx(const char *name)
@@ -3372,11 +3369,9 @@ static int awk_exit(int r)
unsigned i;
if (!exiting) {
- var tv;
exiting = TRUE;
nextrec = FALSE;
- zero_out_var(&tv);
- evaluate(endseq.first, &tv);
+ evaluate(endseq.first, &G.exit__tmpvar);
}
/* waiting for children */
@@ -3404,7 +3399,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
llist_t *list_e = NULL;
#endif
int i;
- var tv;
INIT_G();
@@ -3514,8 +3508,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
newfile("/dev/stdout")->F = stdout;
newfile("/dev/stderr")->F = stderr;
- zero_out_var(&tv);
- evaluate(beginseq.first, &tv);
+ evaluate(beginseq.first, &G.main__tmpvar);
if (!mainseq.first && !endseq.first)
awk_exit(EXIT_SUCCESS);
@@ -3532,7 +3525,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
nextrec = FALSE;
incvar(intvar[NR]);
incvar(intvar[FNR]);
- evaluate(mainseq.first, &tv);
+ evaluate(mainseq.first, &G.main__tmpvar);
if (nextfile)
break;
--
2.27.0
From 7f4cd583daf8dcb431f07fd3402ca7ddc11b21ab Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 14:53:52 +0200
Subject: [PATCH 32/61] awk: shuffle globals for smaller offsets
function old new delta
awk_main 832 829 -3
evaluate 3229 3223 -6
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-9) Total: -9 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 25 +++++++++++++------------
1 file changed, 13 insertions(+), 12 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 9f5a94037..068ed687b 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -536,6 +536,11 @@ struct globals {
smallint nextfile;
smallint is_f0_split;
smallint t_rollback;
+
+ /* former statics from various functions */
+ smallint next_token__concat_inserted;
+ uint32_t next_token__save_tclass;
+ uint32_t next_token__save_info;
};
struct globals2 {
uint32_t t_info; /* often used */
@@ -548,15 +553,11 @@ struct globals2 {
/* former statics from various functions */
char *split_f0__fstrings;
- uint32_t next_token__save_tclass;
- uint32_t next_token__save_info;
- smallint next_token__concat_inserted;
-
- smallint next_input_file__files_happen;
rstream next_input_file__rsm;
+ smallint next_input_file__files_happen;
- var *evaluate__fnargs;
unsigned evaluate__seed;
+ var *evaluate__fnargs;
regex_t evaluate__sreg;
var ptest__tmpvar;
@@ -575,10 +576,10 @@ struct globals2 {
#define G1 (ptr_to_globals[-1])
#define G (*(struct globals2 *)ptr_to_globals)
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
-/*char G1size[sizeof(G1)]; - 0x74 */
-/*char Gsize[sizeof(G)]; - 0x1c4 */
+//char G1size[sizeof(G1)]; // 0x70
+//char Gsize[sizeof(G)]; // 0x2f8
/* Trying to keep most of members accessible with short offsets: */
-/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
+//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
#define t_double (G1.t_double )
#define beginseq (G1.beginseq )
#define mainseq (G1.mainseq )
@@ -1056,9 +1057,9 @@ static int istrue(var *v)
*/
static uint32_t next_token(uint32_t expected)
{
-#define concat_inserted (G.next_token__concat_inserted)
-#define save_tclass (G.next_token__save_tclass)
-#define save_info (G.next_token__save_info)
+#define concat_inserted (G1.next_token__concat_inserted)
+#define save_tclass (G1.next_token__save_tclass)
+#define save_info (G1.next_token__save_info)
char *p;
const char *tl;
--
2.27.0
From 51262cc2c47f586d9478cc3c4f4977d98b36222b Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 15:19:14 +0200
Subject: [PATCH 33/61] awk: do not special-case "delete"
Rework of the previous fix:
Can use operation attributes to disable arg evaluation instead of special-casing.
function old new delta
.rodata 104032 104036 +4
evaluate 3223 3215 -8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-8) Total: -4 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 56 +++++++++++++++++++++++++--------------------------
1 file changed, 27 insertions(+), 29 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 068ed687b..a3dda6959 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -319,7 +319,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
#define xV OF_RES2
#define xS (OF_RES2 | OF_STR2)
#define Vx OF_RES1
-#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED)
+#define Rx OF_REQUIRED
#define VV (OF_RES1 | OF_RES2)
#define Nx (OF_RES1 | OF_NUM1)
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
@@ -2750,32 +2750,6 @@ static var *evaluate(node *op, var *res)
op1 = op->l.n;
debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
- /* "delete" is special:
- * "delete array[var--]" must evaluate index expr only once,
- * must not evaluate it in "execute inevitable things" part.
- */
- if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
- uint32_t info = op1->info & OPCLSMASK;
- var *v;
-
- debug_printf_eval("DELETE\n");
- if (info == OC_VAR) {
- v = op1->l.v;
- } else if (info == OC_FNARG) {
- v = &fnargs[op1->l.aidx];
- } else {
- syntax_error(EMSG_NOT_ARRAY);
- }
- if (op1->r.n) { /* array ref? */
- const char *s;
- s = getvar_s(evaluate(op1->r.n, TMPVAR0));
- hash_remove(iamarray(v), s);
- } else {
- clear_array(iamarray(v));
- }
- goto next;
- }
-
/* execute inevitable things */
if (opinfo & OF_RES1)
L.v = evaluate(op1, TMPVAR0);
@@ -2905,7 +2879,31 @@ static var *evaluate(node *op, var *res)
break;
}
- /* case XC( OC_DELETE ): - moved to happen before arg evaluation */
+ case XC( OC_DELETE ):
+ debug_printf_eval("DELETE\n");
+ {
+ /* "delete" is special:
+ * "delete array[var--]" must evaluate index expr only once.
+ */
+ uint32_t info = op1->info & OPCLSMASK;
+ var *v;
+
+ if (info == OC_VAR) {
+ v = op1->l.v;
+ } else if (info == OC_FNARG) {
+ v = &fnargs[op1->l.aidx];
+ } else {
+ syntax_error(EMSG_NOT_ARRAY);
+ }
+ if (op1->r.n) { /* array ref? */
+ const char *s;
+ s = getvar_s(evaluate(op1->r.n, TMPVAR0));
+ hash_remove(iamarray(v), s);
+ } else {
+ clear_array(iamarray(v));
+ }
+ break;
+ }
case XC( OC_NEWSOURCE ):
debug_printf_eval("NEWSOURCE\n");
@@ -3342,7 +3340,7 @@ static var *evaluate(node *op, var *res)
default:
syntax_error(EMSG_POSSIBLE_ERROR);
} /* switch */
- next:
+
if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
op = op->a.n;
if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
--
2.27.0
From 2f36bdf0eb01846b23c1a340ff6f19fd9377ed6a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 17:32:08 +0200
Subject: [PATCH 34/61] awk: make builtin definitions more understandable, no
code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 71 +++++++++++++++++++++++++++++++++++----------------
1 file changed, 49 insertions(+), 22 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index a3dda6959..fb841687e 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -331,8 +331,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
#define OPNMASK 0x007F
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
- * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
- * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
+ * (for builtins it has different meaning)
*/
#undef P
#undef PRIMASK
@@ -430,8 +429,6 @@ static const char tokenlist[] ALIGN1 =
/* compiler adds trailing "\0" */
;
-#define OC_B OC_BUILTIN
-
static const uint32_t tokeninfo[] ALIGN4 = {
0,
0,
@@ -464,20 +461,43 @@ static const uint32_t tokeninfo[] ALIGN4 = {
OC_RETURN|Vx, OC_EXIT|Nx,
ST_WHILE,
0, /* else */
- OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
- OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
- OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
- OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
- OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
- OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
- OC_GETLINE|SV|P(0),
- 0, 0,
- 0,
- 0 /* TC_END */
+// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
+// Highest byte bit pattern: nn s3s2s1 v3v2v1
+// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
+// OC_FBLTIN's are builtins with one optional argument,
+// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt
+// zero args for: rand systime
+// Do have one optional arg: fflush, srand, length
+#define OC_B OC_BUILTIN
+#define A1 P(0x40) /*one arg*/
+#define A2 P(0x80) /*two args*/
+#define A3 P(0xc0) /*three args*/
+#define __v P(1)
+#define _vv P(3)
+#define __s__v P(9)
+#define __s_vv P(0x0b)
+#define __svvv P(0x0f)
+#define _ss_vv P(0x1b)
+#define _s_vv_ P(0x16)
+#define ss_vv_ P(0x36)
+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
+ OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
+ OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log
+ OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
+ OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
+ OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
+ OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
+ OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
+ OC_FBLTIN|Sx|F_le, // length
+ OC_GETLINE|SV, // getline
+ 0, 0, // func function
+ 0, // BEGIN
+ 0 // END
+#undef A1
+#undef A2
+#undef A3
+#undef OC_B
};
/* internal variable names and their initial values */
@@ -1630,6 +1650,7 @@ static void chain_group(void)
debug_printf_parse("%s: OC_BREAK\n", __func__);
n = chain_node(OC_EXEC);
n->a.n = break_ptr;
+//TODO: if break_ptr is NULL, syntax error (not in the loop)?
chain_expr(t_info);
break;
@@ -1637,6 +1658,7 @@ static void chain_group(void)
debug_printf_parse("%s: OC_CONTINUE\n", __func__);
n = chain_node(OC_EXEC);
n->a.n = continue_ptr;
+//TODO: if continue_ptr is NULL, syntax error (not in the loop)?
chain_expr(t_info);
break;
@@ -1799,8 +1821,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
return icase ? op->r.ire : op->l.re;
}
-#define TMPVAR (&G.as_regex__tmpvar)
//tmpvar = nvalloc(1);
+#define TMPVAR (&G.as_regex__tmpvar)
// We use a single "static" tmpvar (instead of on-stack or malloced one)
// to decrease memory consumption in deeply-recursive awk programs.
// The rule to work safely is to never call evaluate() while our static
@@ -2720,8 +2742,6 @@ static var *evaluate(node *op, var *res)
#define sreg (G.evaluate__sreg)
var *tmpvars;
-#define TMPVAR0 (tmpvars)
-#define TMPVAR1 (tmpvars + 1)
if (!op)
return setvar_s(res, NULL);
@@ -2729,6 +2749,8 @@ static var *evaluate(node *op, var *res)
debug_printf_eval("entered %s()\n", __func__);
tmpvars = nvalloc(2);
+#define TMPVAR0 (tmpvars)
+#define TMPVAR1 (tmpvars + 1)
while (op) {
struct {
@@ -3166,7 +3188,7 @@ static var *evaluate(node *op, var *res)
rstream *rsm;
int err = 0;
rsm = (rstream *)hash_search(fdhash, L.s);
- debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
+ debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
if (rsm) {
debug_printf_eval("OC_FBLTIN F_cl "
"rsm->is_pipe:%d, ->F:%p\n",
@@ -3177,6 +3199,11 @@ static var *evaluate(node *op, var *res)
*/
if (rsm->F)
err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
+//TODO: fix this case:
+// $ awk 'BEGIN { print close(""); print ERRNO }'
+// -1
+// close of redirection that was never opened
+// (we print 0, 0)
free(rsm->buffer);
hash_remove(fdhash, L.s);
}
--
2.27.0
From 8eb26034fb7225862c73f1dfa947a5d4910a0935 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 18:28:12 +0200
Subject: [PATCH 35/61] awk: enforce simple builtins' argument number
function old new delta
evaluate 3215 3303 +88
.rodata 104036 104107 +71
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 159/0) Total: 159 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 45 ++++++++++++++++++++++++++++-----------------
1 file changed, 28 insertions(+), 17 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index fb841687e..1925e0771 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -464,11 +464,11 @@ static const uint32_t tokeninfo[] ALIGN4 = {
// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
// Highest byte bit pattern: nn s3s2s1 v3v2v1
// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
-// OC_FBLTIN's are builtins with one optional argument,
-// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt
-// zero args for: rand systime
-// Do have one optional arg: fflush, srand, length
-#define OC_B OC_BUILTIN
+// OC_FBLTIN's are builtins with zero or one argument.
+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt.
+// Check for no args is present in builtins' code (not in this table): rand, systime.
+// Have one _optional_ arg: fflush, srand, length
+#define OC_B OC_BUILTIN
#define A1 P(0x40) /*one arg*/
#define A2 P(0x80) /*two args*/
#define A3 P(0xc0) /*three args*/
@@ -480,15 +480,15 @@ static const uint32_t tokeninfo[] ALIGN4 = {
#define _ss_vv P(0x1b)
#define _s_vv_ P(0x16)
#define ss_vv_ P(0x36)
- OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
- OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log
- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
- OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
- OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
- OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
- OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
+ OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
+ OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log
+ OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
+ OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
+ OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
+ OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
+ OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
OC_FBLTIN|Sx|F_le, // length
OC_GETLINE|SV, // getline
0, 0, // func function
@@ -2773,8 +2773,11 @@ static var *evaluate(node *op, var *res)
debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
/* execute inevitable things */
- if (opinfo & OF_RES1)
+ if (opinfo & OF_RES1) {
+ if ((opinfo & OF_REQUIRED) && !op1)
+ syntax_error(EMSG_TOO_FEW_ARGS);
L.v = evaluate(op1, TMPVAR0);
+ }
if (opinfo & OF_STR1) {
L.s = getvar_s(L.v);
debug_printf_eval("L.s:'%s'\n", L.s);
@@ -3101,12 +3104,18 @@ static var *evaluate(node *op, var *res)
double R_d = R_d; /* for compiler */
debug_printf_eval("FBLTIN\n");
+ if (op1 && (op1->info & OPCLSMASK) == OC_COMMA)
+ /* Simple builtins take one arg maximum */
+ syntax_error("Too many arguments");
+
switch (opn) {
case F_in:
R_d = (long long)L_d;
break;
- case F_rn:
+ case F_rn: /*rand*/
+ if (op1)
+ syntax_error("Too many arguments");
R_d = (double)rand() / (double)RAND_MAX;
break;
@@ -3149,7 +3158,9 @@ static var *evaluate(node *op, var *res)
srand(seed);
break;
- case F_ti:
+ case F_ti: /*systime*/
+ if (op1)
+ syntax_error("Too many arguments");
R_d = time(NULL);
break;
--
2.27.0
From bd554e662f7246fd1518db37049aaf9ecf61bce9 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 18:55:00 +0200
Subject: [PATCH 36/61] awk: beautify builtins table, no code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 26 ++++++++++++++------------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 1925e0771..8d7777ca6 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -464,11 +464,12 @@ static const uint32_t tokeninfo[] ALIGN4 = {
// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
// Highest byte bit pattern: nn s3s2s1 v3v2v1
// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
-// OC_FBLTIN's are builtins with zero or one argument.
+// OC_F's are builtins with zero or one argument.
// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt.
// Check for no args is present in builtins' code (not in this table): rand, systime.
// Have one _optional_ arg: fflush, srand, length
#define OC_B OC_BUILTIN
+#define OC_F OC_FBLTIN
#define A1 P(0x40) /*one arg*/
#define A2 P(0x80) /*two args*/
#define A3 P(0xc0) /*three args*/
@@ -480,17 +481,17 @@ static const uint32_t tokeninfo[] ALIGN4 = {
#define _ss_vv P(0x1b)
#define _s_vv_ P(0x16)
#define ss_vv_ P(0x36)
- OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
- OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
- OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
- OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log
- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
- OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
- OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
- OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
- OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
- OC_FBLTIN|Sx|F_le, // length
- OC_GETLINE|SV, // getline
+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
+ OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2
+ OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log
+ OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand
+ OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
+ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub
+ OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
+ OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper
+ OC_F|F_le|Sx, // length
+ OC_GETLINE|SV, // getline
0, 0, // func function
0, // BEGIN
0 // END
@@ -498,6 +499,7 @@ static const uint32_t tokeninfo[] ALIGN4 = {
#undef A2
#undef A3
#undef OC_B
+#undef OC_F
};
/* internal variable names and their initial values */
--
2.27.0
From 2fcb86ed0176fcfe85d279d637a3d1b15ecf24bb Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 19:38:03 +0200
Subject: [PATCH 37/61] awk: rand() could return 1.0, fix this - should be in
[0,1)
While at it, make it finer-grained (63 bits of randomness)
function old new delta
evaluate 3303 3336 +33
.rodata 104107 104111 +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 37/0) Total: 37 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 8d7777ca6..64fe81be4 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -3118,9 +3118,20 @@ static var *evaluate(node *op, var *res)
case F_rn: /*rand*/
if (op1)
syntax_error("Too many arguments");
- R_d = (double)rand() / (double)RAND_MAX;
+ {
+#if RAND_MAX >= 0x7fffffff
+ uint32_t u = ((uint32_t)rand() << 16) ^ rand();
+ uint64_t v = ((uint64_t)rand() << 32) | u;
+ /* the above shift+or is optimized out on 32-bit arches */
+# if RAND_MAX > 0x7fffffff
+ v &= 0x7fffffffffffffffUL;
+# endif
+ R_d = (double)v / 0x8000000000000000UL;
+#else
+# error Not implemented for this value of RAND_MAX
+#endif
break;
-
+ }
case F_co:
if (ENABLE_FEATURE_AWK_LIBM) {
R_d = cos(L_d);
--
2.27.0
From c4aa325fa23237d1c9452ed2be468730d6e2c615 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 22:28:51 +0200
Subject: [PATCH 38/61] awk: fix behavior of "exit" without parameter
function old new delta
evaluate 3336 3339 +3
awk_exit 93 94 +1
awk_main 829 827 -2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-2) Total: 2 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 40 ++++++++++++++++++++++------------------
testsuite/awk.tests | 5 +++++
2 files changed, 27 insertions(+), 18 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 64fe81be4..86cb7a95f 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -578,6 +578,8 @@ struct globals2 {
rstream next_input_file__rsm;
smallint next_input_file__files_happen;
+ smalluint exitcode;
+
unsigned evaluate__seed;
var *evaluate__fnargs;
regex_t evaluate__sreg;
@@ -655,7 +657,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
-static int awk_exit(int) NORETURN;
+static int awk_exit(void) NORETURN;
static void syntax_error(const char *message) NORETURN;
static void syntax_error(const char *message)
@@ -2779,14 +2781,14 @@ static var *evaluate(node *op, var *res)
if ((opinfo & OF_REQUIRED) && !op1)
syntax_error(EMSG_TOO_FEW_ARGS);
L.v = evaluate(op1, TMPVAR0);
- }
- if (opinfo & OF_STR1) {
- L.s = getvar_s(L.v);
- debug_printf_eval("L.s:'%s'\n", L.s);
- }
- if (opinfo & OF_NUM1) {
- L_d = getvar_i(L.v);
- debug_printf_eval("L_d:%f\n", L_d);
+ if (opinfo & OF_STR1) {
+ L.s = getvar_s(L.v);
+ debug_printf_eval("L.s:'%s'\n", L.s);
+ }
+ if (opinfo & OF_NUM1) {
+ L_d = getvar_i(L.v);
+ debug_printf_eval("L_d:%f\n", L_d);
+ }
}
/* NB: Must get string/numeric values of L (done above)
* _before_ evaluate()'ing R.v: if both L and R are $NNNs,
@@ -2799,10 +2801,10 @@ static var *evaluate(node *op, var *res)
R.v = evaluate(op->r.n, TMPVAR1);
//TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
//L.v = NULL;
- }
- if (opinfo & OF_STR2) {
- R.s = getvar_s(R.v);
- debug_printf_eval("R.s:'%s'\n", R.s);
+ if (opinfo & OF_STR2) {
+ R.s = getvar_s(R.v);
+ debug_printf_eval("R.s:'%s'\n", R.s);
+ }
}
debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
@@ -2955,7 +2957,9 @@ static var *evaluate(node *op, var *res)
case XC( OC_EXIT ):
debug_printf_eval("EXIT\n");
- awk_exit(L_d);
+ if (op1)
+ G.exitcode = (int)L_d;
+ awk_exit();
/* -- recursive node type -- */
@@ -3414,7 +3418,7 @@ static var *evaluate(node *op, var *res)
/* -------- main & co. -------- */
-static int awk_exit(int r)
+static int awk_exit(void)
{
unsigned i;
@@ -3435,7 +3439,7 @@ static int awk_exit(int r)
}
}
- exit(r);
+ exit(G.exitcode);
}
int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -3560,7 +3564,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
evaluate(beginseq.first, &G.main__tmpvar);
if (!mainseq.first && !endseq.first)
- awk_exit(EXIT_SUCCESS);
+ awk_exit();
/* input file could already be opened in BEGIN block */
if (!iF)
@@ -3587,6 +3591,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
iF = next_input_file();
}
- awk_exit(EXIT_SUCCESS);
+ awk_exit();
/*return 0;*/
}
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 3c230393f..770d8ffce 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -445,4 +445,9 @@ testing 'awk $NF is empty' \
'' \
'a=====123='
+testing "awk exit N propagates through END's exit" \
+ "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \
+ "42\n" \
+ '' ''
+
exit $FAILCOUNT
--
2.27.0
From 1829a5b292a37553e8cc8f544448c591b3a7b3f6 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 23:07:21 +0200
Subject: [PATCH 39/61] awk: fix detection of VAR=VAL arguments
1NAME=VAL is not an assignment, and neither is VA.R=VAL: everything before '=' must be a valid variable name
function old new delta
next_input_file 216 214 -2
is_assignment 115 91 -24
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-26) Total: -26 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 86cb7a95f..9f14f0f9a 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2679,7 +2679,8 @@ static int is_assignment(const char *expr)
{
char *exprc, *val;
- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
+ val = (char*)endofname(expr);
+ if (val == (char*)expr || *val != '=') {
return FALSE;
}
@@ -2699,7 +2700,6 @@ static rstream *next_input_file(void)
#define rsm (G.next_input_file__rsm)
#define files_happen (G.next_input_file__files_happen)
- FILE *F;
const char *fname, *ind;
if (rsm.F)
@@ -2712,20 +2712,19 @@ static rstream *next_input_file(void)
if (files_happen)
return NULL;
fname = "-";
- F = stdin;
+ rsm.F = stdin;
break;
}
ind = getvar_s(incvar(intvar[ARGIND]));
fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
if (fname && *fname && !is_assignment(fname)) {
- F = xfopen_stdin(fname);
+ rsm.F = xfopen_stdin(fname);
break;
}
}
files_happen = TRUE;
setvar_s(intvar[FILENAME], fname);
- rsm.F = F;
return &rsm;
#undef rsm
#undef files_happen
--
2.27.0
From 2e495deee760595d6b0df37f1f9b7d1e4ecab1ed Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 23:24:52 +0200
Subject: [PATCH 40/61] awk: use smaller regmatch_t arrays, they had 2 elements
for no apparent reason
function old new delta
exec_builtin 1479 1434 -45
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 9f14f0f9a..c06dd2304 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1937,7 +1937,7 @@ static int awk_split(const char *s, node *spl, char **slist)
n++; /* at least one field will be there */
do {
int l;
- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
+ regmatch_t pmatch[1];
l = strcspn(s, c+2); /* len till next NUL or \n */
if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
@@ -2166,7 +2166,7 @@ static int ptest(node *pattern)
static int awk_getline(rstream *rsm, var *v)
{
char *b;
- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
+ regmatch_t pmatch[1];
int size, a, p, pp = 0;
int fd, so, eo, r, rp;
char c, *m, *s;
@@ -2473,7 +2473,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
node *an[4];
var *av[4];
const char *as[4];
- regmatch_t pmatch[2];
+ regmatch_t pmatch[1];
regex_t sreg, *re;
node *spl;
uint32_t isr, info;
@@ -3533,6 +3533,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
parse_program(llist_pop(&list_e));
}
#endif
+//FIXME: preserve order of -e and -f
+//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
if (!(opt & (OPT_f | OPT_e))) {
if (!*argv)
bb_show_usage();
--
2.27.0
From bb55cde906cbaf136d6487ed7738003aa41b4bd5 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Fri, 2 Jul 2021 23:38:50 +0200
Subject: [PATCH 41/61] awk: move match() code out-of-line
function old new delta
exec_builtin_match - 202 +202
exec_builtin 1434 1157 -277
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/1 up/down: 202/-277) Total: -75 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 45 ++++++++++++++++++++++++++++-----------------
1 file changed, 28 insertions(+), 17 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index c06dd2304..96e06db25 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2465,6 +2465,30 @@ static NOINLINE int do_mktime(const char *ds)
return mktime(&then);
}
+/* Reduce stack usage in exec_builtin() by keeping match() code separate */
+static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res)
+{
+ regmatch_t pmatch[1];
+ regex_t sreg, *re;
+ int n;
+
+ re = as_regex(an1, &sreg);
+ n = regexec(re, as0, 1, pmatch, 0);
+ if (n == 0) {
+ pmatch[0].rm_so++;
+ pmatch[0].rm_eo++;
+ } else {
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = -1;
+ }
+ if (re == &sreg)
+ regfree(re);
+ setvar_i(newvar("RSTART"), pmatch[0].rm_so);
+ setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
+ setvar_i(res, pmatch[0].rm_so);
+}
+
+/* Reduce stack usage in evaluate() by keeping builtins' code separate */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)
@@ -2473,8 +2497,6 @@ static NOINLINE var *exec_builtin(node *op, var *res)
node *an[4];
var *av[4];
const char *as[4];
- regmatch_t pmatch[1];
- regex_t sreg, *re;
node *spl;
uint32_t isr, info;
int nargs;
@@ -2633,20 +2655,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
break;
case B_ma:
- re = as_regex(an[1], &sreg);
- n = regexec(re, as[0], 1, pmatch, 0);
- if (n == 0) {
- pmatch[0].rm_so++;
- pmatch[0].rm_eo++;
- } else {
- pmatch[0].rm_so = 0;
- pmatch[0].rm_eo = -1;
- }
- setvar_i(newvar("RSTART"), pmatch[0].rm_so);
- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
- setvar_i(res, pmatch[0].rm_so);
- if (re == &sreg)
- regfree(re);
+ exec_builtin_match(an[1], as[0], res);
break;
case B_ge:
@@ -2732,7 +2741,9 @@ static rstream *next_input_file(void)
/*
* Evaluate node - the heart of the program. Supplied with subtree
- * and place where to store result. Returns ptr to result.
+ * and "res" variable to assign the result to if we evaluate an expression.
+ * If node refers to e.g. a variable or a field, no assignment happens.
+ * Return ptr to the result (which may or may not be the "res" variable!)
*/
#define XC(n) ((n) >> 8)
--
2.27.0
From a76f1b553545e144f5456c84398a0d98a81ff70d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 00:39:55 +0200
Subject: [PATCH 42/61] awk: rename GRPSTART/END to L/RBRACE, no code changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 60 ++++++++++++++++++++++++++++-----------------------
1 file changed, 33 insertions(+), 27 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 96e06db25..a1a2afd1d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -211,8 +211,8 @@ typedef struct tsplitter_s {
#define TC_PIPE (1 << 9) /* input redirection pipe | */
#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
#define TC_ARRTERM (1 << 11) /* ] */
-#define TC_GRPSTART (1 << 12) /* { */
-#define TC_GRPTERM (1 << 13) /* } */
+#define TC_LBRACE (1 << 12) /* { */
+#define TC_RBRACE (1 << 13) /* } */
#define TC_SEMICOL (1 << 14) /* ; */
#define TC_NEWLINE (1 << 15)
#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
@@ -250,8 +250,8 @@ if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
-if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \
-if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \
+if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \
+if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \
if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
@@ -291,13 +291,13 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
| TC_FUNCDECL | TC_BEGIN | TC_END)
/* discard newlines after these */
-#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
+#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \
| TS_BINOP | TS_OPTERM)
/* what can expression begin with */
#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
/* what can group begin with */
-#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART)
+#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE)
/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
/* operator is inserted between them */
@@ -402,8 +402,8 @@ static const char tokenlist[] ALIGN1 =
"\1|" NTC /* TC_PIPE */
"\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
"\1]" NTC /* TC_ARRTERM */
- "\1{" NTC /* TC_GRPSTART */
- "\1}" NTC /* TC_GRPTERM */
+ "\1{" NTC /* TC_LBRACE */
+ "\1}" NTC /* TC_RBRACE */
"\1;" NTC /* TC_SEMICOL */
"\1\n" NTC /* TC_NEWLINE */
"\2if" "\2do" "\3for" "\5break" /* TC_STATX */
@@ -1471,7 +1471,7 @@ static node *parse_expr(uint32_t term_tc)
debug_printf_parse("%s: TC_LENGTH\n", __func__);
tc = next_token(TC_LPAREN /* length(...) */
| TS_OPTERM /* length; (or newline)*/
- | TC_GRPTERM /* length } */
+ | TC_RBRACE /* length } */
| TC_BINOPX /* length <op> NUM */
| TC_COMMA /* print length, 1 */
);
@@ -1516,11 +1516,11 @@ static void chain_expr(uint32_t info)
n = chain_node(info);
- n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM);
+ n->l.n = parse_expr(TS_OPTERM | TC_RBRACE);
if ((info & OF_REQUIRED) && !n->l.n)
syntax_error(EMSG_TOO_FEW_ARGS);
- if (t_tclass & TC_GRPTERM)
+ if (t_tclass & TC_RBRACE)
rollback_token();
}
@@ -1559,16 +1559,16 @@ static void chain_group(void)
c = next_token(TS_GRPSEQ);
} while (c & TC_NEWLINE);
- if (c & TC_GRPSTART) {
- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
- while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) {
- debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
+ if (c & TC_LBRACE) {
+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
+ while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
if (c & TC_NEWLINE)
continue;
rollback_token();
chain_group();
}
- debug_printf_parse("%s: TC_GRPTERM\n", __func__);
+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
return;
}
if (c & (TS_OPSEQ | TS_OPTERM)) {
@@ -1588,7 +1588,7 @@ static void chain_group(void)
chain_group();
n2 = chain_node(OC_EXEC);
n->r.n = seq->last;
- if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
+ if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
chain_group();
n2->a.n = seq->last;
} else {
@@ -1641,12 +1641,12 @@ static void chain_group(void)
case OC_PRINTF:
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
n = chain_node(t_info);
- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE);
if (t_tclass & TC_OUTRDR) {
n->info |= t_info;
- n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
+ n->r.n = parse_expr(TS_OPTERM | TC_RBRACE);
}
- if (t_tclass & TC_GRPTERM)
+ if (t_tclass & TC_RBRACE)
rollback_token();
break;
@@ -1684,7 +1684,7 @@ static void parse_program(char *p)
g_pos = p;
t_lineno = 1;
- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART |
+ while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
if (tclass & TS_OPTERM) {
@@ -1696,10 +1696,14 @@ static void parse_program(char *p)
if (tclass & TC_BEGIN) {
debug_printf_parse("%s: TC_BEGIN\n", __func__);
seq = &beginseq;
+//TODO: ensure there is no newline between BEGIN and {
+//next_token(TC_LBRACE); rollback_token();
chain_group();
} else if (tclass & TC_END) {
debug_printf_parse("%s: TC_END\n", __func__);
seq = &endseq;
+//TODO: ensure there is no newline between END and {
+//next_token(TC_LBRACE); rollback_token();
chain_group();
} else if (tclass & TC_FUNCDECL) {
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
@@ -1726,24 +1730,26 @@ static void parse_program(char *p)
/* it was a comma, we ate it */
}
seq = &f->body;
+//TODO: ensure there is { after "func F(...)" - but newlines are allowed
+//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token();
chain_group();
hash_clear(ahash);
} else if (tclass & TS_OPSEQ) {
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
rollback_token();
cn = chain_node(OC_TEST);
- cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART);
- if (t_tclass & TC_GRPSTART) {
- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
+ cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
+ if (t_tclass & TC_LBRACE) {
+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
rollback_token();
chain_group();
} else {
- debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
+ debug_printf_parse("%s: !TC_LBRACE\n", __func__);
chain_node(OC_PRINT);
}
cn->r.n = mainseq.last;
- } else /* if (tclass & TC_GRPSTART) */ {
- debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
+ } else /* if (tclass & TC_LBRACE) */ {
+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
rollback_token();
chain_group();
}
--
2.27.0
From df7698f1df2ed5a82a1558e167ba3262d1c614cb Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 01:16:48 +0200
Subject: [PATCH 43/61] awk: tighten rules in action parsing
Disallow:
BEGIN
{ action } - must start on the same line
Disallow:
func f()
print "hello" - must be in {...}
function old new delta
chain_until_rbrace - 41 +41
parse_program 307 336 +29
chain_group 649 616 -33
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 108 ++++++++++++++++++++++++++++++--------------------
1 file changed, 66 insertions(+), 42 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index a1a2afd1d..c68416873 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn)
return n;
}
+static void chain_until_rbrace(void)
+{
+ uint32_t tc;
+ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
+ if (tc == TC_NEWLINE)
+ continue;
+ rollback_token();
+ chain_group();
+ }
+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
+}
+
/* parse group and attach it to chain */
static void chain_group(void)
{
- uint32_t c;
+ uint32_t tc;
node *n, *n2, *n3;
do {
- c = next_token(TS_GRPSEQ);
- } while (c & TC_NEWLINE);
+ tc = next_token(TS_GRPSEQ);
+ } while (tc == TC_NEWLINE);
- if (c & TC_LBRACE) {
+ if (tc == TC_LBRACE) {
debug_printf_parse("%s: TC_LBRACE\n", __func__);
- while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
- debug_printf_parse("%s: !TC_RBRACE\n", __func__);
- if (c & TC_NEWLINE)
- continue;
- rollback_token();
- chain_group();
- }
- debug_printf_parse("%s: TC_RBRACE\n", __func__);
+ chain_until_rbrace();
return;
}
- if (c & (TS_OPSEQ | TS_OPTERM)) {
+ if (tc & (TS_OPSEQ | TS_OPTERM)) {
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
@@ -1675,37 +1681,48 @@ static void chain_group(void)
static void parse_program(char *p)
{
- uint32_t tclass;
- node *cn;
- func *f;
- var *v;
-
debug_printf_parse("%s()\n", __func__);
g_pos = p;
t_lineno = 1;
- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
+ for (;;) {
+ uint32_t tclass;
- if (tclass & TS_OPTERM) {
+ tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
+
+ if (tclass == TC_EOF) {
+ debug_printf_parse("%s: TC_EOF\n", __func__);
+ break;
+ }
+ if (tclass & TS_OPTERM) { /* ; or <newline> */
debug_printf_parse("%s: TS_OPTERM\n", __func__);
+//NB: gawk allows many newlines, but does not allow more than one semicolon:
+// BEGIN {...}<newline>;<newline>;
+//would complain "each rule must have a pattern or an action part".
+//Same message for
+// ; BEGIN {...}
continue;
}
-
- seq = &mainseq;
- if (tclass & TC_BEGIN) {
+ if (tclass == TC_BEGIN) {
debug_printf_parse("%s: TC_BEGIN\n", __func__);
seq = &beginseq;
-//TODO: ensure there is no newline between BEGIN and {
-//next_token(TC_LBRACE); rollback_token();
- chain_group();
- } else if (tclass & TC_END) {
+ /* ensure there is no newline between BEGIN and { */
+ next_token(TC_LBRACE);
+ chain_until_rbrace();
+ continue;
+ }
+ if (tclass == TC_END) {
debug_printf_parse("%s: TC_END\n", __func__);
seq = &endseq;
-//TODO: ensure there is no newline between END and {
-//next_token(TC_LBRACE); rollback_token();
- chain_group();
- } else if (tclass & TC_FUNCDECL) {
+ /* ensure there is no newline between END and { */
+ next_token(TC_LBRACE);
+ chain_until_rbrace();
+ continue;
+ }
+ if (tclass == TC_FUNCDECL) {
+ func *f;
+
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION);
f = newfunc(t_string);
@@ -1716,6 +1733,7 @@ static void parse_program(char *p)
//f->nargs = 0; - already is
/* func arg list: comma sep list of args, and a close paren */
for (;;) {
+ var *v;
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
if (f->nargs == 0)
break; /* func() is ok */
@@ -1730,31 +1748,37 @@ static void parse_program(char *p)
/* it was a comma, we ate it */
}
seq = &f->body;
-//TODO: ensure there is { after "func F(...)" - but newlines are allowed
-//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token();
- chain_group();
+ /* ensure there is { after "func F(...)" - but newlines are allowed */
+ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
+ continue;
+ chain_until_rbrace();
hash_clear(ahash);
- } else if (tclass & TS_OPSEQ) {
+ continue;
+ }
+ seq = &mainseq;
+ if (tclass & TS_OPSEQ) {
+ node *cn;
+
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
rollback_token();
cn = chain_node(OC_TEST);
cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
- if (t_tclass & TC_LBRACE) {
+ if (t_tclass == TC_LBRACE) {
debug_printf_parse("%s: TC_LBRACE\n", __func__);
rollback_token();
chain_group();
} else {
+ /* no action, assume default "{ print }" */
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
chain_node(OC_PRINT);
}
cn->r.n = mainseq.last;
- } else /* if (tclass & TC_LBRACE) */ {
- debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
- rollback_token();
- chain_group();
+ continue;
}
+ /* tclass == TC_LBRACE */
+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
+ chain_until_rbrace();
}
- debug_printf_parse("%s: TC_EOF\n", __func__);
}
--
2.27.0
From bebe1432529281f66d2004e07194718a47207d5d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 01:32:03 +0200
Subject: [PATCH 44/61] awk: open-code TS_OPTERM, no logic changes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 33 +++++++++++++++++----------------
1 file changed, 17 insertions(+), 16 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index c68416873..8c471d693 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -283,7 +283,6 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
#define TS_STATEMNT (TC_STATX | TC_WHILE)
-#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE)
/* word tokens, cannot mean something else if not expected */
#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
@@ -291,13 +290,14 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
| TC_FUNCDECL | TC_BEGIN | TC_END)
/* discard newlines after these */
-#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \
- | TS_BINOP | TS_OPTERM)
+#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
+ | TC_SEMICOL | TC_NEWLINE)
/* what can expression begin with */
#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
/* what can group begin with */
-#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE)
+#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
+ | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
/* operator is inserted between them */
@@ -642,7 +642,7 @@ struct globals2 {
#define g_buf (G.g_buf )
#define INIT_G() do { \
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
- t_tclass = TS_OPTERM; \
+ t_tclass = TC_NEWLINE; \
G.evaluate__seed = 1; \
} while (0)
@@ -1090,7 +1090,7 @@ static uint32_t next_token(uint32_t expected)
const uint32_t *ti;
uint32_t tc, last_token_class;
- last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */
+ last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
debug_printf_parse("%s() expected(%x):", __func__, expected);
debug_parse_print_tc(expected);
@@ -1470,7 +1470,8 @@ static node *parse_expr(uint32_t term_tc)
case TC_LENGTH:
debug_printf_parse("%s: TC_LENGTH\n", __func__);
tc = next_token(TC_LPAREN /* length(...) */
- | TS_OPTERM /* length; (or newline)*/
+ | TC_SEMICOL /* length; */
+ | TC_NEWLINE /* length<newline> */
| TC_RBRACE /* length } */
| TC_BINOPX /* length <op> NUM */
| TC_COMMA /* print length, 1 */
@@ -1516,7 +1517,7 @@ static void chain_expr(uint32_t info)
n = chain_node(info);
- n->l.n = parse_expr(TS_OPTERM | TC_RBRACE);
+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
if ((info & OF_REQUIRED) && !n->l.n)
syntax_error(EMSG_TOO_FEW_ARGS);
@@ -1577,8 +1578,8 @@ static void chain_group(void)
chain_until_rbrace();
return;
}
- if (tc & (TS_OPSEQ | TS_OPTERM)) {
- debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
+ if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) {
+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
return;
@@ -1647,10 +1648,10 @@ static void chain_group(void)
case OC_PRINTF:
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
n = chain_node(t_info);
- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE);
+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
if (t_tclass & TC_OUTRDR) {
n->info |= t_info;
- n->r.n = parse_expr(TS_OPTERM | TC_RBRACE);
+ n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
}
if (t_tclass & TC_RBRACE)
rollback_token();
@@ -1689,14 +1690,14 @@ static void parse_program(char *p)
uint32_t tclass;
tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
+ TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
if (tclass == TC_EOF) {
debug_printf_parse("%s: TC_EOF\n", __func__);
break;
}
- if (tclass & TS_OPTERM) { /* ; or <newline> */
- debug_printf_parse("%s: TS_OPTERM\n", __func__);
+ if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
+ debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
//NB: gawk allows many newlines, but does not allow more than one semicolon:
// BEGIN {...}<newline>;<newline>;
//would complain "each rule must have a pattern or an action part".
@@ -1762,7 +1763,7 @@ static void parse_program(char *p)
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
rollback_token();
cn = chain_node(OC_TEST);
- cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
+ cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
if (t_tclass == TC_LBRACE) {
debug_printf_parse("%s: TC_LBRACE\n", __func__);
rollback_token();
--
2.27.0
From be80050f2cff5967de7a50eb3aed2f95c39357cd Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 01:59:36 +0200
Subject: [PATCH 45/61] awk: support %F %a %A in printf
function old new delta
.rodata 104111 104120 +9
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/editors/awk.c b/editors/awk.c
index 8c471d693..2c3b49bc8 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -909,7 +909,7 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i
do { c = *s; } while (c && *++s);
if (strchr("diouxX", c)) {
r = snprintf(b, size, format, (int)n);
- } else if (strchr("eEfgG", c)) {
+ } else if (strchr("eEfFgGaA", c)) {
r = snprintf(b, size, format, n);
} else {
syntax_error(EMSG_INV_FMT);
--
2.27.0
From 8b97bd49bdd5181c211f5d7b64108edf9e8962f4 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 11:54:01 +0200
Subject: [PATCH 46/61] awk: do not use a copy of g_progname for
node->l.new_progname
We never free the g_progname strings, so they remain valid; there is no need to copy them.
function old new delta
chain_node 104 97 -7
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 2c3b49bc8..4119253ec 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -179,7 +179,7 @@ typedef struct node_s {
struct node_s *n;
var *v;
int aidx;
- char *new_progname;
+ const char *new_progname;
regex_t *re;
} l;
union {
@@ -1501,7 +1501,7 @@ static node *chain_node(uint32_t info)
if (seq->programname != g_progname) {
seq->programname = g_progname;
n = chain_node(OC_NEWSOURCE);
- n->l.new_progname = xstrdup(g_progname);
+ n->l.new_progname = g_progname;
}
n = seq->last;
--
2.27.0
From 61dc1b3f2201368a310b0754a74e6152fe6b015d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 11:57:59 +0200
Subject: [PATCH 47/61] awk: rand(): 64-bit constants should be ULL
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 4119253ec..e4dd6684c 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -3169,9 +3169,9 @@ static var *evaluate(node *op, var *res)
uint64_t v = ((uint64_t)rand() << 32) | u;
/* the above shift+or is optimized out on 32-bit arches */
# if RAND_MAX > 0x7fffffff
- v &= 0x7fffffffffffffffUL;
+ v &= 0x7fffffffffffffffULL;
# endif
- R_d = (double)v / 0x8000000000000000UL;
+ R_d = (double)v / 0x8000000000000000ULL;
#else
# error Not implemented for this value of RAND_MAX
#endif
--
2.27.0
From a6468234691fb0718fa0d57b9de4a7748f805af9 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 12:20:36 +0200
Subject: [PATCH 48/61] awk: match(): code shrink
function old new delta
do_match - 165 +165
exec_builtin_match 202 - -202
------------------------------------------------------------------------------
(add/remove: 1/1 grow/shrink: 0/0 up/down: 165/-202) Total: -37 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index e4dd6684c..649198d15 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2497,26 +2497,24 @@ static NOINLINE int do_mktime(const char *ds)
}
/* Reduce stack usage in exec_builtin() by keeping match() code separate */
-static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res)
+static NOINLINE var *do_match(node *an1, const char *as0)
{
regmatch_t pmatch[1];
regex_t sreg, *re;
- int n;
+ int n, start, len;
re = as_regex(an1, &sreg);
n = regexec(re, as0, 1, pmatch, 0);
- if (n == 0) {
- pmatch[0].rm_so++;
- pmatch[0].rm_eo++;
- } else {
- pmatch[0].rm_so = 0;
- pmatch[0].rm_eo = -1;
- }
if (re == &sreg)
regfree(re);
- setvar_i(newvar("RSTART"), pmatch[0].rm_so);
- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
- setvar_i(res, pmatch[0].rm_so);
+ start = 0;
+ len = -1;
+ if (n == 0) {
+ start = pmatch[0].rm_so + 1;
+ len = pmatch[0].rm_eo - pmatch[0].rm_so;
+ }
+ setvar_i(newvar("RLENGTH"), len);
+ return setvar_i(newvar("RSTART"), start);
}
/* Reduce stack usage in evaluate() by keeping builtins' code separate */
@@ -2686,7 +2684,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
break;
case B_ma:
- exec_builtin_match(an[1], as[0], res);
+ res = do_match(an[1], as[0]);
break;
case B_ge:
--
2.27.0
From 9642f8123d92f8a1db9078178b04d22015d5e03a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 13:29:32 +0200
Subject: [PATCH 49/61] awk: restore strdup elision optimization in assignment
function old new delta
evaluate 3339 3387 +48
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 649198d15..20672db9a 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -102,7 +102,7 @@ enum {
#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
-#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
+#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */
#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
@@ -1371,6 +1371,12 @@ static node *parse_expr(uint32_t term_tc)
cn->a.n = vn->a.n;
if (tc & TS_BINOP) {
cn->l.n = vn;
+//FIXME: this is the place to detect and reject assignments to non-lvalues.
+//Currently we allow "assignments" to consts and temporaries, nonsense like this:
+// awk 'BEGIN { "qwe" = 1 }'
+// awk 'BEGIN { 7 *= 7 }'
+// awk 'BEGIN { length("qwe") = 1 }'
+// awk 'BEGIN { (1+1) += 3 }'
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
if ((t_info & OPCLSMASK) == OC_PGETLINE) {
/* it's a pipe */
@@ -3043,14 +3049,17 @@ static var *evaluate(node *op, var *res)
case XC( OC_MOVE ):
debug_printf_eval("MOVE\n");
/* if source is a temporary string, jusk relink it to dest */
-//Disabled: if R.v is numeric but happens to have cached R.v->string,
-//then L.v ends up being a string, which is wrong
-// if (R.v == TMPVAR1 && R.v->string) {
-// res = setvar_p(L.v, R.v->string);
-// R.v->string = NULL;
-// } else {
+ if (R.v == TMPVAR1
+ && !(R.v->type & VF_NUMBER)
+ /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
+ * L.v ends up a string, which is wrong */
+ /*&& R.v->string - always not NULL (right?) */
+ ) {
+ res = setvar_p(L.v, R.v->string); /* avoids strdup */
+ R.v->string = NULL;
+ } else {
res = copyvar(L.v, R.v);
-// }
+ }
break;
case XC( OC_TERNARY ):
--
2.27.0
From c49ba79e1ce45367a1d994b12d972daae0698beb Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sat, 3 Jul 2021 13:57:47 +0200
Subject: [PATCH 50/61] awk: simplify tests for operation class
Usually, an operation class has only one possible value of "info" word.
In this case, just compare the entire info word, do not bother
to mask OPCLSMASK bits.
(Example where this is not the case: OC_REPLACE for "<op>=")
function old new delta
mk_splitter 106 100 -6
chain_group 616 610 -6
nextarg 40 32 -8
exec_builtin 1157 1149 -8
as_regex 111 103 -8
awk_split 553 543 -10
parse_expr 948 936 -12
awk_getline 656 642 -14
evaluate 3387 3343 -44
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/9 up/down: 0/-116) Total: -116 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 64 +++++++++++++++++++++++++++++----------------------
1 file changed, 36 insertions(+), 28 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 20672db9a..cd135ef64 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -432,7 +432,8 @@ static const char tokenlist[] ALIGN1 =
static const uint32_t tokeninfo[] ALIGN4 = {
0,
0,
- OC_REGEXP,
+#define TI_REGEXP OC_REGEXP
+ TI_REGEXP,
xS|'a', xS|'w', xS|'|',
OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
@@ -443,12 +444,17 @@ static const uint32_t tokeninfo[] ALIGN4 = {
OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
-#define TI_LESS (OC_COMPARE|VV|P(39)|2)
+#define TI_LESS (OC_COMPARE|VV|P(39)|2)
TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
- OC_IN|SV|P(49), /* TC_IN */
- OC_COMMA|SS|P(80),
- OC_PGETLINE|SV|P(37),
+#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
+#define TI_COLON (OC_COLON|xx|P(67)|':')
+ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
+#define TI_IN (OC_IN|SV|P(49))
+ TI_IN,
+#define TI_COMMA (OC_COMMA|SS|P(80))
+ TI_COMMA,
+#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
+ TI_PGETLINE,
OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
0, /* ] */
0,
@@ -456,7 +462,8 @@ static const uint32_t tokeninfo[] ALIGN4 = {
0,
0, /* \n */
ST_IF, ST_DO, ST_FOR, OC_BREAK,
- OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
+#define TI_PRINT OC_PRINT
+ OC_CONTINUE, OC_DELETE|Rx, TI_PRINT,
OC_PRINTF, OC_NEXT, OC_NEXTFILE,
OC_RETURN|Vx, OC_EXIT|Nx,
ST_WHILE,
@@ -465,8 +472,8 @@ static const uint32_t tokeninfo[] ALIGN4 = {
// Highest byte bit pattern: nn s3s2s1 v3v2v1
// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
// OC_F's are builtins with zero or one argument.
-// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt.
-// Check for no args is present in builtins' code (not in this table): rand, systime.
+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
+// Check for no args is present in builtins' code (not in this table): rand, systime
// Have one _optional_ arg: fflush, srand, length
#define OC_B OC_BUILTIN
#define OC_F OC_FBLTIN
@@ -1310,7 +1317,7 @@ static node *new_node(uint32_t info)
static void mk_re_node(const char *s, node *n, regex_t *re)
{
- n->info = OC_REGEXP;
+ n->info = TI_REGEXP;
n->l.re = re;
n->r.ire = re + 1;
xregcomp(re, s, REG_EXTENDED);
@@ -1360,12 +1367,13 @@ static node *parse_expr(uint32_t term_tc)
* previous operators with higher priority */
vn = cn;
while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
- || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
+ || ((t_info == vn->info) && t_info == TI_COLON)
) {
vn = vn->a.n;
if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
}
- if ((t_info & OPCLSMASK) == OC_TERNARY)
+ if (t_info == TI_TERNARY)
+//TODO: why?
t_info += P(6);
cn = vn->a.n->r.n = new_node(t_info);
cn->a.n = vn->a.n;
@@ -1378,7 +1386,7 @@ static node *parse_expr(uint32_t term_tc)
// awk 'BEGIN { length("qwe") = 1 }'
// awk 'BEGIN { (1+1) += 3 }'
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
- if ((t_info & OPCLSMASK) == OC_PGETLINE) {
+ if (t_info == TI_PGETLINE) {
/* it's a pipe */
next_token(TC_GETLINE);
/* give maximum priority to this pipe */
@@ -1630,7 +1638,7 @@ static void chain_group(void)
next_token(TC_LPAREN);
n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
if (t_tclass & TC_RPAREN) { /* for-in */
- if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
+ if (!n2 || n2->info != TI_IN)
syntax_error(EMSG_UNEXP_TOKEN);
n = chain_node(OC_WALKINIT | VV);
n->l.n = n2->l.n;
@@ -1834,7 +1842,7 @@ static node *mk_splitter(const char *s, tsplitter *spl)
re = &spl->re[0];
ire = &spl->re[1];
n = &spl->n;
- if ((n->info & OPCLSMASK) == OC_REGEXP) {
+ if (n->info == TI_REGEXP) {
regfree(re);
regfree(ire); // TODO: nuke ire, use re+1?
}
@@ -1858,7 +1866,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
int cflags;
const char *s;
- if ((op->info & OPCLSMASK) == OC_REGEXP) {
+ if (op->info == TI_REGEXP) {
return icase ? op->r.ire : op->l.re;
}
@@ -1968,7 +1976,7 @@ static int awk_split(const char *s, node *spl, char **slist)
c[2] = '\n';
n = 0;
- if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
+ if (spl->info == TI_REGEXP) { /* regex split */
if (!*s)
return n; /* "": zero fields */
n++; /* at least one field will be there */
@@ -2135,7 +2143,7 @@ static node *nextarg(node **pn)
node *n;
n = *pn;
- if (n && (n->info & OPCLSMASK) == OC_COMMA) {
+ if (n && n->info == TI_COMMA) {
*pn = n->r.n;
n = n->l.n;
} else {
@@ -2229,7 +2237,7 @@ static int awk_getline(rstream *rsm, var *v)
so = eo = p;
r = 1;
if (p > 0) {
- if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
+ if (rsplitter.n.info == TI_REGEXP) {
if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
b, 1, pmatch, 0) == 0) {
so = pmatch[0].rm_so;
@@ -2575,8 +2583,8 @@ static NOINLINE var *exec_builtin(node *op, var *res)
char *s, *s1;
if (nargs > 2) {
- spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
- an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
+ spl = (an[2]->info == TI_REGEXP) ? an[2]
+ : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
} else {
spl = &fsplitter.n;
}
@@ -2860,7 +2868,7 @@ static var *evaluate(node *op, var *res)
/* test pattern */
case XC( OC_TEST ):
debug_printf_eval("TEST\n");
- if ((op1->info & OPCLSMASK) == OC_COMMA) {
+ if (op1->info == TI_COMMA) {
/* it's range pattern */
if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
op->info |= OF_CHECKED;
@@ -2921,7 +2929,7 @@ static var *evaluate(node *op, var *res)
F = rsm->F;
}
- if ((opinfo & OPCLSMASK) == OC_PRINT) {
+ if (opinfo == TI_PRINT) {
if (!op1) {
fputs(getvar_s(intvar[F0]), F);
} else {
@@ -2940,7 +2948,7 @@ static var *evaluate(node *op, var *res)
}
}
fputs(getvar_s(intvar[ORS]), F);
- } else { /* OC_PRINTF */
+ } else { /* PRINTF */
char *s = awk_printf(op1, &len);
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
fwrite(s, len, 1, F);
@@ -3064,7 +3072,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_TERNARY ):
debug_printf_eval("TERNARY\n");
- if ((op->r.n->info & OPCLSMASK) != OC_COLON)
+ if (op->r.n->info != TI_COLON)
syntax_error(EMSG_POSSIBLE_ERROR);
res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
break;
@@ -3122,7 +3130,7 @@ static var *evaluate(node *op, var *res)
if (op1) {
rsm = newfile(L.s);
if (!rsm->F) {
- if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
+ if (opinfo == TI_PGETLINE) {
rsm->F = popen(L.s, "r");
rsm->is_pipe = TRUE;
} else {
@@ -3158,7 +3166,7 @@ static var *evaluate(node *op, var *res)
double R_d = R_d; /* for compiler */
debug_printf_eval("FBLTIN\n");
- if (op1 && (op1->info & OPCLSMASK) == OC_COMMA)
+ if (op1 && op1->info == TI_COMMA)
/* Simple builtins take one arg maximum */
syntax_error("Too many arguments");
@@ -3358,7 +3366,7 @@ static var *evaluate(node *op, var *res)
case XC( OC_COMMA ): {
const char *sep = "";
debug_printf_eval("COMMA\n");
- if ((opinfo & OPCLSMASK) == OC_COMMA)
+ if (opinfo == TI_COMMA)
sep = getvar_s(intvar[SUBSEP]);
setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
break;
--
2.27.0
From 39122ab01367775898f3f46394942138176b4101 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 4 Jul 2021 01:25:34 +0200
Subject: [PATCH 51/61] awk: fix printf buffer overflow
function old new delta
awk_printf 468 546 +78
fmt_num 239 247 +8
getvar_s 125 111 -14
evaluate 3343 3329 -14
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/2 up/down: 86/-28) Total: 58 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 94 ++++++++++++++++++++++++++++++---------------------
1 file changed, 55 insertions(+), 39 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index cd135ef64..a440a6234 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -904,25 +904,23 @@ static double my_strtod(char **pp)
/* -------- working with variables (set/get/copy/etc) -------- */
-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
+static void fmt_num(const char *format, double n)
{
- int r = 0;
- char c;
- const char *s = format;
-
- if (int_as_int && n == (long long)n) {
- r = snprintf(b, size, "%lld", (long long)n);
+ if (n == (long long)n) {
+ snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
} else {
+ const char *s = format;
+ char c;
+
do { c = *s; } while (c && *++s);
if (strchr("diouxX", c)) {
- r = snprintf(b, size, format, (int)n);
+ snprintf(g_buf, MAXVARFMT, format, (int)n);
} else if (strchr("eEfFgGaA", c)) {
- r = snprintf(b, size, format, n);
+ snprintf(g_buf, MAXVARFMT, format, n);
} else {
syntax_error(EMSG_INV_FMT);
}
}
- return r;
}
static xhash *iamarray(var *a)
@@ -999,7 +997,7 @@ static const char *getvar_s(var *v)
{
/* if v is numeric and has no cached string, convert it to string */
if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
+ fmt_num(getvar_s(intvar[CONVFMT]), v->number);
v->string = xstrdup(g_buf);
v->type |= VF_CACHED;
}
@@ -2315,12 +2313,9 @@ static int awk_getline(rstream *rsm, var *v)
#endif
static char *awk_printf(node *n, int *len)
{
- char *b = NULL;
- char *fmt, *s, *f;
- const char *s1;
- int i, j, incr, bsize;
- char c, c1;
- var *arg;
+ char *b;
+ char *fmt, *f;
+ int i;
//tmpvar = nvalloc(1);
#define TMPVAR (&G.awk_printf__tmpvar)
@@ -2333,8 +2328,14 @@ static char *awk_printf(node *n, int *len)
// to evaluate() potentially recursing into another awk_printf() can't
// mangle the value.
+ b = NULL;
i = 0;
- while (*f) {
+ while (*f) { /* "print one format spec" loop */
+ char *s;
+ char c;
+ char sv;
+ var *arg;
+
s = f;
while (*f && (*f != '%' || *++f == '%'))
f++;
@@ -2343,40 +2344,55 @@ static char *awk_printf(node *n, int *len)
syntax_error("%*x formats are not supported");
f++;
}
-
- incr = (f - s) + MAXVARFMT;
- b = qrealloc(b, incr + i, &bsize);
c = *f;
- if (c != '\0')
- f++;
- c1 = *f;
+ if (!c) {
+ /* Tail of fmt with no percent chars,
+ * or "....%" (percent seen, but no format specifier char found)
+ */
+ goto tail;
+ }
+ sv = *++f;
*f = '\0';
arg = evaluate(nextarg(&n), TMPVAR);
- j = i;
- if (c == 'c' || !c) {
- i += sprintf(b+i, s, is_numeric(arg) ?
+ /* Result can be arbitrarily long. Example:
+ * printf "%99999s", "BOOM"
+ */
+ if (c == 'c') {
+ s = xasprintf(s, is_numeric(arg) ?
(char)getvar_i(arg) : *getvar_s(arg));
} else if (c == 's') {
- s1 = getvar_s(arg);
- b = qrealloc(b, incr+i+strlen(s1), &bsize);
- i += sprintf(b+i, s, s1);
+ s = xasprintf(s, getvar_s(arg));
} else {
- i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
+ double d = getvar_i(arg);
+ if (strchr("diouxX", c)) {
+//TODO: make it wider here (%x -> %llx etc)?
+ s = xasprintf(s, (int)d);
+ } else if (strchr("eEfFgGaA", c)) {
+ s = xasprintf(s, d);
+ } else {
+ syntax_error(EMSG_INV_FMT);
+ }
}
- *f = c1;
+ *f = sv;
- /* if there was an error while sprintf, return value is negative */
- if (i < j)
- i = j;
+ if (i == 0) {
+ b = s;
+ i = strlen(b);
+ continue;
+ }
+ tail:
+ b = xrealloc(b, i + strlen(s) + 1);
+ i = stpcpy(b + i, s) - b;
+ if (!c) /* tail? */
+ break;
+ free(s);
}
free(fmt);
//nvfree(tmpvar, 1);
#undef TMPVAR
- b = xrealloc(b, i + 1);
- b[i] = '\0';
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
if (len)
*len = i;
@@ -2936,8 +2952,8 @@ static var *evaluate(node *op, var *res)
for (;;) {
var *v = evaluate(nextarg(&op1), TMPVAR0);
if (v->type & VF_NUMBER) {
- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
- getvar_i(v), TRUE);
+ fmt_num(getvar_s(intvar[OFMT]),
+ getvar_i(v));
fputs(g_buf, F);
} else {
fputs(getvar_s(v), F);
--
2.27.0
From 9c55f6ae3f528a3416368e0aff9942d5b4ed216d Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 11 Jul 2021 11:46:21 +0200
Subject: [PATCH 52/61] awk: rollback_token() + chain_group() ==
chain_until_rbrace()
function old new delta
parse_program 336 332 -4
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index a440a6234..755e68fc7 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1778,8 +1778,7 @@ static void parse_program(char *p)
cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
if (t_tclass == TC_LBRACE) {
debug_printf_parse("%s: TC_LBRACE\n", __func__);
- rollback_token();
- chain_group();
+ chain_until_rbrace();
} else {
/* no action, assume default "{ print }" */
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
--
2.27.0
From bd0d2c3b5bf5c9337e67b43222bafcdf80c4e36a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 11 Jul 2021 12:00:31 +0200
Subject: [PATCH 53/61] awk: undo TI_PRINT, it introduced a bug (print with any
redirect acting as printf)
function old new delta
evaluate 3329 3337 +8
Patch by Ron Yorston <[email protected]>
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 8 +++++---
testsuite/awk.tests | 5 +++++
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 755e68fc7..0aa7c0804 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -462,8 +462,7 @@ static const uint32_t tokeninfo[] ALIGN4 = {
0,
0, /* \n */
ST_IF, ST_DO, ST_FOR, OC_BREAK,
-#define TI_PRINT OC_PRINT
- OC_CONTINUE, OC_DELETE|Rx, TI_PRINT,
+ OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
OC_PRINTF, OC_NEXT, OC_NEXTFILE,
OC_RETURN|Vx, OC_EXIT|Nx,
ST_WHILE,
@@ -2944,7 +2943,10 @@ static var *evaluate(node *op, var *res)
F = rsm->F;
}
- if (opinfo == TI_PRINT) {
+ /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
+ * additional bits to opinfos of print/printf with redirects
+ */
+ if ((opinfo & OPCLSMASK) == OC_PRINT) {
if (!op1) {
fputs(getvar_s(intvar[F0]), F);
} else {
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 770d8ffce..6b23b91cb 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -450,4 +450,9 @@ testing "awk exit N propagates through END's exit" \
"42\n" \
'' ''
+testing "awk print + redirect" \
+ "awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \
+ "STDERR %s\n" \
+ '' ''
+
exit $FAILCOUNT
--
2.27.0
From 5ed199c07d9ffc947443118dda0e0af6569588d5 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 11 Jul 2021 12:25:33 +0200
Subject: [PATCH 54/61] awk: unbreak "printf('%c') can output NUL" testcase
function old new delta
awk_printf 546 593 +47
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 43 ++++++++++++++++++++++++++-----------------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 0aa7c0804..e765d3fcf 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2309,11 +2309,11 @@ static int awk_getline(rstream *rsm, var *v)
#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
# define awk_printf(a, b) awk_printf(a)
#endif
-static char *awk_printf(node *n, int *len)
+static char *awk_printf(node *n, size_t *len)
{
char *b;
char *fmt, *f;
- int i;
+ size_t i;
//tmpvar = nvalloc(1);
#define TMPVAR (&G.awk_printf__tmpvar)
@@ -2333,6 +2333,7 @@ static char *awk_printf(node *n, int *len)
char c;
char sv;
var *arg;
+ size_t slen;
s = f;
while (*f && (*f != '%' || *++f == '%'))
@@ -2347,6 +2348,7 @@ static char *awk_printf(node *n, int *len)
/* Tail of fmt with no percent chars,
* or "....%" (percent seen, but no format specifier char found)
*/
+ slen = strlen(s);
goto tail;
}
sv = *++f;
@@ -2357,31 +2359,38 @@ static char *awk_printf(node *n, int *len)
* printf "%99999s", "BOOM"
*/
if (c == 'c') {
- s = xasprintf(s, is_numeric(arg) ?
- (char)getvar_i(arg) : *getvar_s(arg));
- } else if (c == 's') {
- s = xasprintf(s, getvar_s(arg));
+ c = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
+ s = xasprintf(s, c);
+ /* + 1 if c == NUL: handle printf "%c" 0 case
+ * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */
+ slen = strlen(s) + (c == '\0');
} else {
- double d = getvar_i(arg);
- if (strchr("diouxX", c)) {
-//TODO: make it wider here (%x -> %llx etc)?
- s = xasprintf(s, (int)d);
- } else if (strchr("eEfFgGaA", c)) {
- s = xasprintf(s, d);
+ if (c == 's') {
+ s = xasprintf(s, getvar_s(arg));
} else {
- syntax_error(EMSG_INV_FMT);
+ double d = getvar_i(arg);
+ if (strchr("diouxX", c)) {
+//TODO: make it wider here (%x -> %llx etc)?
+ s = xasprintf(s, (int)d);
+ } else if (strchr("eEfFgGaA", c)) {
+ s = xasprintf(s, d);
+ } else {
+ syntax_error(EMSG_INV_FMT);
+ }
}
+ slen = strlen(s);
}
*f = sv;
if (i == 0) {
b = s;
- i = strlen(b);
+ i = slen;
continue;
}
tail:
- b = xrealloc(b, i + strlen(s) + 1);
- i = stpcpy(b + i, s) - b;
+ b = xrealloc(b, i + slen + 1);
+ strcpy(b + i, s);
+ i += slen;
if (!c) /* tail? */
break;
free(s);
@@ -2926,7 +2935,6 @@ static var *evaluate(node *op, var *res)
debug_printf_eval("PRINTF\n");
{
FILE *F = stdout;
- IF_FEATURE_AWK_GNU_EXTENSIONS(int len;)
if (op->r.n) {
rstream *rsm = newfile(R.s);
@@ -2966,6 +2974,7 @@ static var *evaluate(node *op, var *res)
}
fputs(getvar_s(intvar[ORS]), F);
} else { /* PRINTF */
+ IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
char *s = awk_printf(op1, &len);
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
fwrite(s, len, 1, F);
--
2.27.0
From f38b2d9bcddd00432150567bef8f8a2bf0d1ed43 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 11 Jul 2021 12:51:43 +0200
Subject: [PATCH 55/61] awk: unbreak "cmd" | getline
function old new delta
evaluate 3337 3343 +6
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 3 ++-
testsuite/awk.tests | 5 +++++
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/editors/awk.c b/editors/awk.c
index e765d3fcf..6c60a0615 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -3156,7 +3156,8 @@ static var *evaluate(node *op, var *res)
if (op1) {
rsm = newfile(L.s);
if (!rsm->F) {
- if (opinfo == TI_PGETLINE) {
+ /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
+ if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
rsm->F = popen(L.s, "r");
rsm->is_pipe = TRUE;
} else {
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 6b23b91cb..242c897d1 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -455,4 +455,9 @@ testing "awk print + redirect" \
"STDERR %s\n" \
'' ''
+testing "awk \"cmd\" | getline" \
+ "awk 'BEGIN { \"echo HELLO\" | getline; print }'" \
+ "HELLO\n" \
+ '' ''
+
exit $FAILCOUNT
--
2.27.0
From 3a759a81580a1f7d9b4428e30c623324ec2e3699 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Sun, 11 Jul 2021 18:16:10 +0200
Subject: [PATCH 56/61] awk: fix corner case in awk_printf
Example where it wasn't working:
awk 'BEGIN { printf "qwe %s rty %c uio\n", "a", 0, "c" }'
- the NUL character output by %c caused printing to stop prematurely.
function old new delta
awk_printf 593 596 +3
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 6c60a0615..465033f5f 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2359,11 +2359,11 @@ static char *awk_printf(node *n, size_t *len)
* printf "%99999s", "BOOM"
*/
if (c == 'c') {
- c = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
- s = xasprintf(s, c);
- /* + 1 if c == NUL: handle printf "%c" 0 case
+ char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
+ s = xasprintf(s, cc);
+ /* + 1 if cc == NUL: handle printf "%c" 0 case
* (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */
- slen = strlen(s) + (c == '\0');
+ slen = strlen(s) + (cc == '\0');
} else {
if (c == 's') {
s = xasprintf(s, getvar_s(arg));
--
2.27.0
From e62366d32f13e059266e2996a68be023bef309ef Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Mon, 12 Jul 2021 11:27:11 +0200
Subject: [PATCH 57/61] awk: fix printf "%-10c", 0
function old new delta
awk_printf 596 626 +30
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 9 +++++----
testsuite/awk.tests | 8 ++++++++
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 465033f5f..437d87ecf 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2360,10 +2360,11 @@ static char *awk_printf(node *n, size_t *len)
*/
if (c == 'c') {
char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
- s = xasprintf(s, cc);
- /* + 1 if cc == NUL: handle printf "%c" 0 case
- * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */
- slen = strlen(s) + (cc == '\0');
+ char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
+ slen = strlen(r);
+ if (cc == '\0') /* if cc is NUL, re-format the string with it */
+ sprintf(r, s, cc);
+ s = r;
} else {
if (c == 's') {
s = xasprintf(s, getvar_s(arg));
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 242c897d1..3cddb4dd4 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -415,6 +415,14 @@ testing "awk printf('%c') can output NUL" \
"awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
SKIP=
+optional FEATURE_AWK_GNU_EXTENSIONS
+testing "awk printf('%-10c') can output NUL" \
+ "awk 'BEGIN { printf \"[%-10c]\n\", 0 }' | od -tx1" "\
+0000000 5b 00 20 20 20 20 20 20 20 20 20 5d 0a
+0000015
+" "" ""
+SKIP=
+
# testing "description" "command" "result" "infile" "stdin"
testing 'awk negative field access' \
'awk 2>&1 -- '\''{ $(-1) }'\' \
--
2.27.0
From 258057e67d4403d43f48788fabdf874c1bb59502 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Mon, 12 Jul 2021 13:30:30 +0200
Subject: [PATCH 58/61] awk: in parsing, remove superfluous NEWLINE check;
optimize builtin arg evaluation
function old new delta
exec_builtin 1149 1145 -4
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 437d87ecf..7a282356d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1589,8 +1589,8 @@ static void chain_group(void)
chain_until_rbrace();
return;
}
- if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) {
- debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__);
+ if (tc & (TS_OPSEQ | TC_SEMICOL)) {
+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
return;
@@ -2582,10 +2582,11 @@ static NOINLINE var *exec_builtin(node *op, var *res)
av[2] = av[3] = NULL;
for (i = 0; i < 4 && op; i++) {
an[i] = nextarg(&op);
- if (isr & 0x09000000)
+ if (isr & 0x09000000) {
av[i] = evaluate(an[i], TMPVAR(i));
- if (isr & 0x08000000)
- as[i] = getvar_s(av[i]);
+ if (isr & 0x08000000)
+ as[i] = getvar_s(av[i]);
+ }
isr >>= 1;
}
--
2.27.0
From 18fe636700ac5d795027d920922340410f65640e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 14 Jul 2021 14:25:07 +0200
Subject: [PATCH 59/61] awk: tighten parsing - disallow extra semicolons
'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk
function old new delta
parse_program 332 353 +21
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 40 ++++++++++++++++++++++++----------------
1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 7a282356d..2f8a18c8e 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1634,7 +1634,7 @@ static void chain_group(void)
debug_printf_parse("%s: ST_FOR\n", __func__);
next_token(TC_LPAREN);
n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
- if (t_tclass & TC_RPAREN) { /* for-in */
+ if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
if (!n2 || n2->info != TI_IN)
syntax_error(EMSG_UNEXP_TOKEN);
n = chain_node(OC_WALKINIT | VV);
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
for (;;) {
uint32_t tclass;
- tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
- TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
-
+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+ | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
+ got_tok:
if (tclass == TC_EOF) {
debug_printf_parse("%s: TC_EOF\n", __func__);
break;
}
- if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
- debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
-//NB: gawk allows many newlines, but does not allow more than one semicolon:
-// BEGIN {...}<newline>;<newline>;
-//would complain "each rule must have a pattern or an action part".
-//Same message for
-// ; BEGIN {...}
+ if (tclass == TC_NEWLINE) {
+ debug_printf_parse("%s: TC_NEWLINE\n", __func__);
continue;
}
if (tclass == TC_BEGIN) {
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
/* ensure there is no newline between BEGIN and { */
next_token(TC_LBRACE);
chain_until_rbrace();
- continue;
+ goto next_tok;
}
if (tclass == TC_END) {
debug_printf_parse("%s: TC_END\n", __func__);
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
/* ensure there is no newline between END and { */
next_token(TC_LBRACE);
chain_until_rbrace();
- continue;
+ goto next_tok;
}
if (tclass == TC_FUNCDECL) {
func *f;
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
continue;
chain_until_rbrace();
hash_clear(ahash);
- continue;
+ goto next_tok;
}
seq = &mainseq;
if (tclass & TS_OPSEQ) {
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
chain_node(OC_PRINT);
}
cn->r.n = mainseq.last;
- continue;
+ goto next_tok;
}
/* tclass == TC_LBRACE */
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
chain_until_rbrace();
- }
+ next_tok:
+ /* Same as next_token() at the top of the loop, + TC_SEMICOL */
+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+ | TC_EOF | TC_NEWLINE | TC_SEMICOL);
+ /* gawk allows many newlines, but does not allow more than one semicolon:
+ * BEGIN {...}<newline>;<newline>;
+ * would complain "each rule must have a pattern or an action part".
+ * Same message for
+ * ; BEGIN {...}
+ */
+ if (tclass != TC_SEMICOL)
+ goto got_tok; /* use this token */
+ /* else: loop back - ate the semicolon, get and use _next_ token */
+ } /* for (;;) */
}
--
2.27.0
From 9b502f61277aa48a412dd1a18e7a30b5d4c3d71a Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 14 Jul 2021 14:33:37 +0200
Subject: [PATCH 60/61] awk: disallow break/continue outside of loops
function old new delta
.rodata 104139 104186 +47
chain_group 610 633 +23
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 70/0) Total: 70 bytes
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 6 ++++--
testsuite/awk.tests | 9 ++-------
2 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 2f8a18c8e..607d69487 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1671,16 +1671,18 @@ static void chain_group(void)
case OC_BREAK:
debug_printf_parse("%s: OC_BREAK\n", __func__);
n = chain_node(OC_EXEC);
+ if (!break_ptr)
+ syntax_error("'break' not in a loop");
n->a.n = break_ptr;
-//TODO: if break_ptr is NULL, syntax error (not in the loop)?
chain_expr(t_info);
break;
case OC_CONTINUE:
debug_printf_parse("%s: OC_CONTINUE\n", __func__);
n = chain_node(OC_EXEC);
+ if (!continue_ptr)
+ syntax_error("'continue' not in a loop");
n->a.n = continue_ptr;
-//TODO: if continue_ptr is NULL, syntax error (not in the loop)?
chain_expr(t_info);
break;
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 3cddb4dd4..f53b1efe2 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -379,19 +379,14 @@ testing "awk -e and ARGC" \
""
SKIP=
-# The examples are in fact not valid awk programs (break/continue
-# can only be used inside loops).
-# But we do accept them outside of loops.
-# We had a bug with misparsing "break ; else" sequence.
-# Test that *that* bug is fixed, using simplest possible scripts:
testing "awk break" \
"awk -f - 2>&1; echo \$?" \
- "0\n" \
+ "awk: -:1: 'break' not in a loop\n1\n" \
"" \
'BEGIN { if (1) break; else a = 1 }'
testing "awk continue" \
"awk -f - 2>&1; echo \$?" \
- "0\n" \
+ "awk: -:1: 'continue' not in a loop\n1\n" \
"" \
'BEGIN { if (1) continue; else a = 1 }'
--
2.27.0
From 027b43ab6700b85f037fb69c08ad052cff6a7384 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <[email protected]>
Date: Wed, 14 Jul 2021 16:58:05 +0200
Subject: [PATCH 61/61] awk: whitespace and debugging tweaks
Signed-off-by: Denys Vlasenko <[email protected]>
---
editors/awk.c | 133 +++++++++++++++++++++++++-------------------------
1 file changed, 66 insertions(+), 67 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 607d69487..3adbca7aa 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -199,77 +199,78 @@ typedef struct tsplitter_s {
/* simple token classes */
/* order and hex values are very important!!! See next_token() */
-#define TC_LPAREN (1 << 0) /* ( */
-#define TC_RPAREN (1 << 1) /* ) */
-#define TC_REGEXP (1 << 2) /* /.../ */
-#define TC_OUTRDR (1 << 3) /* | > >> */
-#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
-#define TC_BINOPX (1 << 6) /* two-opnd operator */
-#define TC_IN (1 << 7) /* 'in' */
-#define TC_COMMA (1 << 8) /* , */
-#define TC_PIPE (1 << 9) /* input redirection pipe | */
-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
-#define TC_ARRTERM (1 << 11) /* ] */
-#define TC_LBRACE (1 << 12) /* { */
-#define TC_RBRACE (1 << 13) /* } */
-#define TC_SEMICOL (1 << 14) /* ; */
-#define TC_NEWLINE (1 << 15)
-#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
-#define TC_WHILE (1 << 17) /* 'while' */
-#define TC_ELSE (1 << 18) /* 'else' */
-#define TC_BUILTIN (1 << 19)
+#define TC_LPAREN (1 << 0) /* ( */
+#define TC_RPAREN (1 << 1) /* ) */
+#define TC_REGEXP (1 << 2) /* /.../ */
+#define TC_OUTRDR (1 << 3) /* | > >> */
+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
+#define TC_BINOPX (1 << 6) /* two-opnd operator */
+#define TC_IN (1 << 7) /* 'in' */
+#define TC_COMMA (1 << 8) /* , */
+#define TC_PIPE (1 << 9) /* input redirection pipe | */
+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
+#define TC_ARRTERM (1 << 11) /* ] */
+#define TC_LBRACE (1 << 12) /* { */
+#define TC_RBRACE (1 << 13) /* } */
+#define TC_SEMICOL (1 << 14) /* ; */
+#define TC_NEWLINE (1 << 15)
+#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
+#define TC_WHILE (1 << 17) /* 'while' */
+#define TC_ELSE (1 << 18) /* 'else' */
+#define TC_BUILTIN (1 << 19)
/* This costs ~50 bytes of code.
* A separate class to support deprecated "length" form. If we don't need that
* (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
* can be merged with TC_BUILTIN:
*/
-#define TC_LENGTH (1 << 20) /* 'length' */
-#define TC_GETLINE (1 << 21) /* 'getline' */
-#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
-#define TC_BEGIN (1 << 23) /* 'BEGIN' */
-#define TC_END (1 << 24) /* 'END' */
-#define TC_EOF (1 << 25)
-#define TC_VARIABLE (1 << 26) /* name */
-#define TC_ARRAY (1 << 27) /* name[ */
-#define TC_FUNCTION (1 << 28) /* name( */
-#define TC_STRING (1 << 29) /* "..." */
-#define TC_NUMBER (1 << 30)
+#define TC_LENGTH (1 << 20) /* 'length' */
+#define TC_GETLINE (1 << 21) /* 'getline' */
+#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
+#define TC_BEGIN (1 << 23) /* 'BEGIN' */
+#define TC_END (1 << 24) /* 'END' */
+#define TC_EOF (1 << 25)
+#define TC_VARIABLE (1 << 26) /* name */
+#define TC_ARRAY (1 << 27) /* name[ */
+#define TC_FUNCTION (1 << 28) /* name( */
+#define TC_STRING (1 << 29) /* "..." */
+#define TC_NUMBER (1 << 30)
#ifndef debug_parse_print_tc
-#define debug_parse_print_tc(n) do { \
-if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \
-if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \
-if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
-if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
-if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
-if ((n) & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); \
-if ((n) & TC_BINOPX ) debug_printf_parse(" BINOPX" ); \
-if ((n) & TC_IN ) debug_printf_parse(" IN" ); \
-if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
-if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
-if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
-if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
-if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \
-if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \
-if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
-if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
-if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
-if ((n) & TC_WHILE ) debug_printf_parse(" WHILE" ); \
-if ((n) & TC_ELSE ) debug_printf_parse(" ELSE" ); \
-if ((n) & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); \
-if ((n) & TC_LENGTH ) debug_printf_parse(" LENGTH" ); \
-if ((n) & TC_GETLINE ) debug_printf_parse(" GETLINE" ); \
-if ((n) & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); \
-if ((n) & TC_BEGIN ) debug_printf_parse(" BEGIN" ); \
-if ((n) & TC_END ) debug_printf_parse(" END" ); \
-if ((n) & TC_EOF ) debug_printf_parse(" EOF" ); \
-if ((n) & TC_VARIABLE) debug_printf_parse(" VARIABLE"); \
-if ((n) & TC_ARRAY ) debug_printf_parse(" ARRAY" ); \
-if ((n) & TC_FUNCTION) debug_printf_parse(" FUNCTION"); \
-if ((n) & TC_STRING ) debug_printf_parse(" STRING" ); \
-if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
-} while (0)
+static void debug_parse_print_tc(uint32_t n)
+{
+ if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
+ if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
+ if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
+ if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
+ if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
+ if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
+ if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
+ if (n & TC_IN ) debug_printf_parse(" IN" );
+ if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
+ if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
+ if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
+ if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
+ if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
+ if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
+ if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
+ if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
+ if (n & TC_STATX ) debug_printf_parse(" STATX" );
+ if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
+ if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
+ if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
+ if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
+ if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
+ if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
+ if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
+ if (n & TC_END ) debug_printf_parse(" END" );
+ if (n & TC_EOF ) debug_printf_parse(" EOF" );
+ if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
+ if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
+ if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
+ if (n & TC_STRING ) debug_printf_parse(" STRING" );
+ if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
+}
#endif
/* combined token classes ("token [class] sets") */
@@ -417,7 +418,7 @@ static const char tokenlist[] ALIGN1 =
"\5close" "\6system" "\6fflush" "\5atan2"
"\3cos" "\3exp" "\3int" "\3log"
"\4rand" "\3sin" "\4sqrt" "\5srand"
- "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
+ "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
"\5match" "\5split" "\7sprintf" "\3sub"
"\6substr" "\7systime" "\10strftime" "\6mktime"
"\7tolower" "\7toupper" NTC
@@ -1802,7 +1803,6 @@ static void parse_program(char *p)
} /* for (;;) */
}
-
/* -------- program execution part -------- */
/* temporary variables allocator */
@@ -3510,7 +3510,6 @@ static var *evaluate(node *op, var *res)
#undef sreg
}
-
/* -------- main & co. -------- */
static int awk_exit(void)
--
2.27.0
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。