学算法/短程序用gawk挺好

# KMP(s, m, p, n): Knuth-Morris-Pratt substring search.
#   s, m  text to search and its length in characters
#   p, n  pattern to look for and its length in characters
# Prints the pattern's next[] table as one line of digits, then every 0-based
# offset at which p occurs in s (one per line), overlapping occurrences
# included.  Returns nothing; returns immediately if either length is 0.
# The names after the wide gap in the parameter list are locals (the usual awk
# idiom); callers pass only the first four arguments.
# Requires gawk: split() with a "" separator yields one element per character.
# Background on this next[] formulation:
#   https://blog.csdn.net/idler/article/details/4846
function KMP(s, m, p, n,    str, pattern, na, i, j) {
  if (n == 0 || m == 0) return;
  # split() fills indices 1..length (there is no element 0), so everything
  # below is 1-based.
  # https://www.tutorialspoint.com/awk/awk_string_functions.htm
  split(s, str, ""); split(p, pattern, "");

  # na[k] is the pattern position to retry after a mismatch at position k
  # (1 + length of the longest proper border of pattern[1..k-1]); 0 means
  # "advance the text".  The table is built one entry past the pattern
  # (na[n + 1]) so the scan can resume correctly after a full match.
  na[1] = 0;
  i = 1; j = 0;
  while (i <= n) {
    if (j == 0 || pattern[j] == pattern[i]) na[++i] = ++j;
    else j = na[j];
  }
  for (i = 1; i <= n; i++) printf("%d", na[i]);
  print ""; # a bare `print` would emit $0 rather than an empty line

  i = j = 1;
  while (i <= m) {
    if (j == 0 || pattern[j] == str[i]) { ++i; ++j; }
    else j = na[j];
    if (j > n) {
      # i is one past the end of the match; convert to a 0-based start offset.
      printf("%d\n", i - n - 1);
      # Keep the already-matched border instead of restarting at 1, so that
      # occurrences overlapping this one are still reported.
      j = na[n + 1];
    }
  }
}
BEGIN {
  # Demo: report where the pattern "AABAA" occurs in a sample text.
  # Mnemonic for the length names: "m" looks like "nn", so m belongs to the
  # longer string (the text) and n to the pattern.
  _ptn = "AABAA"
  n = length(_ptn)
  _str = "AABACAABAAAABAA12345AABAA"
  m = length(_str)
  KMP(_str, m, _ptn, n)
}
# 01212
# 12345
# AAAB...
# AAB
#   i=j=3, na[3]=2
#  AAB (12345这样的pattern, KMP优势明显,next里的数越小滑动越远)

数组是关联数组/map/词典:

BEGIN {
  # Demo: awk arrays are associative; split() builds one keyed 1..count.
  str = "a b\tc  de" # statements do not need a trailing ';'
  # The separator is a regex (runs of spaces/tabs); C would need an external
  # library for this.  split() returns the number of elements it stored.
  cnt = split(str, ary, "[ \t]+")
  # for-in visits every key; the visiting order is not specified.
  for (i in ary) printf("%d %s\n", i, ary[i])
  print # shorter than puts(""); inside BEGIN $0 is empty, so this is a blank line
  # split() numbers elements from 1.  Starting at 0 would reference ary[0],
  # which silently creates an empty element and changes length(ary).
  for (i = 1; i <= cnt; i++) printf("%d %s\n", i, ary[i])
  ary["tom"] = "cat"; ary[3.14] = 5678 # any string or number can be a key
  print; print(ary["tom"], ary[3.14])
}

能写递归,可惜没有局部变量(但能凑合):

# factorial(n): return n! computed recursively (recursion stops at n == 0).
# As each level unwinds it prints "m i".  i is a local: it is declared as an
# extra parameter that callers never pass, so every level keeps its own copy.
# m is a global and is overwritten by the deeper calls, which is exactly what
# the trace is meant to show.  t is also global; it is assigned only after the
# recursive call has returned and is returned immediately.
function factorial(n,    i) {
  if (n != 0) {
    i = n;
    m = i;
    t = factorial(n - 1) * n;
    print m, i;
    return t;
  }
  return 1;
}
BEGIN {
  # Print 3! without a trailing newline.  factorial() is evaluated first, so
  # its own trace lines appear before the result.
  printf "%d", factorial(3)
}

改了个山寨版:

# factorial(n): the same recursive factorial demo, written for the patched
# gawk described in this post.  The `local i, j` clause is NOT standard awk;
# per the grammar change shown in this post the names after `local` are merged
# into the parameter list, so they presumably behave like the usual
# "extra parameter" locals.  Returns n!; recursion stops at n == 0.
func factorial(n) 
local i, j
{
  if (n == 0) return 1;
  m = i = j = n; # m is not declared anywhere, so it is a global shared by all levels
  t = n * factorial(n - 1);
  print(m, i, j); # trace: compare the global m with the per-call i and j
  return t;
}
BEGIN {
  # input() is not a standard awk function: it is the prompt-and-read-a-line
  # built-in added by the eval.c patch shown in this post.
  n = input("n=");
  printf("%d", factorial(n)); # prints the result without a trailing newline
}

awkgram.y:
LEX_FUNCTION func_name '(' opt_param_list ')' LEX_LOCAL param_list {
 /* $2 is the function name, $4 the formal parameters and $7 the names listed
    after LEX_LOCAL; the two lists are merged and installed together.  Abort
    the parse if install_function() reports failure (negative result). */
 if (install_function($2->lextok, $1, param_merge($4, $7)) < 0) YYABORT;
}

/*
 * param_merge --- join a function's formal-parameter list (l1) with the list
 * of names declared after `local` (l2) and return the combined list.
 *
 * After the merge, the param_count of the combined list's last entry is
 * raised by (last param_count of l1) + 1.  Presumably param_count is a
 * 0-based position, so this turns it into the position within the merged
 * list.  NOTE(review): entries of l2 other than the last keep their original
 * param_count; confirm that install_function() only reads the last one.
 *
 * Either list may be absent (a null pointer): `func f() local i` has an empty
 * opt_param_list, which presumably yields a null l1.  In that case the other
 * list is returned unchanged instead of dereferencing the null pointer.
 */
static INSTRUCTION*
param_merge(INSTRUCTION* l1, INSTRUCTION* l2) {
  int n;

  if (!l1) return l2;
  if (!l2) return l1;

  n = l1->lasti->param_count; /* must be read before the merge changes lasti */
  l1 = list_merge(l1, l2);
  l1->lasti->param_count += (1 + n);
  return l1;
}

eval.c:
case Op_func_call:
    /*
     * Built-in input(prompt): write the prompt to stdout, read one line from
     * stdin and push it (without the trailing newline) as a string.
     * At end of input or on a read error an empty string is pushed.
     * A line longer than the buffer is cut at sizeof s - 1 characters; the
     * rest stays in stdin for the next read.
     * NOTE(review): prompt->stptr is used as-is and the popped NODE is not
     * released here -- confirm against gawk's NODE string/refcount rules.
     */
    if (strcmp(pc->func_name, "input") == 0) {
        char   s[512];
        size_t len;
        NODE*  prompt = POP();

        printf("%s", prompt->stptr);
        fflush(stdout); /* the prompt has no newline; show it before blocking */
        /* fgets() is bounded; gets() could overrun s and was removed in C11. */
        if (fgets(s, sizeof s, stdin) == NULL)
            s[0] = '\0';
        len = strcspn(s, "\n"); /* drop the newline fgets() keeps */
        PUSH(make_string(s, len));
        break;
    }
opt_newlines不好去掉——$1等要重新数——如:
LEX_FOR '(' opt_simple_stmt ';' opt_newlines ';' opt_newlines opt_simple_stmt ')' opt_newlines statement
l_brace : '{' opt_newlines ;和r_brace : '}' opt_newlines  { yyerrok; } ; 如之奈何?
一个C(不是C++)的语法检查(没有代码生成也不是lint),.y文件会比1307行(%%所在的行)少好几百行,比我过去想象的短很多。日均代码不用万行,千行就可声称迈出了写C编译器的一大步。:-) 以上说法比较极端,因为没有考虑struct, typedef等等。宏则是预处理器的任务。

原源码里:

/* tail recursion optimization */
tail_optimize = ((pc + 1)->tail_call && do_optimize > 1 && !do_debug && !do_profile);
if (tail_optimize) { /* free local vars of calling frame */
说不定5.0还Just in time编译哩。

支持二维数组:

# True 2-D arrays (arrays of arrays, e.g. a[y][x]) require gawk 4.0 or later
# fen_atoi(c): map a FEN piece letter (either case) to its piece index:
#   K=0  A=1  B=2  N=3  R=4  C=5  P=6
# Returns -1 for anything that is not exactly one of those letters.
# awk has no exceptions: the former `throw 0` was only a discarded expression
# (an unset variable named throw concatenated with 0), so invalid input has
# always been signalled by the -1 return value alone.
function fen_atoi(c) {
  # Reject empty and multi-character input before the substring lookup.
  if (length(c) != 1) return -1;
  # index() is 1-based and yields 0 when the letter is absent, hence the -1.
  return index("KABNRCP", toupper(c)) - 1;
}

# FromFEN(str): load a Xiangqi position from the piece-placement part of a FEN
# string into the global board _brd[y][x] (y = row 0..9 in FEN order,
# x = column 0..8).  Any previous board contents are discarded first.
#   "/"        starts the next row
#   "1".."9"   skips that many empty squares (they stay unset)
#   a letter   stores 1 + fen_atoi(letter) for upper case and
#              8 + fen_atoi(letter) for lower case
# On malformed input (unknown character, or a piece outside the 10x9 board) a
# message is written to stderr and the program exits with status 1.  awk has
# no exceptions, so the earlier `throw 1` was a no-op and the bad square was
# written anyway.
# The names after the wide gap in the parameter list are locals.
# Requires gawk 4.0+ (arrays of arrays, split() on "").
function FromFEN(str,    s, len, i, c, x, y, piece) {
  delete _brd;
  len = split(str, s, "");
  x = y = 0;
  # Walk by index: testing the character itself would stop at a "0", because
  # awk treats it as false.
  for (i = 1; i <= len; i++) {
    c = s[i];
    if (c == "/") { x = 0; ++y; continue }
    if (c >= "1" && c <= "9") { x += c + 0; continue }
    piece = fen_atoi(c);
    if (piece < 0 || y > 9 || x > 8) {
      printf("FromFEN: bad FEN \"%s\" at character %d\n", str, i) > "/dev/stderr";
      exit 1;
    }
    # A valid piece letter is lower case exactly when upper-casing changes it.
    _brd[y][x++] = (c != toupper(c) ? 8 : 1) + piece;
  }
}
BEGIN {
  # Demo: load the Xiangqi opening position and draw the board as text.
  #print(fen_atoi("0"));
  FromFEN("rnbakabnr/9/1c5c1/p1p1p1p1p/9/9/P1P1P1P1P/1C5C1/9/RNBAKABNR")
  print "123456789"
  # Glyph table looked up with substr(), which is 1-based: position 1 is the
  # dot used for an empty square, so a board code k is drawn with glyph k + 1.
  Z = "·帅仕相马车砲兵將士象马车炮卒"
  y = 0
  while (y < 10) {
    x = 0
    while (x < 9) {
      printf "%s", substr(Z, _brd[y][x] + 1, 1)
      x++
    }
    print # $0 is empty inside BEGIN, so this only ends the row
    y++
  }
  print "九八七六五四三二一"
}

以上程序输出较多中文,因此有两个问题需要注意:

① D:\>gawk -f 2darray.awk
gawk: 2darray.awk:26: fatal: print to "standard output" failed (No space left on device)
[bug-gawk] fatal: print to "standard output" failed (Broken pipe) Ignore EPIPE on stdout if write error.
可通过重定向解决。如:gawk -f 2darray.awk >t.txt

② 以上文件用GBK编码,在中文Windows下无需特殊设置。
参见《解决了一例awk中substr处理汉字字符串的bug》一文:设置LANG为zh_CN.gbk比较保险。Windows下用set var=value,Linux下用export var=value。

MadEdit和vim有语法高亮:

posted @ 2022-12-23 19:03  Fun_with_Words  阅读(31)  评论(0)  编辑  收藏  举报









 张牌。