/*
 * Computes the canonical collection of LR(1) item sets (the "core
 * collection") of a small hard-coded grammar and prints the Goto transitions
 * between those sets.
 *
 * Author: RexfieldVon
 * Date:   2013-08-24 21:19:25
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>

/* Number of distinct byte values; used for every table indexed by a symbol. */
#define NEXTSIZE 256

/* One node of the byte-indexed trie used to fingerprint an item set. */
struct TrieTreeNode
{
    struct TrieTreeNode *Next[NEXTSIZE]; /* child per byte value, NULL if absent */
    bool Accepted;                       /* true if an inserted string ends here */
};

/* Trie handle: the root node plus the number of nodes allocated so far. */
struct TrieTreeRoot
{
    int NodeCount;
    struct TrieTreeNode *Tree;
};

/* Singly linked list of LR(1) items; together the nodes form one item set. */
struct Collection
{
    char *Expression;        /* item text: LHS, production with dot, look-ahead */
    struct Collection *next;
};

/* One member of the canonical collection. */
struct CoreCollection
{
    struct Collection *S;         /* the item set */
    int id;                       /* item set number */
    bool marked;                  /* true once the set has been processed */
    unsigned char *FeatureString; /* serialized trie of the items */
    int FeatureStringLength;      /* length of FeatureString in bytes */
    unsigned int FeatureHash;     /* ELF hash of FeatureString */
    struct CoreCollection *next;
};

/* Growable table of Goto transitions, one row of NEXTSIZE ints per item set. */
struct Record
{
    int RecordRow;    /* highest row index initialised so far (-1 if none) */
    int RecordRowMax; /* number of row slots allocated */
    int **Record;     /* row pointers; a NULL row has not been allocated yet */
};

/*
 * Three-level pointer:
 *   level 1 is indexed by the non-terminal and selects its production group,
 *   level 2 selects a single production (the group is NULL-terminated),
 *   level 3 is the symbol string of that production.
 * Conventions: upper-case letters are non-terminals, every other character is
 * treated as a terminal, '\377' is eof, '\0' is epsilon and '\376' is the
 * placeholder (the dot of an item).
 */
char ***GrammerRule;

/*
 * Grammar notation:
 *   each string is one production,
 *   its first character is the non-terminal on the left-hand side
 *   (productions are grouped per non-terminal by InitizationGrammerRule),
 *   the list ends with a string whose first character is NUL.
 */
char *Grammer[] =
{
    "GL",
    "LLP", "LP",
    "P(P)", "P()",
    "\0"
};

/*
 * Allocates Count * Size zero-filled bytes and terminates the program when the
 * allocation fails, so callers never have to handle a NULL result.
 */
static void *AllocZeroed(size_t Count, size_t Size)
{
    void *Memory;

    if (Count == 0) {Count = 1;} /* calloc(0, n) may legitimately return NULL */
    if (Size == 0) {Size = 1;}
    Memory = calloc(Count, Size);
    if (Memory == NULL)
    {
        fprintf(stderr, "Out of memory.\n");
        exit(EXIT_FAILURE);
    }
    return Memory;
}

/*
 * Returns a heap copy of Text. Stands in for strdup(), which is POSIX rather
 * than ISO C and is therefore not declared under a strict -std=c11 build.
 */
static char *CopyString(const char *Text)
{
    size_t Length = strlen(Text) + 1;
    char *Copy = AllocZeroed(Length, 1);

    memcpy(Copy, Text, Length);
    return Copy;
}

/* Releases every node of an item list together with its expression. */
static void FreeCollection(struct Collection *S)
{
    while (S != NULL)
    {
        struct Collection *Following = S->next;
        free(S->Expression);
        free(S);
        S = Following;
    }
}

/* Recursively releases a trie node and everything below it. */
static void FreeTrieNode(struct TrieTreeNode *Node)
{
    int i;

    if (Node == NULL) {return;}
    for (i = 0; i < NEXTSIZE; i++)
    {
        FreeTrieNode(Node->Next[i]);
    }
    free(Node);
}

/*
 * Builds and initialises an empty trie.
 * return : a new trie root holding a single, empty root node
 */
struct TrieTreeRoot *BuildTrieTree(void)
{
    struct TrieTreeRoot *Root = AllocZeroed(1, sizeof *Root);

    Root->NodeCount = 1;
    Root->Tree = AllocZeroed(1, sizeof *Root->Tree);
    return Root;
}

/*
 * Inserts a string into the trie.
 * Root : struct TrieTreeRoot*  trie to modify
 * Item : char*                 NUL-terminated string to insert
 */
void InsertItem(struct TrieTreeRoot *Root, char *Item)
{
    struct TrieTreeNode *Ptr;
    int index = 0;
    unsigned char Charactor;

    if (Root == NULL || Root->Tree == NULL || Item == NULL) {return;}

    Ptr = Root->Tree;
    /* The byte is read as unsigned char so '\376' and '\377' index correctly. */
    while ((Charactor = (unsigned char)Item[index]) != '\0')
    {
        if (Ptr->Next[Charactor] == NULL)
        {
            Ptr->Next[Charactor] = AllocZeroed(1, sizeof *Ptr);
            Root->NodeCount++;
        }
        Ptr = Ptr->Next[Charactor];
        index++;
    }

    Ptr->Accepted = true;
}

/*
 * Recursively serializes a trie node.
 * Node     : struct TrieTreeNode*  node to serialize
 * WritePtr : unsigned char*        position in the feature string to write at
 * return   : the write position just past everything written
 *
 * Per node the layout is: accepted flag, the labels of its outgoing edges in
 * ascending order, a '\0' separator, then the serialized children in the same
 * order.
 */
unsigned char *DoFeature(struct TrieTreeNode *Node, unsigned char *WritePtr)
{
    int i, count = 0;
    unsigned char *EdgeLabels;

    *WritePtr++ = (unsigned char)Node->Accepted; /* accepted flag */

    EdgeLabels = WritePtr;                       /* start of this node's labels */
    for (i = 0; i < NEXTSIZE; i++)
    {
        if (Node->Next[i] != NULL)
        {
            *WritePtr++ = (unsigned char)i;
            count++;
        }
    }

    *WritePtr++ = '\0';                          /* group separator */

    for (i = 0; i < count; i++)                  /* recurse along every edge */
    {
        WritePtr = DoFeature(Node->Next[EdgeLabels[i]], WritePtr);
    }

    return WritePtr;
}

/*
 * Produces the feature string of a trie, i.e. its serialization.
 * Root         : struct TrieTreeRoot*  trie to serialize
 * StringLength : int*                  receives the length (the string is binary)
 * return       : newly allocated feature string, owned by the caller
 */
unsigned char *GetFeatureString(struct TrieTreeRoot *Root, int *StringLength)
{
    /*
     * Each node writes its flag, one label per child and a separator. Over N
     * nodes that is 2N + (N - 1) = 3N - 1 bytes, so 3N is always enough.
     */
    unsigned char *FeatureString = AllocZeroed((size_t)Root->NodeCount, 3);
    unsigned char *WritePtr = DoFeature(Root->Tree, FeatureString);

    *StringLength = (int)(WritePtr - FeatureString);
    return FeatureString;
}

/*
 * Builds GrammerRule from Grammer, grouping the productions by their
 * left-hand non-terminal. A table built by an earlier call is released first.
 */
void InitizationGrammerRule(void)
{
    int ProductionCount[NEXTSIZE] = {0};
    int index;
    unsigned char Unterminal;

    if (GrammerRule != NULL)
    {
        for (index = 0; index < NEXTSIZE; index++)
        {
            free(GrammerRule[index]);
        }
        free(GrammerRule);
    }

    /* One slot per possible byte value, sized in pointers rather than ints. */
    GrammerRule = AllocZeroed(NEXTSIZE, sizeof *GrammerRule);

    /* Count the productions of every non-terminal. */
    for (index = 0; (Unterminal = (unsigned char)Grammer[index][0]) != '\0'; index++)
    {
        ProductionCount[Unterminal]++;
    }

    /* Store the productions; every group keeps a trailing NULL terminator. */
    for (index = 0; (Unterminal = (unsigned char)Grammer[index][0]) != '\0'; index++)
    {
        int blank = 0;

        if (GrammerRule[Unterminal] == NULL)
        {
            GrammerRule[Unterminal] = AllocZeroed((size_t)ProductionCount[Unterminal] + 1,
                                                  sizeof **GrammerRule);
        }
        while (GrammerRule[Unterminal][blank] != NULL) {blank++;} /* first free slot */
        GrammerRule[Unterminal][blank] = &Grammer[index][1];
    }
}

/*
 * Counts the terminals of the grammar.
 * return : the number of terminal occurrences on all right-hand sides. A
 *          terminal used several times is counted every time, so the value is
 *          an upper bound on the number of distinct terminals.
 */
int GetTerminalCount(void)
{
    int i, TerminalCount = 0;

    if (GrammerRule == NULL) {return 0;}

    for (i = 0; i < NEXTSIZE; i++)
    {
        int k;

        if (GrammerRule[i] == NULL) {continue;}
        for (k = 0; GrammerRule[i][k] != NULL; k++)
        {
            int n;

            for (n = 0; GrammerRule[i][k][n] != '\0'; n++)
            {
                char c = GrammerRule[i][k][n];
                if (c < 'A' || c > 'Z')
                {
                    TerminalCount++;
                }
            }
        }
    }
    return TerminalCount;
}

/*
 * Worker for GetFIRST. Visited marks non-terminals already expanded, which
 * stops the recursion on left-recursive productions such as L -> L P.
 */
static void CollectFirst(unsigned char Token, char *FIRST, int *Ptr, bool *Visited)
{
    int i;

    if (Token >= 'A' && Token <= 'Z')
    {
        if (GrammerRule == NULL || GrammerRule[Token] == NULL || Visited[Token])
        {
            return;
        }
        Visited[Token] = true;
        for (i = 0; GrammerRule[Token][i] != NULL; i++)
        {
            CollectFirst((unsigned char)GrammerRule[Token][i][0], FIRST, Ptr, Visited);
        }
        return;
    }

    /* Terminal: store it once, so the set never exceeds NEXTSIZE entries. */
    for (i = 0; i < *Ptr; i++)
    {
        if ((unsigned char)FIRST[i] == Token) {return;}
    }
    FIRST[*Ptr] = (char)Token;
    *Ptr = *Ptr + 1;
}

/*
 * Computes the FIRST set of a symbol.
 * Token : unsigned char  symbol whose FIRST set is wanted
 * FIRST : char*          output buffer; entries are appended without duplicates
 * Ptr   : int*           number of entries in FIRST, updated on return
 *
 * Only the first symbol of each production is followed, so a non-terminal
 * with an epsilon production contributes '\0' instead of the symbols that
 * could follow it. A non-terminal without productions contributes nothing.
 */
void GetFIRST(unsigned char Token, char *FIRST, int *Ptr)
{
    bool Visited[NEXTSIZE] = {false};

    if (FIRST == NULL || Ptr == NULL) {return;}
    CollectFirst(Token, FIRST, Ptr, Visited);
}

/*
 * Prints one LR(1) item.
 * Item : struct Collection*  item to print
 *
 * The last character of the expression is the look-ahead symbol; everything
 * between the left-hand side and that character is the dotted production.
 * Expressions shorter than two characters are not valid items and are skipped.
 */
void PrintItem(struct Collection *Item)
{
    int i = 1;

    if (Item == NULL || Item->Expression == NULL || strlen(Item->Expression) < 2)
    {
        return;
    }

    printf("[%c ->", Item->Expression[0]);
    for (; Item->Expression[i + 1] != '\0'; i++)
    {
        printf(" ");
        switch (Item->Expression[i])
        {
        case '\377':
            printf("<eof>");
            break;
        case '\376':
            printf("<@>");
            break;
        default:
            printf("%c", Item->Expression[i]);
            break;
        }
    }
    if (Item->Expression[i] == '\377')
    {
        printf(", <eof>]");
    }
    else
    {
        printf(", %c]", Item->Expression[i]);
    }
}

/*
 * Prints an item set.
 * S : struct Collection*  first item of the set to print
 */
void PrintCollections(struct Collection *S)
{
    printf("-------- Collection ---------\n");
    for (; S != NULL; S = S->next)
    {
        PrintItem(S);
        printf("\n");
    }
    printf("-----------------------------\n");
}

/*
 * Appends an item to a set unless an equal item is already present.
 * S          : struct Collection*   head of the item set
 * Tail       : struct Collection**  cached tail pointer; may be NULL or point
 *                                   to NULL, in which case the tail is found
 *                                   by walking from S
 * Expression : char*                item text; it is copied, so the caller
 *                                   keeps ownership of its buffer
 *
 * Nothing is added when Expression is NULL or when there is no list to append
 * to (S and *Tail both NULL).
 */
void AddItem(struct Collection *S, struct Collection **Tail, char *Expression)
{
    struct Collection *LocalTail = NULL;
    struct Collection *SPtr;
    struct Collection *NewItem;

    if (Expression == NULL) {return;}
    if (Tail == NULL) {Tail = &LocalTail;} /* no heap allocation, so nothing leaks */
    if ((*Tail) == NULL) {(*Tail) = S;}
    if ((*Tail) == NULL) {return;}
    while ((*Tail)->next != NULL) {(*Tail) = (*Tail)->next;}

    /* Reject duplicates. */
    for (SPtr = S; SPtr != NULL; SPtr = SPtr->next)
    {
        if (SPtr->Expression != NULL && strcmp(SPtr->Expression, Expression) == 0)
        {
            return;
        }
    }

    NewItem = AllocZeroed(1, sizeof *NewItem);
    NewItem->Expression = CopyString(Expression);
    (*Tail)->next = NewItem;
    (*Tail) = NewItem;
}

/*
 * Closure operation: extends S in place with every item implied by it.
 * S             : struct Collection*  item set to close
 * TerminalCount : int                 kept for interface compatibility; the
 *                                     FIRST buffer is sized for every byte value
 *
 * Items appended by AddItem are reached by the same list walk, so a single
 * pass is a complete work-list iteration.
 */
void Closure(struct Collection *S, int TerminalCount)
{
    struct Collection *Ptr, *Tail = S;

    (void)TerminalCount;

    for (Ptr = S; Ptr != NULL; Ptr = Ptr->next) /* for each item [A->β·Cζ,α]∈S */
    {
        char FIRST[NEXTSIZE + 1];
        int FIRSTCount = 0, ProductionIndex;
        unsigned char Unterminal;
        char *Placeholder;

        if (Ptr->Expression == NULL) {continue;}
        Placeholder = strchr(Ptr->Expression, '\376');
        /*
         * The dot must be followed by a production symbol and then by at
         * least one more character, i.e. it may not sit directly in front of
         * the look-ahead. Placeholder[1] is tested first so Placeholder[2] is
         * never read past the terminator.
         */
        if (Placeholder == NULL || Placeholder[1] == '\0' || Placeholder[2] == '\0')
        {
            continue;
        }

        Unterminal = (unsigned char)Placeholder[1];
        if (Unterminal < 'A' || Unterminal > 'Z' ||
            GrammerRule == NULL || GrammerRule[Unterminal] == NULL)
        {
            continue;
        }

        /* FIRST(ζα) depends only on the item, so compute it once per item. */
        memset(FIRST, '\0', sizeof FIRST);
        GetFIRST((unsigned char)Placeholder[2], FIRST, &FIRSTCount);

        for (ProductionIndex = 0; GrammerRule[Unterminal][ProductionIndex] != NULL; ProductionIndex++) /* for each production C->γ∈P */
        {
            char *GRExpr = GrammerRule[Unterminal][ProductionIndex];
            size_t GRExprLength = strlen(GRExpr);
            /* LHS + dot + production + look-ahead + NUL */
            char *Expr = AllocZeroed(GRExprLength + 4, 1);
            int i;

            Expr[0] = (char)Unterminal;
            Expr[1] = '\376';
            memcpy(Expr + 2, GRExpr, GRExprLength);

            for (i = 0; i < FIRSTCount; i++) /* for each b∈FIRST(ζα) */
            {
                if (FIRST[i] != '\0') /* S <- S∪{[C->·γ,b]} */
                {
                    Expr[2 + GRExprLength] = FIRST[i];
                    AddItem(S, &Tail, Expr);
                }
            }
            free(Expr); /* AddItem stores its own copy */
        }
    }
}

/*
 * Goto operation.
 * S             : struct Collection*  source item set
 * Symbol        : char                grammar symbol to move the dot over
 * TerminalCount : int                 number of terminals (passed to Closure)
 * return        : closure of the moved items, owned by the caller; NULL when
 *                 no item of S has the dot in front of Symbol
 */
struct Collection *Goto(struct Collection *S, char Symbol, int TerminalCount)
{
    /* moved <- empty set; a dummy head keeps AddItem usable on an empty list */
    struct Collection *Moved = AllocZeroed(1, sizeof *Moved);
    struct Collection *Tail = Moved;
    struct Collection *Result;

    for (; S != NULL; S = S->next) /* for each item i∈S */
    {
        char *Placeholder, *Expr;
        size_t Offset;

        if (S->Expression == NULL) {continue;}
        Placeholder = strchr(S->Expression, '\376');
        /*
         * if the form of i is [α->β·xζ,a]. Requiring a character after x keeps
         * the dot from being moved over the look-ahead symbol.
         */
        if (Placeholder == NULL || Placeholder[1] == '\0' ||
            Placeholder[1] != Symbol || Placeholder[2] == '\0')
        {
            continue;
        }

        Offset = (size_t)(Placeholder - S->Expression);
        Expr = CopyString(S->Expression);
        Expr[Offset] = Symbol;
        Expr[Offset + 1] = '\376';
        AddItem(Moved, &Tail, Expr); /* moved <- moved∪{[α->βx·ζ,a]} */
        free(Expr);                  /* AddItem stores its own copy */
    }

    Result = Moved->next;
    free(Moved);
    Closure(Result, TerminalCount); /* return closure(moved) */
    return Result;
}

/*
 * ELF hash of a binary string.
 * str    : unsigned char*  data to hash
 * length : int             number of bytes in str
 * return : 31-bit hash value; 0 for a NULL or empty input
 */
unsigned int ELFHash_Bin(unsigned char *str, int length)
{
    int i;
    unsigned int hash = 0, x = 0;

    if (str == NULL) {return 0;}

    for (i = 0; i < length; i++)
    {
        hash = (hash << 4) + str[i];
        if ((x = hash & 0xF0000000U) != 0)
        {
            hash ^= (x >> 24);
            hash &= ~x;
        }
    }
    return (hash & 0x7FFFFFFF);
}

/*
 * Computes the feature string, its length and its hash for an item set.
 * CC : struct CoreCollection*  set whose feature fields are filled in
 *
 * A feature string already stored in CC is overwritten without being freed,
 * because CC may come from an uninitialised allocation.
 */
void CompleteFeature(struct CoreCollection *CC)
{
    struct TrieTreeRoot *TrieRoot;
    struct Collection *SPtr;

    if (CC == NULL) {return;}

    TrieRoot = BuildTrieTree();
    for (SPtr = CC->S; SPtr != NULL; SPtr = SPtr->next)
    {
        InsertItem(TrieRoot, SPtr->Expression);
    }
    CC->FeatureString = GetFeatureString(TrieRoot, &CC->FeatureStringLength);
    CC->FeatureHash = ELFHash_Bin(CC->FeatureString, CC->FeatureStringLength);

    /* The trie is only needed to produce the serialization. */
    FreeTrieNode(TrieRoot->Tree);
    free(TrieRoot);
}

/*
 * Looks an item set up in the canonical collection.
 * CC     : struct CoreCollection*  head of the canonical collection
 * S      : struct CoreCollection*  set to look for; its feature fields are
 *                                  (re)computed by this call
 * return : id of the equal set in CC, or -1 when there is none
 */
int CollectionExist(struct CoreCollection *CC, struct CoreCollection *S)
{
    struct CoreCollection *CCPtr;

    if (S == NULL) {return -1;}

    /* Compute the feature of S. */
    CompleteFeature(S);

    /* Compare it with every known set. */
    for (CCPtr = CC; CCPtr != NULL; CCPtr = CCPtr->next)
    {
        /*
         * Only a missing string means "not computed yet"; a hash of 0 is a
         * legitimate value and must not trigger a recomputation.
         */
        if (CCPtr->FeatureString == NULL)
        {
            CompleteFeature(CCPtr);
        }
        if (CCPtr->FeatureHash == S->FeatureHash &&
            CCPtr->FeatureStringLength == S->FeatureStringLength &&
            memcmp(CCPtr->FeatureString, S->FeatureString, (size_t)S->FeatureStringLength) == 0)
        {
            return CCPtr->id;
        }
    }
    return -1;
}

/*
 * Adds an item set to the canonical collection unless an equal one exists.
 * CC     : struct CoreCollection*   head of the canonical collection
 * Tail   : struct CoreCollection**  cached tail pointer; may be NULL or point
 *                                   to NULL
 * S      : struct Collection*       item set to add; it stays owned by the
 *                                   caller when an equal set already exists
 * CCid   : int                      highest id handed out so far
 * return : id of the set in the collection (CCid + 1 when it was added), or
 *          -1 when there is no collection to append to
 */
int AddCoreCollection(struct CoreCollection *CC, struct CoreCollection **Tail, struct Collection *S, int CCid)
{
    struct CoreCollection *LocalTail = NULL;
    struct CoreCollection *CCItem;
    int id;

    if (Tail == NULL) {Tail = &LocalTail;} /* no heap allocation, so nothing leaks */
    if ((*Tail) == NULL) {(*Tail) = CC;}
    if ((*Tail) == NULL) {return -1;}
    while ((*Tail)->next != NULL) {(*Tail) = (*Tail)->next;}

    CCItem = AllocZeroed(1, sizeof *CCItem);
    CCItem->id = CCid + 1;
    CCItem->marked = false;
    CCItem->S = S;

    id = CollectionExist(CC, CCItem);
    if (id == -1) /* if temp!∈CC */
    {
        id = CCItem->id;
        (*Tail)->next = CCItem; /* CC <- CC∪{temp} */
        (*Tail) = CCItem;
    }
    else
    {
        /* Already known: drop the wrapper built for the lookup. */
        free(CCItem->FeatureString);
        free(CCItem);
    }
    return id;
}

/*
 * Records Goto[CCi, Symbol] -> CCj.
 * RecordTable : struct Record*  transition table, grown on demand
 * CCi         : int             id of the source item set
 * Symbol      : unsigned char   transition symbol
 * CCj         : int             id of the destination item set
 *
 * A cell value of 0 means "empty", so a transition to set 0 cannot be stored.
 */
void Record(struct Record *RecordTable, int CCi, unsigned char Symbol, int CCj)
{
    if (RecordTable == NULL || CCi < 0) {return;}

    if (RecordTable->RecordRow < CCi) /* the row lies beyond the initialised part */
    {
        int Row;

        /* Row slots are allocated 32 at a time. */
        if (RecordTable->Record == NULL || RecordTable->RecordRowMax <= CCi)
        {
            int NewMax = (CCi / 32 + 1) * 32;
            int **Grown = realloc(RecordTable->Record, (size_t)NewMax * sizeof *Grown);

            if (Grown == NULL)
            {
                fprintf(stderr, "Out of memory.\n");
                exit(EXIT_FAILURE);
            }
            RecordTable->Record = Grown;
            RecordTable->RecordRowMax = NewMax;
        }

        /* Clear skipped rows so they are never read as stale pointers. */
        for (Row = (RecordTable->RecordRow < 0) ? 0 : RecordTable->RecordRow + 1; Row <= CCi; Row++)
        {
            RecordTable->Record[Row] = NULL;
        }
        RecordTable->RecordRow = CCi;
    }

    if (RecordTable->Record[CCi] == NULL)
    {
        RecordTable->Record[CCi] = AllocZeroed(NEXTSIZE, sizeof **RecordTable->Record);
    }

    if (RecordTable->Record[CCi][Symbol] == CCj)
    {
        /* Same transition recorded before; nothing to do. */
    }
    else if (RecordTable->Record[CCi][Symbol] != 0)
    {
        printf("Find Conflict.\n");
    }
    else
    {
        RecordTable->Record[CCi][Symbol] = CCj;
        printf("[CC%d, %c] -> CC%d\n", CCi, Symbol, CCj);
    }
}

/*
 * Computes the canonical collection of LR(1) item sets and the Goto table for
 * the grammar held in GrammerRule, printing each new set and transition.
 */
void LRCollection(void)
{
    int TerminalCount = GetTerminalCount(), CCid = 0, Row;
    struct Record *RecordTable;
    struct Collection *S;
    struct CoreCollection *CC, *CCPtr, *CCTail;

    RecordTable = AllocZeroed(1, sizeof *RecordTable);
    RecordTable->RecordRow = -1;
    RecordTable->RecordRowMax = 32;
    RecordTable->Record = AllocZeroed(32, sizeof *RecordTable->Record);

    S = AllocZeroed(1, sizeof *S);
    S->Expression = CopyString("G\376L\377");
    Closure(S, TerminalCount); /* CC0 <- closure({[S -> · S', eof]}) */

    CC = AllocZeroed(1, sizeof *CC);
    CC->id = 0;
    CC->marked = false;
    CC->S = S; /* CC <- {CC0} */
    CompleteFeature(CC);
    CCTail = CC;

    for (CCPtr = CC; CCPtr != NULL; CCPtr = CCPtr->next) /* while (new sets are still being added to CC) */
    {
        struct Collection *ExprPtr;

        if (CCPtr->marked) {continue;} /* for each unmarked set CCi∈CC */
        CCPtr->marked = true;          /* mark CCi as processed */

        for (ExprPtr = CCPtr->S; ExprPtr != NULL; ExprPtr = ExprPtr->next) /* for each x following a · in an item in CCi */
        {
            char *Placeholder = strchr(ExprPtr->Expression, '\376');
            unsigned char PrevSym;
            struct Collection *temp;
            int temp_id;

            if (Placeholder == NULL || Placeholder[1] == '\0' || Placeholder[2] == '\0')
            {
                continue;
            }

            PrevSym = (unsigned char)Placeholder[1];
            temp = Goto(CCPtr->S, (char)PrevSym, TerminalCount); /* temp <- goto(CCi, x) */
            temp_id = AddCoreCollection(CC, &CCTail, temp, CCid);
            if (temp_id > CCid)
            {
                printf("Goto(CC%d, %c):\n", CCPtr->id, PrevSym);
                PrintCollections(temp);
                CCid++; /* a new id was handed out */
            }
            else
            {
                /* An equal set already exists, so this copy is not kept. */
                FreeCollection(temp);
            }
            /* record transition from CCi to temp on x */
            Record(RecordTable, CCPtr->id, PrevSym, temp_id);
            printf("\n");
        }
    }

    /* Release the transition table and the canonical collection. */
    for (Row = 0; Row <= RecordTable->RecordRow; Row++)
    {
        free(RecordTable->Record[Row]);
    }
    free(RecordTable->Record);
    free(RecordTable);

    while (CC != NULL)
    {
        struct CoreCollection *Following = CC->next;
        FreeCollection(CC->S);
        free(CC->FeatureString);
        free(CC);
        CC = Following;
    }
}

int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    InitizationGrammerRule(); /* build the grammar table */

    LRCollection();
    return 0;
}