近义词处理
昨天主要做近义词的加权处理:对抓来的数据,根据第三方数据统计每个词出现的次数,以此判定每个词的权值,并在每组词中找出权值最大的那个。
主要熟悉了:文件读取的方法:
主要问题:空行的处理,遇见空行,不能单纯的用NULL来判定结束,不然就会卡死。
对于每个存储单词的数组,每次要初始化
fp = fopen("文件", "r"); //设置文件指针;文件指针和别的指针不一样,不能直接操作,要结合文件函数进行读取(只读用 "r";"w+" 会清空文件)
while ( fgets (strGet, 每行长度, fp) != NULL )
{
对strGet串 进行读取; //文件的 读取是一行一行进行的
}
加权代码:
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define X_Word_Long 210

/* Characters that separate words inside one line of the synonym file. */
static int is_delimiter(char c)
{
    return c == ' ' || c == '\n' || c == '\r';
}

/*
 * Count how many times `word` occurs in `corpus`, scanning it line by line
 * from the beginning. Overlapping occurrences are counted, as the search
 * resumes one byte after each hit. `word` must be non-empty: strstr() with
 * an empty needle always succeeds, which would never terminate.
 */
static int count_occurrences(FILE *corpus, const char *word)
{
    char line[X_Word_Long];
    int count = 0;

    rewind(corpus); /* restart the scan without reopening the file */
    while (fgets(line, sizeof line, corpus) != NULL) {
        const char *hit = strstr(line, word);
        while (hit != NULL) {
            count++;
            hit = strstr(hit + 1, word);
        }
    }
    return count;
}

/*
 * Weighting pass.
 *
 * Reads groups of words from "1.1.txt" (one group per line, words separated
 * by spaces), counts each word's occurrences in "log06_121.txt", and writes
 * "word--count " entries to "du.txt", one output line per input line.
 * Progress (a running index, then "word count") is echoed to stdout.
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot
 * be flushed.
 */
int main(void)
{
    FILE *fp = NULL;
    FILE *fq = NULL;
    FILE *fr = NULL;
    char ss[X_Word_Long];
    char strWord[X_Word_Long];
    int t = 0;
    int rc = 1;

    fp = fopen("1.1.txt", "r");
    if (fp == NULL) {
        perror("1.1.txt");
        goto out;
    }
    fq = fopen("log06_121.txt", "r");
    if (fq == NULL) {
        perror("log06_121.txt");
        goto out;
    }
    fr = fopen("du.txt", "w");
    if (fr == NULL) {
        perror("du.txt");
        goto out;
    }

    while (fgets(ss, sizeof ss, fp) != NULL) {
        size_t len = strlen(ss);
        size_t pos = 0;

        while (pos < len) {
            size_t n = 0;
            int num;

            /* Copy the next word; it always fits because both buffers
             * have the same size and ss is NUL-terminated. */
            while (pos < len && !is_delimiter(ss[pos])) {
                strWord[n++] = ss[pos++];
            }
            strWord[n] = '\0';

            /* Skip separators, never stepping past the end of the line. */
            while (pos < len && is_delimiter(ss[pos])) {
                pos++;
            }

            /* Blank line or leading separators: nothing to count. */
            if (n == 0) {
                continue;
            }

            num = count_occurrences(fq, strWord);

            printf("%d\n", t++);
            printf("%s %d\n", strWord, num);
            /* Write the count in normal decimal order. */
            fprintf(fr, "%s--%d ", strWord, num);
        }
        fputc('\n', fr);
    }
    rc = 0;

out:
    if (fr != NULL && fclose(fr) != 0) {
        perror("du.txt");
        rc = 1;
    }
    if (fq != NULL) {
        fclose(fq);
    }
    if (fp != NULL) {
        fclose(fp);
    }
    return rc;
}
判权代码:
#include <stdio.h>
#include <string.h>

#define X_LONG 300
#define MAXNUM -1

/*
 * Selection pass.
 *
 * Reads "du.txt", where each line is a group of "word--count " entries, and
 * writes one line per group to "1.txt" in the form
 *     [word1 word2 ... ||best]
 * where `best` is the word with the largest count (on a tie the later word
 * wins, because the comparison is >=). The chosen word of each group is also
 * echoed to stdout.
 *
 * Returns 0 on success, 1 if a file cannot be opened or the output cannot
 * be flushed.
 */
int main(void)
{
    FILE *fp;
    FILE *fq;
    char strGet[X_LONG];
    char strMax[X_LONG];
    char strAt[X_LONG];
    int rc = 0;

    fp = fopen("du.txt", "r");
    if (fp == NULL) {
        perror("du.txt");
        return 1;
    }
    fq = fopen("1.txt", "w");
    if (fq == NULL) {
        perror("1.txt");
        fclose(fp);
        return 1;
    }

    while (fgets(strGet, sizeof strGet, fp) != NULL) { /* one group per line */
        size_t len = strlen(strGet);
        size_t pos = 0;
        int max_num = MAXNUM;

        strMax[0] = '\0';
        fputc('[', fq);

        while (pos < len) {
            /* The word runs up to the "--" separator, so a single '-'
             * inside a word does not end it. */
            const char *sep = strstr(strGet + pos, "--");
            size_t word_len;
            int num = 0;

            if (sep == NULL) {
                break; /* only the trailing newline (or junk) is left */
            }
            word_len = (size_t)(sep - (strGet + pos));
            memcpy(strAt, strGet + pos, word_len);
            strAt[word_len] = '\0';

            fputs(strAt, fq);
            fputs(" ", fq);

            pos += word_len + 2; /* step over the word and "--" */

            /* Parse the decimal count; stop at the first non-digit so a
             * missing trailing space cannot run past the buffer. */
            while (pos < len && strGet[pos] >= '0' && strGet[pos] <= '9') {
                num = num * 10 + (strGet[pos] - '0');
                pos++;
            }

            if (num >= max_num) {
                strcpy(strMax, strAt); /* same-sized buffers */
                max_num = num;
            }

            while (pos < len && strGet[pos] == ' ') { /* skip padding */
                pos++;
            }
        }

        fputs("||", fq);
        fputs(strMax, fq);
        fputc(']', fq);
        fputc('\n', fq);
        /* Echo the selected word, not the leftover parse buffer. */
        printf("%s\n", strMax);
    }

    if (fclose(fq) != 0) {
        perror("1.txt");
        rc = 1;
    }
    fclose(fp);
    return rc;
}