BJFU18级-数据结构课程设计单词统计系统(181002222)

#include "pch.h"
#include ".\\00提示.h"
#include ".\\02二叉排序树.h"
#include ".\\03哈希表.h"
#include ".\\自定义算法.h"
using namespace std;
/* Original author's warning: as written, this program does not cope with input
   text that has no trailing punctuation (the word-reading loops misbehave at
   end of file). */

// case 1: lookups based on linear lists
void oporder();
void oporderlist();
void orderlist(char *w[], int j, int i, int num[],int wordrawsum);
void search_bin(char *w[], int j, int num[]);
void linklist();
// case 2: lookups based on a binary search tree
int treenode;// number of tree nodes == number of distinct words (btree() counts up, insertree() counts duplicates back down)
double avl;// average search length; written by several of the search routines
int findnum = 0;// lookup counter for the current word; reset by btree() before each search
int findsum = 0;// running total of comparison steps, accumulated by insertree()
void btree();
bool findtree(Btree T, char* word, bool flag);
void insertree(Btree &T, char* word);
void inorder(Btree T);
int res;// scratch counter incremented by countNumOfLevel(); callers reset it between levels
int Height(Btree p);
void countNumOfLevel(Btree root, int depth);
// Disabled declaration of the abandoned search_bintree() helper:
//Btree search_bintree(char *w[],char* word,int num[],int j);
// Output stream used by inorder(); opened during static initialisation with
// ios::out, so OutFile4.txt is created/truncated as soon as the program starts.
fstream w4("OutFile4.txt", ios::out);
// case 3: lookups based on hash tables
void ophash();
void linehash();
void linkhash();
const int maxa = 5001;// number of slots in the tables used by linehash()

/**
 * Program entry point: shows the top-level menu until the user picks 4 (quit).
 *
 * 1 -> linear-list lookups, 2 -> binary-search-tree lookups, 3 -> hash lookups.
 * Any other number prints the "invalid option" message and re-prompts.
 *
 * Unlike a bare `cin >> op`, a failed read is handled explicitly: end of input
 * leaves the loop, and a non-numeric token is discarded up to the end of the
 * line so the menu cannot spin forever on the same bad character.
 *
 * @return 0 always.
 */
int main()
{
    int op;
    while (1)
    {
        headline();
        if (!(std::cin >> op))
        {
            if (std::cin.eof()) break;      // no more input: leave instead of looping forever
            std::cin.clear();               // recover from the bad token ...
            char skipped;
            while (std::cin.get(skipped) && skipped != '\n') {}  // ... and drop the rest of its line
            printf("请输入有效选项!\n");
            continue;
        }
        if (op == 4) break;
        switch (op)
        {
            case 1: {
                printf("基于线性表的查找\n");
                oporder();
                break;
            }
            case 2: {
                printf("基于二叉排序树的查找\n");
                btree();
                break;
            }
            case 3: {
                printf("基于哈希表的查找\n");
                ophash();
                break;
            }
            default: {
                printf("请输入有效选项!\n");
                break;
            }
        }
    }
    return 0;
}

/***********
 *1.线性表 *
 ***********/
/**
 * Sub-menu for the linear-list lookups.
 *
 * 1 -> array-based list (oporderlist), 2 -> linked list (linklist),
 * 3 -> back to the caller. Other numbers simply show the menu again.
 *
 * The scanf result is checked: end of input returns to the caller, and a
 * non-numeric token is discarded so the loop cannot re-read it forever.
 */
void oporder(){
    while (1)
    {
        printf("请按下数字键进行功能选择(不能输入非法字符!):\n");
        printf("(1):顺序表查找\n");
        printf("(2):链表查找\n");
        printf("(3):返回上一级\n");
        int op = 0;
        const int fields = scanf("%d", &op);
        if (fields == EOF) return;          // input exhausted
        if (fields != 1)
        {
            scanf("%*[^\n]");               // throw away the offending line
            continue;
        }
        if (op == 1) oporderlist();
        else if (op == 2) linklist();
        else if (op == 3) break;
    }
}

/**
 * Builds the array-based word list from InFile.txt and runs its sub-menu.
 *
 * A word is a maximal run of lower-cased letters and apostrophes; it is kept
 * only when its first character is a letter. Each distinct word gets its own
 * fixed-size buffer in w[] and an occurrence count in num[]. The list is then
 * sorted with quick() and handed to orderlist() (sequential search) or
 * search_bin() (binary search).
 *
 * Differences from the earlier version, all defect fixes:
 *  - end of file is detected on every read, so a file that ends inside a word
 *    or cannot be opened no longer duplicates a character or loops forever;
 *  - words longer than the buffer are truncated instead of overflowing it, and
 *    distinct words beyond the table capacity are ignored with a warning;
 *  - a buffer is allocated only for a new distinct word, and everything is
 *    released on return (free for the strings, delete[] for the array);
 *  - OutFile1.txt is no longer opened (and thereby truncated) just by
 *    entering this menu.
 */
void oporderlist()//a. selection menu
{
    const int kMaxWords = 5003;            // capacity of the word / frequency tables
    char word[33];                         // word being assembled
    char **w = new char*[kMaxWords];       // distinct words
    int num[kMaxWords];                    // occurrence count of w[k]
    int len = 0;                           // characters collected in word
    int j = 0;                             // number of distinct words
    int wordsdrawsum = 0;                  // total number of accepted word occurrences
    bool warned = false;

    std::ifstream r("InFile.txt", std::ios::in);
    if (!r) printf("无法打开InFile.txt\n");
    for (;;)
    {
        char ch = '\0';
        const bool got = !r.get(ch).fail();
        if (got)
        {
            // lower-case so that differently capitalised spellings count as one word
            ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
            if (ch == '\'' || (ch >= 'a' && ch <= 'z'))
            {
                if (len < static_cast<int>(sizeof(word)) - 1) word[len++] = ch;
                continue;
            }
        }
        // a separator or the end of the file closes the current word
        if (len > 0 && word[0] >= 'a' && word[0] <= 'z')
        {
            word[len] = '\0';
            wordsdrawsum++;
            int k = 0;
            while (k < j && strcmp(w[k], word) != 0) ++k;
            if (k < j)
            {
                num[k] += 1;               // seen before
            }
            else if (j < kMaxWords)
            {
                // every entry gets a full-size buffer because quick() may
                // exchange the contents of two entries
                w[j] = (char *)malloc(sizeof(word));
                strcpy(w[j], word);
                num[j] = 1;
                ++j;
            }
            else if (!warned)
            {
                printf("单词数量超过上限%d,多余的新单词将被忽略\n", kMaxWords);
                warned = true;
            }
        }
        len = 0;
        if (!got) break;
    }

    /* sort into dictionary order */
    quick(w, num, 0, j - 1);

    while (1)
    {
        printf("请按下数字键进行功能选择(不能输入非法字符!):\n");
        printf("(1):顺序查找\n");
        printf("(2):折半查找\n");
        printf("(3):返回上一级\n");
        int op = 0;
        const int fields = scanf("%d", &op);
        if (fields == EOF) break;
        if (fields != 1)
        {
            scanf("%*[^\n]");              // discard the non-numeric line
            continue;
        }
        if (op == 1) orderlist(w, j, 0, num, wordsdrawsum);
        else if (op == 2) search_bin(w, j, num);
        else if (op == 3) break;
    }

    for (int k = 0; k < j; ++k) free(w[k]);
    delete[] w;
}

//01顺序表的顺序查找 
/**
 * Menu for the sorted array list: frequency report and sequential search.
 *
 * @param w            distinct words, already sorted
 * @param j            number of entries in w / num
 * @param i            unused; kept for interface compatibility
 * @param num          occurrence count of each word, parallel to w
 * @param wordsdrawsum total number of word occurrences; only the disabled
 *                     low-frequency filter (former option 4) used it
 *
 * Option 1 writes "<count>" followed by one "<word> <frequency>" line per
 * entry to OutFile1.txt. Option 2 times a front-to-back sequential search and
 * prints the frequency, the elapsed time and the average search length
 * (j + 1) / 2, which is also stored in the global avl. Anything else returns.
 *
 * The query is read with a width limit so it cannot overflow its buffer, and
 * a failed menu read is treated as "return".
 */
void orderlist(char *w[], int j, int i, int num[],int wordsdrawsum) {
    (void)i;
    (void)wordsdrawsum;
    while (1)
    {
        function();
        int op = 0;
        if (scanf("%d", &op) != 1) break;
        if (op == 1)    /* frequency report -> OutFile1.txt */
        {
            std::fstream w1("OutFile1.txt", std::ios::out);
            w1 << j << std::endl;
            for (int k = 0; k < j; k++)
                w1 << w[k] << ' ' << num[k] << std::endl;
            printf("统计完成,请查看对应OutFile1.txt文件\n");
        }
        else if (op == 2)
        {
            /* timed sequential search in the sorted list */
            char find[50] = "";
            printf("请输入你要查询的单词 : ");
            std::cin.width(sizeof(find));   // at most 49 characters are stored
            std::cin >> find;
            LARGE_INTEGER timebegin;        // start of measurement
            LARGE_INTEGER timeend;          // end of measurement
            LARGE_INTEGER frequency;        // counter frequency
            QueryPerformanceFrequency(&frequency);
            double quadpart = (double)frequency.QuadPart;
            QueryPerformanceCounter(&timebegin);
            int hit = 0;
            while (hit < j && strcmp(find, w[hit]) != 0) ++hit;
            QueryPerformanceCounter(&timeend);
            if (hit == j) std::cout << "查找失败" << std::endl;
            else {
                std::cout << "此单词的词频为: " << num[hit] << std::endl;
                std::cout << "查找该单词所花的时间";
                printf("%.15lf\n", (double)(timeend.QuadPart - timebegin.QuadPart) / quadpart*100000);
                std::cout << "平均查找长度";
                avl = (j + 1) / 2.0;
                printf("%.1lf\n", avl);
            }
        }
        else break;
        std::cout << "按任意数字键返回" << std::endl;
        scanf("%d", &op);
    }
}

//02链表的顺序查找  
/**
 * Linked-list variant: frequency report and sequential search.
 *
 * On every menu round the list is rebuilt from InFile.txt (words are appended
 * in order of first appearance, duplicates only bump the count), the menu is
 * shown, and the list is released again.
 *
 * Option 1 sorts the list by exchanging node payloads and writes it to
 * OutFile2.txt; option 2 times a sequential search and prints frequency,
 * elapsed time and average search length (j + 1) / 2 (stored in avl).
 * Anything else returns.
 *
 * Fixes compared with the earlier version: the head node's uninitialised text
 * is no longer compared against words; end of file / a missing file / an
 * over-long word no longer cause an endless loop or buffer overflow; nodes are
 * freed instead of leaking each round; OutFile2.txt is only opened when the
 * report is requested; the query is read with a width limit.
 */
void linklist() {
    struct Words {
        int num;          // occurrence count
        char data[50];    // the word
        Words *next;
    };
    while (1)
    {
        /* ---- build the list ---- */
        Words *L = new Words;              // head (sentinel) node, carries no word
        L->num = 0;
        L->data[0] = '\0';
        L->next = NULL;
        Words *tail = L;
        int j = 0;                         // number of distinct words
        {
            std::ifstream r("InFile.txt", std::ios::in);
            if (!r) printf("无法打开InFile.txt\n");
            char word[33];
            int len = 0;
            for (;;)
            {
                char ch = '\0';
                const bool got = !r.get(ch).fail();
                if (got)
                {
                    ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
                    if (ch == '\'' || (ch >= 'a' && ch <= 'z'))
                    {
                        if (len < static_cast<int>(sizeof(word)) - 1) word[len++] = ch;
                        continue;
                    }
                }
                // separator or end of file: finish the pending word
                if (len > 0 && word[0] >= 'a' && word[0] <= 'z')
                {
                    word[len] = '\0';
                    Words *pos = L->next;
                    while (pos != NULL && strcmp(pos->data, word) != 0) pos = pos->next;
                    if (pos != NULL)
                    {
                        pos->num = pos->num + 1;
                    }
                    else
                    {
                        Words *p = new Words;
                        p->num = 1;
                        strcpy(p->data, word);
                        p->next = NULL;
                        tail->next = p;
                        tail = p;
                        j++;
                    }
                }
                len = 0;
                if (!got) break;
            }
        }

        /* ---- menu ---- */
        function();
        int op = 0;
        bool leave = (scanf("%d", &op) != 1);
        if (!leave && op == 1)
        {
            // sort by exchanging the payload of out-of-order node pairs
            for (Words *u = L->next; u != NULL; u = u->next)
                for (Words *v = u->next; v != NULL; v = v->next)
                    if (cmpstr(v->data, u->data)) {
                        char temp[50];
                        strcpy(temp, v->data); strcpy(v->data, u->data); strcpy(u->data, temp);
                        int t = v->num; v->num = u->num; u->num = t;
                    }
            std::fstream w2("OutFile2.txt", std::ios::out);
            w2 << j << std::endl;
            for (Words *q = L->next; q != NULL; q = q->next)
                w2 << q->data << " " << q->num << std::endl;
            printf("统计完成,请查看对应OutFile2.txt文件\n");
        }
        else if (!leave && op == 2)
        {
            char find[50] = "";
            printf("请输入你要查询的单词 : ");
            std::cin.width(sizeof(find));
            std::cin >> find;
            LARGE_INTEGER timebegin;        // start of measurement
            LARGE_INTEGER timeend;          // end of measurement
            LARGE_INTEGER frequency;        // counter frequency
            QueryPerformanceFrequency(&frequency);
            double quadpart = (double)frequency.QuadPart;
            QueryPerformanceCounter(&timebegin);
            Words *q = L->next;
            while (q != NULL && strcmp(find, q->data) != 0) q = q->next;
            QueryPerformanceCounter(&timeend);
            if (q == NULL)
            {
                printf("查找失败\n");
            }
            else {
                std::cout << "此单词的词频为: " << q->num << std::endl;
                std::cout << "查找该单词所花的时间";
                printf("%.15lf\n", (double)(timeend.QuadPart - timebegin.QuadPart) / quadpart*100000);
                std::cout << "平均查找长度";
                avl = (j + 1) / 2.0;
                printf("%.1lf\n", avl);
            }
        }
        else leave = true;

        /* ---- release the list built for this round ---- */
        while (L != NULL)
        {
            Words *next = L->next;
            delete L;
            L = next;
        }
        if (leave) break;
        std::cout << "按任意数字键返回" << std::endl;
        scanf("%d", &op);
    }
}
//03折半查找(顺序表) 
/**
 * Menu for the sorted array list: frequency report and binary search.
 *
 * @param w   distinct words sorted in the order defined by cmpstr()
 * @param j   number of entries in w / num
 * @param num occurrence count of each word, parallel to w
 *
 * Option 1 writes the list to OutFile3.txt. Option 2 times a binary search and
 * prints the frequency, the elapsed time and the average search length.
 * Anything else returns.
 *
 * The average search length uses the usual estimate for a binary search over
 * n keys, (n + 1) / n * log2(n + 1) - 1, evaluated in floating point. (The
 * previous expression divided two ints and subtracted 1 inside the product.)
 * The query is read with a width limit so it cannot overflow its buffer.
 */
void  search_bin(char *w[], int j, int num[])
{
    while (1)
    {
        function();
        int op = 0;
        if (scanf("%d", &op) != 1) break;
        if (op == 1)
        {
            std::fstream w3("OutFile3.txt", std::ios::out);
            w3 << j << std::endl;
            for (int k = 0; k < j; k++)
                w3 << w[k] << ' ' << num[k] << std::endl;
            printf("统计完成,请查看对应OutFile3.txt文件\n");
        }
        else if (op == 2)
        {
            char find[50] = "";
            printf("请输入你要查询的单词 : ");
            std::cin.width(sizeof(find));
            std::cin >> find;
            bool flag = 0;
            LARGE_INTEGER timebegin;        // start of measurement
            LARGE_INTEGER timeend;          // end of measurement
            LARGE_INTEGER frequency;        // counter frequency
            QueryPerformanceFrequency(&frequency);
            double quadpart = (double)frequency.QuadPart;
            QueryPerformanceCounter(&timebegin);
            int low = 0, high = j - 1, mid = 0;
            while (low <= high)
            {
                mid = low + (high - low) / 2;
                if (strcmp(find, w[mid]) == 0)      // match
                {
                    flag = 1;
                    break;
                }
                if (cmpstr(find, w[mid])) high = mid - 1;   // query sorts earlier
                else low = mid + 1;                          // query sorts later
            }
            QueryPerformanceCounter(&timeend);
            if (!flag) std::cout << "查找失败" << std::endl;
            else {
                std::cout << "此单词的词频为: " << num[mid] << std::endl;
                std::cout << "查找该单词所花的时间";
                printf("%.15lf\n", (double)(timeend.QuadPart - timebegin.QuadPart) / quadpart*100000);
                std::cout << "平均查找长度";
                // j >= 1 here because a match was found
                const double asl = (j + 1.0) / j * (log(j + 1.0) / log(2.0)) - 1.0;
                printf("%.1lf\n", asl);
            }
        }
        else break;
        std::cout << "按任意数字键返回" << std::endl;
        scanf("%d", &op);
    }
}
void btree()
{
    int op;
    char word[33];//临时单词 
    char buffer;//临时字符 
    int i = 0;
    int j = 0;
    ifstream r("InFile.txt", ios::in);
    r.get(buffer);
    buffer = tolower(buffer);
    Btree T = new Bnode;
    T->num = 0;
    T->lchild = NULL; T->rchild = NULL;
    treenode = 0;
    findsum = 0;
    while (!r.eof()) {
        while (1)
        {
            if (buffer != '\'' && (buffer<'a' || buffer>'z' || buffer == ' '))
                break;//那就结束 
            word[i++] = buffer;//顺序表下标在下一趟+1,方便下一个字母进入 
            r.get(buffer);//读入下一个字符 
            buffer = tolower(buffer);//转换成小写,避免因为大小写不同导致词频不同 
        }
        if (i > 0 && word[0] >= 'a'&&word[0] <= 'z')//避免读入空格等非法字符,导致word[i]多了 
        {
            word[i] = '\0';
            treenode++;
            insertree(T, word);
        }
        i = 0;
        r.get(buffer);
        buffer = tolower(buffer);
    }
    while (1)
    {
        function();
        scanf("%d", &op);
        if (op == 1)    /*词频统计(输出到OutFile)*/
        {
            w4.seekp( 0,ios::beg);
            w4 << treenode << endl;
            //中序遍历,刚好从小到大
            inorder(T);
            printf("统计完成,请查看对应的OutFile4.txt文件\n");
        }
        else if (op == 2)
        {
            /*单词查找,记录时间,在已经排序好的顺序表里查找*/
            char find[50];
            printf("请输入你要查询的单词 : ");
            cin >> find;
            bool flag = 0;
            avl = 0;
            findnum = 0;
            LARGE_INTEGER timebegin;//开始计时
            LARGE_INTEGER timeend;//结束计时
            LARGE_INTEGER frequency;//计时器频率
            QueryPerformanceFrequency(&frequency);
            double quadpart = (double)frequency.QuadPart;
            QueryPerformanceCounter(&timebegin);
            flag = findtree(T, find, 0);
            QueryPerformanceCounter(&timeend);
            if (!flag) cout << "查找失败" << endl;
            else {
                //    cout<<"此单词的词频为: "<<<<endl;
                cout << "查找该单词所花的时间";
                printf("%.15lf\n", (double)(timeend.QuadPart - timebegin.QuadPart) / quadpart * 100000);
                cout << "平均查找长度";
                int sum = 0;//全树的查找次数之和
                res = 0;
                for (int k = 1; k < Height(T); ++k)
                {
                    countNumOfLevel(T, k);
                    sum += res*k;//第K层有res个节点, 节点数res*k这层的深度就是这层的查找次数之和
                    res = 0;
                }
                cout <<  (double)(sum*1.0 / treenode*1.0)<< endl;
            }
        }
        else break;
        cout << "按任意数字键返回" << endl;
        scanf("%d", &op);
    }
}
/**
 * Inserts word into the binary search tree rooted at T, or bumps the count of
 * the node that already holds it.
 *
 * Side effects on globals: findsum grows by one for every node stepped past
 * and once more when a new node is created; treenode is decremented when the
 * word turns out to be a duplicate.
 *
 * @param T    root link of the (sub)tree; updated when the tree was empty
 * @param word NUL-terminated word to insert
 */
void insertree(Btree &T, char* word) {
    Btree *link = &T;                      // the child link we may have to fill in
    while (*link != NULL)
    {
        Bnode *here = *link;
        if (strcmp(here->data, word) == 0)
        {
            // already present: count it and undo the caller's node tally
            here->num += 1;
            treenode--;
            return;
        }
        // node text sorts first -> continue right, otherwise left
        const bool goRight = cmpstr(here->data, word);
        findsum++;
        link = goRight ? &here->rchild : &here->lchild;
    }

    Bnode *leaf = new Bnode;
    strcpy(leaf->data, word);
    leaf->num = 1;                         // a new word starts with frequency 1
    leaf->lchild = NULL;
    leaf->rchild = NULL;
    findsum++;
    *link = leaf;
}
/**
 * Looks word up in the binary search tree rooted at T.
 *
 * When the word is found its frequency is printed to standard output.
 *
 * @param T    root of the (sub)tree to search; may be NULL
 * @param word NUL-terminated word to look for
 * @param flag value to report when the word is not present
 * @return true when the word was found, otherwise flag
 */
bool findtree(Btree T, char* word, bool flag)
{
    for (Btree at = T; at != NULL; )
    {
        if (strcmp(at->data, word) == 0)
        {
            std::cout << "此单词的词频为: " << at->num << std::endl;
            return true;
        }
        // node text sorts first -> the word can only be in the right subtree
        at = cmpstr(at->data, word) ? at->rchild : at->lchild;
    }
    return flag;
}
/**
 * Writes the tree to the global stream w4 in in-order (ascending) sequence,
 * one "<word> <frequency>" line per node.
 *
 * Nodes whose count is 0 are skipped; btree() creates its root with num = 0.
 */
void inorder(Btree T)
{
    if (!T) return;

    inorder(T->lchild);
    const bool counted = (T->num != 0);
    if (counted)
        w4 << T->data << " " << T->num << std::endl;
    inorder(T->rchild);
}
/***********
 *3.哈希表 *
 ***********/
/**
 * Sub-menu for the hash-table lookups.
 *
 * 1 -> open addressing (linehash), 2 -> separate chaining (linkhash),
 * 3 -> back to the caller. Other numbers show the menu again.
 *
 * The scanf result is checked: end of input returns to the caller, and a
 * non-numeric token is discarded so the loop cannot re-read it forever.
 */
void ophash() //f
{
    while (1)
    {
        printf("请按下数字键进行功能选择(不能输入非法字符!):\n");
        printf("(1):基于开放地址法的哈希查找\n");
        printf("(2):基于链地址法哈希查找 \n");
        printf("(3):返回上一级\n");
        int op = 0;
        const int fields = scanf("%d", &op);
        if (fields == EOF) return;          // input exhausted
        if (fields != 1)
        {
            scanf("%*[^\n]");               // throw away the offending line
            continue;
        }
        if (op == 1) linehash();
        else if (op == 2) linkhash();
        else if (op == 3) return;
    }
}
/**
 * Hash lookup with open addressing (linear probing) over a table of maxa slots.
 *
 * Every menu round re-reads InFile.txt into the word list d[], places each
 * distinct word into h[] starting at its home slot hashnum(word) and probing
 * forward with wrap-around, then shows the menu.
 *
 * Option 1 writes the words in ascending order to OutFile5.txt; option 2
 * times a lookup and prints frequency, elapsed time and the average search
 * length. Anything else returns.
 *
 * Average search length = mean number of key comparisons of a successful
 * lookup: a word stored `step` slots after its home slot costs step + 1.
 *
 * Fixes compared with the earlier version:
 *  - a lookup follows the same probe sequence as insertion and stops at the
 *    first empty slot (it used to scan on through uninitialised slots, with
 *    the wrap-around scan nested inside the forward scan);
 *  - the probe distance is counted across the wrap-around instead of going
 *    negative, and displaced words count step + 1 comparisons;
 *  - hash values are reduced into the table range before indexing;
 *  - end of file / a missing file / an over-long word / more than maxa
 *    distinct words no longer cause endless loops or overflows;
 *  - the two large tables are static rather than on the stack.
 */
void linehash()
{
    static struct hashlist {
        int cmp;          // probe distance from the home slot
        int num;          // frequency; 0 marks an empty slot
        char data[50];    // the word
    }h[maxa];
    static struct drawlist {
        int address;      // home slot
        int num;          // frequency
        char data[50];    // the word
    }d[maxa];
    while (1)
    {
        for (int k = 0; k < maxa; k++)
        {
            h[k].num = d[k].num = 0;
            h[k].cmp = 0;
            h[k].data[0] = '\0';
        }

        /* step 1: read the words and compute their home slots */
        int j = 0;                         // number of distinct words
        {
            std::ifstream r("InFile.txt", std::ios::in);
            if (!r) printf("无法打开InFile.txt\n");
            char word[33];
            int len = 0;
            bool warned = false;
            for (;;)
            {
                char ch = '\0';
                const bool got = !r.get(ch).fail();
                if (got)
                {
                    ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
                    if (ch == '\'' || (ch >= 'a' && ch <= 'z'))
                    {
                        if (len < static_cast<int>(sizeof(word)) - 1) word[len++] = ch;
                        continue;
                    }
                }
                // separator or end of file: finish the pending word
                if (len > 0 && word[0] >= 'a' && word[0] <= 'z')
                {
                    word[len] = '\0';
                    int k = 0;
                    while (k < j && strcmp(d[k].data, word) != 0) ++k;
                    if (k < j)
                    {
                        d[k].num += 1;
                    }
                    else if (j < maxa)
                    {
                        strcpy(d[j].data, word);
                        d[j].num = 1;
                        int home = hashnum(d[j].data) % maxa;
                        if (home < 0) home += maxa;
                        d[j].address = home;
                        j++;
                    }
                    else if (!warned)
                    {
                        printf("散列表长度不足!\n");
                        warned = true;
                    }
                }
                len = 0;
                if (!got) break;
            }
        }

        /* step 2: place every word, probing forward from its home slot */
        double asl = 0;
        for (int k = 0; k < j; ++k)
        {
            int step = 0;
            while (step < maxa && h[(d[k].address + step) % maxa].num != 0) ++step;
            if (step == maxa)
            {
                printf("散列表长度不足!\n");
                continue;
            }
            const int slot = (d[k].address + step) % maxa;
            h[slot].num = d[k].num;
            strcpy(h[slot].data, d[k].data);
            h[slot].cmp = step;
            asl += step + 1;               // comparisons needed to find this word again
        }
        asl = (j > 0) ? asl / j : 0.0;

        function();
        int op = 0;
        if (!(std::cin >> op)) break;

        if (op == 1)    /* frequency report -> OutFile5.txt */
        {
            std::fstream w5("OutFile5.txt", std::ios::out);
            w5 << j << std::endl;
            // bubble sort the word list into ascending order
            for (int u = 0; u < j - 1; u++)
                for (int v = 0; v < j - u - 1; v++)
                    if (!cmpstr(d[v].data, d[v + 1].data)) {
                        drawlist held = d[v];
                        d[v] = d[v + 1];
                        d[v + 1] = held;
                    }
            for (int k = 0; k < j; k++)
                w5 << d[k].data << ' ' << d[k].num << std::endl;
            printf("统计完成,请查看对应OutFile5.txt文件\n");
        }
        else if (op == 2)
        {
            char find[50] = "";
            printf("请输入你要查询的单词 : ");
            std::cin.width(sizeof(find));
            std::cin >> find;
            LARGE_INTEGER timebegin;        // start of measurement
            LARGE_INTEGER timeend;          // end of measurement
            LARGE_INTEGER frequency;        // counter frequency
            QueryPerformanceFrequency(&frequency);
            double quadpart = (double)frequency.QuadPart;
            QueryPerformanceCounter(&timebegin);
            int home = hashnum(find) % maxa;
            if (home < 0) home += maxa;
            int slot = -1;
            for (int step = 0; step < maxa; ++step)
            {
                const int at = (home + step) % maxa;
                if (h[at].num == 0) break;             // empty slot: the word is absent
                if (strcmp(find, h[at].data) == 0) { slot = at; break; }
            }
            QueryPerformanceCounter(&timeend);
            if (slot < 0) std::cout << "查找失败" << std::endl;
            else {
                std::cout << "此单词的词频为: " << h[slot].num << std::endl;
                std::cout << "查找该单词所花的时间";
                printf("%.15lf\n", (double)(timeend.QuadPart - timebegin.QuadPart) / quadpart * 100000);
                std::cout << "平均查找长度";
                printf("%.1lf\n", asl);
            }
        }
        else break;
        printf("按任意数字键返回\n");
        std::cin >> op;
    }
}
//06基于链地址法哈希查找  h
/**
 * Hash lookup with separate chaining over 5003 buckets.
 *
 * Every menu round re-reads InFile.txt, hashes each word with hashnum() and
 * keeps one chain of distinct words per bucket (new words are pushed at the
 * front), shows the menu, and releases the table again.
 *
 * Option 1 writes the words in ascending order to OutFile6.txt; option 2
 * times a lookup and prints frequency, elapsed time and the average search
 * length; option 3 returns; other numbers fall through to the pause prompt.
 *
 * Average search length = mean number of key comparisons of a successful
 * lookup: a chain of length L contributes 1 + 2 + ... + L. The value is
 * stored in avl. (The previous code always printed (0 - 1) / 2 = -0.5.)
 *
 * Other fixes: the table, its nodes and the sort buffer are freed instead of
 * leaking each round; the sort buffer is sized to the word count; hash values
 * are reduced into the bucket range; end of file / a missing file / an
 * over-long word no longer cause endless loops or overflows; the timer is
 * stopped before anything is printed; a failed menu read returns.
 */
void linkhash()
{
    typedef struct List {       // chain node
        char data[50];
        int num;
        List* next;
    }Node;

    struct hashtbl {            // one bucket
        int tblsize;            // number of words in the chain
        Node* L;                // first node of the chain, or NULL
    };

    struct orderhashtbl {       // flat record used for sorting the report
        int num;
        char data[50];
    };

    const int kBuckets = 5003;

    while (1)
    {
        hashtbl *H = new hashtbl[kBuckets];
        for (int b = 0; b < kBuckets; ++b)
        {
            H[b].tblsize = 0;
            H[b].L = NULL;
        }
        int j = 0;                         // number of distinct words
        {
            std::ifstream r("InFile.txt", std::ios::in);
            if (!r) printf("无法打开InFile.txt\n");
            char word[33];
            int len = 0;
            for (;;)
            {
                char ch = '\0';
                const bool got = !r.get(ch).fail();
                if (got)
                {
                    ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
                    if (ch == '\'' || (ch >= 'a' && ch <= 'z'))
                    {
                        if (len < static_cast<int>(sizeof(word)) - 1) word[len++] = ch;
                        continue;
                    }
                }
                // separator or end of file: finish the pending word
                if (len > 0 && word[0] >= 'a' && word[0] <= 'z')
                {
                    word[len] = '\0';
                    int index = hashnum(word) % kBuckets;
                    if (index < 0) index += kBuckets;
                    Node *pos = H[index].L;
                    while (pos != NULL && strcmp(pos->data, word) != 0) pos = pos->next;
                    if (pos != NULL)
                    {
                        pos->num = pos->num + 1;   // seen before
                    }
                    else
                    {
                        Node *fresh = new Node;
                        strcpy(fresh->data, word);
                        fresh->num = 1;
                        fresh->next = H[index].L;  // push at the front of the chain
                        H[index].L = fresh;
                        H[index].tblsize += 1;
                        j++;
                    }
                }
                len = 0;
                if (!got) break;
            }
        }

        function();
        int op = 0;
        const bool haveOp = !(std::cin >> op).fail();
        bool leave = !haveOp;
        if (haveOp && op == 1)
        {
            std::fstream w6("OutFile6.txt", std::ios::out);
            w6 << j << std::endl;
            // copy the chains into a flat array for sorting
            orderhashtbl *tbl = new orderhashtbl[j > 0 ? j : 1];
            int n = 0;
            for (int b = 0; b < kBuckets; ++b)
                for (Node *ptr = H[b].L; ptr != NULL; ptr = ptr->next)
                {
                    strcpy(tbl[n].data, ptr->data);
                    tbl[n++].num = ptr->num;
                }
            for (int u = 0; u < n - 1; u++)    // bubble sort, ascending
                for (int v = 0; v < n - u - 1; v++)
                    if (!cmpstr(tbl[v].data, tbl[v + 1].data)) {
                        orderhashtbl held = tbl[v];
                        tbl[v] = tbl[v + 1];
                        tbl[v + 1] = held;
                    }
            for (int k = 0; k < n; k++)
                w6 << tbl[k].data << ' ' << tbl[k].num << std::endl;
            delete[] tbl;
            printf("统计完成,请查看对应OutFile6.txt文件\n");
        }
        else if (haveOp && op == 2)
        {
            char find[50] = "";
            printf("请输入你要查询的单词 : ");
            std::cin.width(sizeof(find));
            std::cin >> find;
            LARGE_INTEGER timebegin;        // start of measurement
            LARGE_INTEGER timeend;          // end of measurement
            LARGE_INTEGER frequency;        // counter frequency
            QueryPerformanceFrequency(&frequency);
            double quadpart = (double) frequency.QuadPart;
            QueryPerformanceCounter(&timebegin);
            int bucket = hashnum(find) % kBuckets;
            if (bucket < 0) bucket += kBuckets;
            Node *ptr = H[bucket].L;
            while (ptr != NULL && strcmp(ptr->data, find) != 0) ptr = ptr->next;
            QueryPerformanceCounter(&timeend);

            if (ptr == NULL)
            {
                printf("查找失败\n");
            }
            else {
                std::cout << "此单词的词频为: " << ptr->num << std::endl;
                std::cout << "查找该单词所花的时间";
                printf("%.15lf\n", (double)(timeend.QuadPart - timebegin.QuadPart)/quadpart * 100000);
                std::cout << "平均查找长度";
                double comparisons = 0;
                for (int b = 0; b < kBuckets; ++b)
                    comparisons += H[b].tblsize * (H[b].tblsize + 1) / 2.0;
                avl = comparisons / j;      // j >= 1 because a word was found
                printf("%.1lf\n", avl);
            }
        }
        else if (op == 3) leave = true;

        /* release the table built for this round */
        for (int b = 0; b < kBuckets; ++b)
        {
            Node *ptr = H[b].L;
            while (ptr != NULL)
            {
                Node *next = ptr->next;
                delete ptr;
                ptr = next;
            }
        }
        delete[] H;

        if (leave) break;
        std::cout << "按任意数字键返回" << std::endl;
        scanf("%d", &op);
    }
}

/**
 * Fills an existing tree node from entry `index` of the parallel word /
 * frequency arrays and detaches it from any children.
 *
 * @param node  node to overwrite (must not be NULL)
 * @param index position to copy from
 * @param w     words
 * @param num   frequencies, parallel to w
 * @return node
 */
Bnode* giveval(Bnode* node, int index, char *w[], int num[])
{
    node->lchild = NULL;
    node->rchild = NULL;
    node->num = num[index];
    strcpy(node->data, w[index]);
    return node;
}
/*夭折
Btree search_bintree(char* w[],int num[],int j) {
    int low = 0, high =  j- 1; int mid;//折半查找
        Bnode* node;
        Btree T = node ;    
        while (low <= high)
    {
        mid = (low + high) / 2;
        node = new Bnode;
        node = giveval(node,mid,w,num);

    }
        cout << T->data << ' ' << T->num << endl;
        return T;
}

*/
/**
 * Adds to the global counter res the number of nodes that lie exactly `depth`
 * edges below root (depth 0 is root itself). The caller resets res.
 *
 * @param root  subtree to inspect; may be NULL
 * @param depth remaining distance to the level being counted
 */
void countNumOfLevel(Btree root, int depth) {
    if (!root || depth < 0)
        return;
    if (depth > 0) {
        // not there yet: descend one level on both sides
        countNumOfLevel(root->lchild, depth - 1);
        countNumOfLevel(root->rchild, depth - 1);
        return;
    }
    ++res;
}

/**
 * Height of the tree rooted at p, counted in nodes.
 *
 * @return 0 for an empty tree, otherwise 1 + the height of the taller subtree
 */
int Height(Btree p)
{
    if (!p) return 0;
    const int left = Height(p->lchild);
    const int right = Height(p->rchild);
    const int taller = (left > right) ? left : right;
    return taller + 1;
}
View Code
 1 #include<iostream>
 2 #include<cstdio>
 3 #include<fstream>
 4 #include<string>
 5 #include<cstring>
 6 #include<ctime>
 7 #include<stddef.h>
 8 #include<cctype> 
 9 #include<malloc.h>
10 #include <cstdlib>
11 #include <windows.h>
/**
 * Prints the program banner and the top-level menu
 * (1 linear list, 2 binary search tree, 3 hash table, 4 quit).
 */
void headline()
{
    printf("=============****************************============\n");
    printf("=============* 北京林业大学单词统计系统 *=============\n");
    printf("=============****************************=============\n\n");

    printf("请按下数字键进行功能选择(不能输入非法字符!):\n");
    printf("(1):基于线性表的查找\n");
    printf("(2):基于二叉排序树的查找\n");
    printf("(3):基于哈希表的查找\n");
    printf("(4):退出\n");
}
/**
 * Prints the per-structure action menu
 * (1 frequency report, 2 word lookup, 3 back).
 */
void function()
{
    printf("请按下数字键进行功能选择(不能输入非法字符!):\n");
    printf("(1):词频统计\n");
    printf("(2):单词查找\n");
    printf("(3):返回上一级\n");
    // Disabled menu entry for the unfinished low-frequency word filter:
    //printf("(4):基于链表的低频词过滤统计\n");
}
/** Prints the closing "thank you" banner. */
void thanks()
{
    printf("=============*************************===============\n");
    printf("=============*********谢谢本次使用****===============\n");
    printf("=============*************************===============\n");
}
00提示.h
/* Node of the binary search tree used for the word index. */
struct Bnodes {
    char data[50];      // the word (NUL-terminated)
    int num;            // occurrence count
    Bnodes* lchild;     // left subtree
    Bnodes* rchild;     // right subtree
};
typedef Bnodes Bnode;   // node type
typedef Bnodes* Btree;  // pointer to a node / handle of a (sub)tree
02
/**
 * Hash of a word: (first character - 'a') + length of the word.
 *
 * Callers use the result directly as a table index, so it must never be
 * negative. For text that does not start with a lower-case letter (for
 * example a capitalised query typed by the user) the raw value can drop
 * below zero; such values are mapped to 0.
 *
 * @param ss NUL-terminated string
 * @return a non-negative hash value
 */
int hashnum(char *ss)
{
    const int code = static_cast<int>(ss[0]) - 'a' + static_cast<int>(strlen(ss));
    return code < 0 ? 0 : code;
}
03
#include<iostream>
#include<cstdio>
#include<fstream>
#include<string>
#include<cstring>
#include<ctime>
#include<stddef.h>
#include<cctype> 
#include<malloc.h>
#include <cstdlib>
#include <windows.h>
#include <algorithm>
/*字典序大小比较*/
/**
 * Dictionary-order comparison.
 *
 * @return true when a sorts strictly before b ("an" before "and"),
 *         false when a sorts after b or the two strings are equal
 *
 * Every path returns a value: equal strings and empty operands, which used
 * to run off the end of the function, now give a defined result (an empty
 * string sorts before any non-empty one).
 */
bool cmpstr(char *a,char *b)
{
    int i = 0;
    while (a[i] != '\0' && a[i] == b[i])
        ++i;                          // skip the common prefix
    if (a[i] == '\0')
        return b[i] != '\0';          // a is a proper prefix of b -> a first; equal -> false
    if (b[i] == '\0')
        return false;                 // b is a proper prefix of a -> a last
    return a[i] < b[i];               // first differing character decides
}
/* One entry of a word list prepared for hashing. */
struct drawlist {
    int address;// hash (home) address of the word
    int num;// occurrence count
    char data[50];// the word
};

/*快速排序*/ 
/**
 * Quicksort of the parallel arrays w[low..high] / num[low..high] into
 * ascending dictionary order of w (as defined by cmpstr), using w[low] as
 * the pivot.
 *
 * @param w    words; entries are reordered by exchanging the pointers
 * @param num  frequencies, kept aligned with w
 * @param low  first index of the range
 * @param high last index of the range (inclusive); nothing happens if high <= low
 *
 * Entries are exchanged by swapping pointers, so no fixed-size scratch buffer
 * is involved and the strings may have any length. The scans check their
 * bounds before comparing, so the pivot is never compared with itself.
 */
void quick(char * w[],int num[],int low,int high)
{
    if (high <= low) return;

    char *key = w[low];               // pivot; w[low] is not moved until the final exchange
    int p = low;
    int q = high + 1;
    while (1)
    {
        // from the left: stop at the first entry that is not smaller than the pivot
        do { ++p; } while (p < high && cmpstr(w[p], key));
        // from the right: stop at the first entry that is smaller than the pivot
        do { --q; } while (q > low && !cmpstr(w[q], key));
        if (p >= q) break;

        char *heldWord = w[p]; w[p] = w[q]; w[q] = heldWord;
        int heldNum = num[p]; num[p] = num[q]; num[q] = heldNum;
    }

    // q is where the scans met: put the pivot into its final position
    char *pivotWord = w[low]; w[low] = w[q]; w[q] = pivotWord;
    int pivotNum = num[low]; num[low] = num[q]; num[q] = pivotNum;

    quick(w, num, low, q - 1);
    quick(w, num, q + 1, high);
}
自定义算法
posted @ 2019-12-09 17:52  鹤花之歌  阅读(282)  评论(0编辑  收藏  举报