单词统计

读入一个.txt文件，输出26个英文字母出现的频率

1：输出某个英文文本文件中 26 字母出现的频率，由高到低排列，并显示字母出现的百分比，精确到小数点后面两位。

2：输出单个文件中出现频率最高的前 N 个英语单词。作用：用于统计文本文件中各英语单词的出现频率。

设计思想：统计字母时，先读取待统计的文件，逐字符遍历并累计每个字母出现的次数，大写字母按对应的小写字母计数；统计单词时同样先将大写字母转换为小写，再以空格等非字母字符作为分隔符切分出单词，遍历一遍统计每个单词出现的次数。

import java.io.FileNotFoundException;

import java.io.FileWriter;

import java.io.IOException;

import java.io.InputStream;

import java.io.InputStreamReader;

import java.util.ArrayList;

import java.util.Arrays;

import java.util.Collections;

import java.util.HashMap;

import java.util.Scanner;

import java.awt.List;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileReader;

import java.io.IOException;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import java.io.FileOutputStream;  

import java.io.PrintStream;

import java.text.NumberFormat;

/**
 * Prints how often each of the 26 English letters occurs in a text file.
 *
 * <p>Upper- and lower-case letters are counted together. One line per letter is
 * written to standard output in the form {@code letter---percentage}, ordered from
 * the most to the least frequent letter, with the percentage shown to two decimal
 * places. Letters with equal counts are listed in alphabetical order.
 */
public class tongji {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "c:/Harry Potter and the Sorcerer's Stone.txt";

    /** Number of letters in the English alphabet. */
    private static final int LETTERS = 26;

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse.
     *             When absent, {@link #DEFAULT_PATH} is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        long[] counts;
        // try-with-resources closes the stream on every path and, unlike a
        // hand-written finally block, cannot fail with a NullPointerException
        // when the file could not be opened in the first place.
        try (InputStream in = new FileInputStream(new File(path))) {
            counts = countLetters(in);
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        printReport(counts);
    }

    /** Counts the ASCII letters in the stream; index 0 is 'a'/'A', index 25 is 'z'/'Z'. */
    private static long[] countLetters(InputStream in) throws IOException {
        long[] counts = new long[LETTERS];
        byte[] buffer = new byte[8192];
        int read;
        // Read in chunks: a byte-at-a-time read on a raw file stream is very slow.
        while ((read = in.read(buffer)) != -1) {
            for (int i = 0; i < read; i++) {
                int c = buffer[i];
                if (c >= 'a' && c <= 'z') {
                    counts[c - 'a']++;
                } else if (c >= 'A' && c <= 'Z') {
                    counts[c - 'A']++;
                }
            }
        }
        return counts;
    }

    /** Returns the letter indices ordered by descending count; ties keep alphabetical order. */
    private static int[] rankByCount(long[] counts) {
        int[] order = new int[counts.length];
        // Stable insertion sort over the indices. Sorting indices rather than the
        // counts themselves keeps each count attached to its letter, so letters
        // that share a count are still labelled correctly.
        for (int letter = 0; letter < order.length; letter++) {
            int slot = letter;
            while (slot > 0 && counts[order[slot - 1]] < counts[letter]) {
                order[slot] = order[slot - 1];
                slot--;
            }
            order[slot] = letter;
        }
        return order;
    }

    /** Prints one {@code letter---percentage} line per letter, most frequent first. */
    private static void printReport(long[] counts) {
        long total = 0;
        for (long count : counts) {
            total += count;
        }

        NumberFormat percent = NumberFormat.getPercentInstance();
        percent.setMinimumFractionDigits(2);
        percent.setMaximumFractionDigits(2);

        for (int letter : rankByCount(counts)) {
            // A file without any letters would otherwise divide 0 by 0 and print NaN.
            double share = (total == 0) ? 0.0 : (double) counts[letter] / total;
            System.out.println((char) ('a' + letter) + "---" + percent.format(share));
        }
    }
}

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.PrintWriter;

import java.nio.file.NoSuchFileException;

import java.util.*;

import java.util.StringTokenizer;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

 

/**
 * Counts how often each English word occurs in a text file.
 *
 * <p>Text is lower-cased and every run of characters other than ASCII letters is
 * treated as a word separator. By default every distinct word is printed with its
 * count in alphabetical order; when a limit N is supplied, only the N most frequent
 * words are printed, most frequent first.
 */
public class danci {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "c:/Harry Potter and the Sorcerer's Stone.txt";

    /** Matches every run of characters that are not ASCII letters; compiled once and reused. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^A-Za-z]+");

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse
     *             (defaults to {@link #DEFAULT_PATH}) and {@code args[1]} is N, the
     *             number of most frequent words to print. Without {@code args[1]}
     *             all words are printed alphabetically.
     * @throws IOException if the file cannot be read completely or closed
     * @throws NumberFormatException if {@code args[1]} is not an integer
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        boolean limited = args != null && args.length > 1;
        // Parse the limit up front so a bad argument fails before any file I/O.
        int topN = limited ? Integer.parseInt(args[1]) : 0;

        ArrayList<String> AL = new ArrayList<String>();
        // try-with-resources closes both the stream and the scanner, even when
        // reading fails part-way through.
        try (FileInputStream in = new FileInputStream(path);
                Scanner lines = new Scanner(in)) {
            while (lines.hasNextLine()) {
                StringTokenizer tokens = new StringTokenizer(StringFunc(lines.nextLine()));
                while (tokens.hasMoreTokens()) {
                    AL.add(tokens.nextToken());
                }
            }
            // Scanner swallows read errors and simply stops; surface them so a
            // truncated read is not mistaken for a short file.
            IOException failure = lines.ioException();
            if (failure != null) {
                throw failure;
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

        if (limited) {
            HashFunc(AL, topN);
        } else {
            HashFunc(AL);
        }
    }

    /**
     * Normalises a line of text for word splitting.
     *
     * @param Str the text to normalise
     * @return {@code Str} in lower case with every run of non-letter characters
     *         replaced by a single space
     */
    public static String StringFunc(String Str) {
        // Locale.ROOT keeps the mapping locale-independent; with a Turkish default
        // locale a plain toLowerCase() turns "I" into a dotless "ı", which the
        // ASCII-only pattern would then strip out of the word.
        return NON_LETTERS.matcher(Str.toLowerCase(Locale.ROOT)).replaceAll(" ");
    }

    /**
     * Prints every distinct word with its number of occurrences, one
     * {@code count word} line per word, in alphabetical order.
     *
     * <p>The list passed in is not modified.
     *
     * @param AL the words to count
     */
    public static void HashFunc(ArrayList<String> AL) {
        for (Map.Entry<String, Integer> entry : countWords(AL).entrySet()) {
            System.out.println(entry.getValue() + " " + entry.getKey());
        }
    }

    /**
     * Prints the {@code topN} most frequent words, one {@code count word} line per
     * word, most frequent first. Words with equal counts are listed alphabetically.
     *
     * @param AL   the words to count
     * @param topN maximum number of words to print; values below 1 print nothing
     */
    public static void HashFunc(ArrayList<String> AL, int topN) {
        ArrayList<Map.Entry<String, Integer>> ranked =
                new ArrayList<Map.Entry<String, Integer>>(countWords(AL).entrySet());
        // Collections.sort is stable, so equal counts keep the alphabetical order
        // they already have coming out of the sorted map.
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });

        int limit = Math.min(Math.max(topN, 0), ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> entry = ranked.get(i);
            System.out.println(entry.getValue() + " " + entry.getKey());
        }
    }

    /** Tallies the occurrences of each word, keyed in alphabetical order. */
    private static TreeMap<String, Integer> countWords(ArrayList<String> words) {
        TreeMap<String, Integer> counts = new TreeMap<String, Integer>();
        for (String word : words) {
            Integer seen = counts.get(word);
            counts.put(word, (seen == null) ? 1 : seen + 1);
        }
        return counts;
    }
}

posted @ 2020-05-05 10:28 嘻嘻哒丸子阅读(118) 评论(0) 收藏举报

刷新页面返回顶部

嘻嘻哒丸子

晚来天欲雪，可饮一杯无。

单词统计

读入一个.txt文件，输出26个英文字母出现的频率

公告