统计单词,字母出现的次数和频率

一、统计所给出文件中英文字母出现的频率(区分大小写),并且按着出现频率倒序输出

思路:将文件用BufferedReader读取

对每行进行读取在进行分割成单词

对单词进行循环判断是否在A-Z,a-z之间,若在存储到数组里计数

最终进行排序

package com.wenjian;
import java.util.Scanner;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.io.*;
public class hali {
    public static <type> void main (String[] args) throws IOException {
        File file=new File("E:\\Harry Potter and the Sorcerer's Stone.txt");                  //��ȡ�ļ�
        if(!file.exists()){
            System.out.println("文件打不开");
            return;
        }  
        Scanner scanner=new Scanner(file);
        BufferedReader buf = new BufferedReader(new FileReader(file));
        int []num=new int[100];//计数数组
        char []zimu=new char[100];//字母表数组
        char a='A';char b='a';
        for(int i=1;i<=52;i++)
        {
            if(i<=26)
                zimu[i]=a++;
            else
                zimu[i]=b++;
        }
        //计数
        String s=buf.readLine();
        while(s!=null) {
            String[] lineWords=s.split(" ");          
            for(int i=0;i<lineWords.length;i++) {
                for(int j=0;j<lineWords[i].length();j++)
                {
                    if(lineWords[i].charAt(j)>='A'&&lineWords[i].charAt(j)<='Z')
                        num[lineWords[i].charAt(j)-'A'+1]++;
                    else if(lineWords[i].charAt(j)>='a'&&lineWords[i].charAt(j)<='z')
                        num[lineWords[i].charAt(j)-'a'+1+24]++;
                }
            }
            s=buf.readLine();
        }
        //求总次数
        int sum=0;
        for(int i=1;i<=52;i++)
        {
            sum+=num[i];
        }
        //排序
        for(int i=1;i<=52;i++)
        {
            for(int j=i+1;j<=52;j++)
            {
                if(num[i]<num[j])
                {
                    int t=num[i];
                    num[i]=num[j];
                    num[j]=t;
                    
                    char p=zimu[i];
                    zimu[i]=zimu[j];
                    zimu[j]=p;
                }
            }
        }
        System.out.println(sum);
        for(int i=1;i<=52;i++)
        {
            double ans=num[i]*1.0/sum*100;
            System.out.println(zimu[i]+":"+String.format("%.2f", ans)+"%");
        }
}
}

 

二、输出单个文件的前N个最常出现的英文单词

思路:用输入流读取文件

利用HashMap<String,Integer>来存储单词和计数

利用split进行空格分割

然后再输出前n个单词频率高的

package com.wenjian;
import java.io.File;
import java.util.Scanner;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
public class nword {
    public static <type> void main (String[] args) throws FileNotFoundException {
        File file=new File("E:\\Harry Potter and the Sorcerer's Stone.txt");                  //��ȡ�ļ�
        if(!file.exists()){
            System.out.println("未找到文件");
            return;
        }
        Scanner in=new Scanner(System.in);
        System.out.println("请输入前n个常用单词");
        int n=in.nextInt();
        Scanner scanner=new Scanner(file);
        HashMap<String,Integer> hashMap=new HashMap<String,Integer>();
        while(scanner.hasNextLine()) {
            String line=scanner.nextLine();
            String[] lineWords=line.split(" ");          
            Set<String> wordSet=hashMap.keySet();
            for(int i=0;i<lineWords.length;i++) {
                if(wordSet.contains(lineWords[i])) {//判断set集合里是否有该单词
                    Integer number=hashMap.get(lineWords[i]);//若有次数+1
                    number++;
                    hashMap.put(lineWords[i], number); 
                    }
                else {//没有就将其放入set里,次数为1
                    hashMap.put(lineWords[i], 1);
                }
            }
        }
        //计算总体单词数
        int sum=0;
        Iterator<String> it=hashMap.keySet().iterator();
        while(it.hasNext()){
            sum+=hashMap.get(it.next());
        }
        //输出前n个单词
        while(n>0)
        {
            Iterator<String> iterator=hashMap.keySet().iterator();
            int max=0;
            String maxword=null;
            while(iterator.hasNext()){
                String word=iterator.next();
                
                if(hashMap.get(word)>max) {                         
                    max=hashMap.get(word);
                    maxword=word;
                }
            }  
            hashMap.remove(maxword);
            double ans=max*1.0/sum*100;
            if(!maxword.equals("")) {
                System.out.println(maxword+":"+max);
                n--;
            }
            
        }
    }
}

 

 三:指定文件目录,但是会递归目录下的所有子目录,每个文件执行统计单词的数量以及百分比

思路:首先获取目录路径

对目录下的目录进行判断,如果还是目录继续递归,否则就输出该文档里的单词

package com.wenjian;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.util.Scanner;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
public class neng2 {
    //输出文档单词
    static public void print(String f) throws IOException
    {
        File file=new File(f);
        if(!file.exists()){
            System.out.println("无法打开文件");
            return;
        }
        BufferedReader buf = new BufferedReader(new FileReader(file));
        HashMap<String,Integer> hashMap=new HashMap<String,Integer>();
        String s;
        while((s=buf.readLine())!=null) {
            String[] lineWords=s.split(" ");         
            Set<String> wordSet=hashMap.keySet();
            for(int i=0;i<lineWords.length;i++) {
                if(wordSet.contains(lineWords[i])) {
                    Integer number=hashMap.get(lineWords[i]);
                    number++;
                    hashMap.put(lineWords[i], number); 
                    }
                else {
                    hashMap.put(lineWords[i], 1);
                }
            }
        }
     
            while(hashMap.size()>0)
            {
                Iterator<String> iterator=hashMap.keySet().iterator();
                int max=0;
                String maxword=null;
                while(iterator.hasNext()){
                    String word=iterator.next();
                    
                    if(hashMap.get(word)>max) {                          
                        max=hashMap.get(word);
                        maxword=word;
                    }
                    else if(hashMap.get(word)==max)
                    {
                        if(word.compareTo(maxword)<0)
                        {
                            
                            maxword=word;
                        }
                    }
                }
                hashMap.remove(maxword);
                if(!maxword.equals(""))
                System.out.println(maxword+":"+max+" ");
            }
       
    }
    //递归文件
    static public void getDirectory(File file) throws IOException 
    {
    File flist[] = file.listFiles();
          if (flist == null || flist.length == 0) {
              return;
          }
          for (File f : flist) {
              if (f.isDirectory()) {
                  
                   getDirectory(f);
                 
              } else 
              {
                 
                  System.out.println("file==>" + f.getAbsolutePath());
                  
                  print( f.getAbsolutePath());
                  System.out.println();
              }
          }
    }
    
    static Scanner sc=new Scanner(System.in);
        public static void main(String[] args)throws IOException
        {
            String path="D:\\test"; 
            File fm=new File(path);   
            getDirectory(fm);
            
        }
}
        

 

posted @ 2019-11-05 21:49  清风紫雪  阅读(1580)  评论(0编辑  收藏  举报