2019.11.04课堂测试
课堂测试1
要求一:
英语的26 个字母的频率在一本小说中是如何分布的?某类型文章中常出现的单词是什么?某作家最常用的词汇是什么?《飘》 中最常用的短语是什么,等等。
输出某个英文文本文件中 26 个字母出现的频率,由高到低排列,并显示每个字母出现的百分比,精确到小数点后两位。
注:1、字母频率 = 这个字母出现的次数 / (所有A-Z,a-z字母出现的总数)
2、如果两个字母出现的频率一样,那么就按照字典序排列。
源代码如下:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * Prints the relative frequency of every ASCII letter found in a text file.
 *
 * <p>Upper- and lower-case letters are tracked separately (52 symbols, listed in
 * {@link #str1}). The report is ordered by descending count; symbols with the same
 * count are printed in the order in which they appear in {@link #str1}.
 */
public class wordcount1 {

    /** Kept for compatibility with earlier versions; not used by {@link #read()}. */
    static String str = "";

    /** The tracked symbols. A symbol's position here is its rank when counts are tied. */
    static String str1 = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";

    /** After {@link #read()}: the symbols in printed order. */
    static char ch1[] = str1.toCharArray();

    /** After {@link #read()}: the percentage of each symbol, parallel to {@link #ch1}. */
    public static double num[] = new double[100];

    /** After {@link #read()}: total number of tracked letters found in the file. */
    public static int sum = 0;

    /**
     * Reads {@code Harry Potter and the Sorcerer's Stone.txt} from the working
     * directory, counts the letters and prints one {@code <letter>:<percent>%} line
     * per symbol with two decimals.
     *
     * <p>Every call starts from zero, so calling it repeatedly gives the same report.
     * If the file cannot be read, the error is reported on standard error and the
     * static result fields are left untouched.
     */
    public static void read() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        int[] counts = new int[str1.length()];
        int total;

        // The file is decoded as UTF-8 so that multi-byte punctuation cannot swallow
        // a neighbouring ASCII letter on platforms with a different default charset.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            total = countLetters(br, counts);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
            return;
        }

        Integer[] order = rankByFrequency(counts);
        char[] sortedSymbols = new char[order.length];
        for (int rank = 0; rank < order.length; rank++) {
            int slot = order[rank];
            // A file without any letters yields 0.00% everywhere instead of NaN.
            double percent = total == 0 ? 0.0 : (double) counts[slot] / total * 100;
            sortedSymbols[rank] = str1.charAt(slot);
            num[rank] = percent;
            System.out.print(sortedSymbols[rank]);
            System.out.printf(":%.2f", percent);
            System.out.println("%");
        }
        ch1 = sortedSymbols;
        sum = total;
    }

    /** Adds one to {@code counts[i]} for every character equal to {@code str1.charAt(i)}; returns the number of hits. */
    private static int countLetters(BufferedReader reader, int[] counts) throws IOException {
        int total = 0;
        int c;
        while ((c = reader.read()) != -1) {
            int slot = str1.indexOf(c);
            if (slot >= 0) {
                counts[slot]++;
                total++;
            }
        }
        return total;
    }

    /** Returns the indices of {@code counts} ordered by descending count, then ascending index. */
    private static Integer[] rankByFrequency(final int[] counts) {
        Integer[] order = new Integer[counts.length];
        for (int i = 0; i < order.length; i++) {
            order[i] = i;
        }
        Arrays.sort(order, (x, y) -> counts[x] != counts[y]
                ? Integer.compare(counts[y], counts[x])
                : Integer.compare(x, y));
        return order;
    }

    public static void main(String[] args) {
        read();
    }
}
运行结果
课堂测试2
要求二:
输出单个文件中的前 N 个最常出现的英语单词。
作用:一个用于统计文本文件中的英语单词出现频率的控制台程序;
单词:以英文字母开头,由英文字母和字母数字符号组成的字符串视为一个单词。单词以分隔符分割且不区分大小写。在输出时,所有单词都用小写字符表示。
源代码如下:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

/**
 * Console program that prints the N most frequent English words of a text file.
 *
 * <p>A word is a maximal run of ASCII letters and digits that starts with a letter;
 * every other character, including a line break, separates words. Words are
 * compared and printed in lower case. The pipeline is
 * {@link #Readfile()} → {@link #Statistics()} → {@link #Sorting()} → {@link #show()}.
 */
public class wordcount2 {

    private static final Scanner sc = new Scanner(System.in);

    /** Reader that {@link #cun()} consumes; only set while {@link #Readfile()} runs. */
    private static BufferedReader cin = null;

    /** Every word occurrence, in reading order. */
    private static final List<String> words = new ArrayList<>();

    /** One entry per distinct word with its number of occurrences. */
    private static final List<Map.Entry<String, Integer>> entries = new ArrayList<>();

    /** How many entries {@link #show()} prints. */
    private static int nn = 0;

    /**
     * Splits everything remaining in the open reader into words and appends them to
     * the word list.
     *
     * @throws IOException if reading fails
     * @throws IllegalStateException if no reader is open, i.e. the method was not
     *         reached through {@link #Readfile()}
     */
    public static void cun() throws IOException {
        if (cin == null) {
            throw new IllegalStateException("no input is open; call Readfile() instead");
        }
        StringBuilder token = new StringBuilder();
        String line;
        while ((line = cin.readLine()) != null) {
            for (int i = 0; i < line.length(); i++) {
                char ch = line.charAt(i);
                if (isWordChar(ch)) {
                    token.append(toLowerAscii(ch));
                } else {
                    flushToken(token);
                }
            }
            // A line break also ends a word, so words never run across lines.
            flushToken(token);
        }
    }

    /** Prints the first {@code nn} entries (or all of them if there are fewer) as {@code word<TAB>count}. */
    public static void show() {
        int limit = Math.min(nn, entries.size());
        for (int k = 0; k < limit; k++) {
            Map.Entry<String, Integer> entry = entries.get(k);
            System.out.print(entry.getKey() + "\t" + entry.getValue() + " ");
            System.out.println("");
        }
    }

    /** Orders the entries by descending count; equal counts are ordered alphabetically. */
    public static void Sorting() {
        entries.sort((x, y) -> {
            int byCount = Integer.compare(y.getValue(), x.getValue());
            return byCount != 0 ? byCount : x.getKey().compareTo(y.getKey());
        });
    }

    /** Rebuilds the entry list from the word list: one entry per distinct word, in first-seen order. */
    public static void Statistics() {
        Map<String, Integer> counts = new LinkedHashMap<>();
        for (String word : words) {
            counts.merge(word, 1, Integer::sum);
        }
        entries.clear();
        entries.addAll(counts.entrySet());
    }

    /**
     * Reads {@code Harry Potter and the Sorcerer's Stone.txt} (UTF-8) from the working
     * directory into the word list, replacing any words read earlier. A read failure
     * is reported on the console and leaves the words read so far in place.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        words.clear();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            cin = reader;
            cun();
        } catch (IOException e) {
            System.out.println("读取失败!");
            e.printStackTrace();
        } finally {
            cin = null;
        }
    }

    /** True for the characters a word may contain: ASCII letters and digits. */
    private static boolean isWordChar(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9');
    }

    /** Lower-cases an ASCII capital letter and returns every other character unchanged. */
    private static char toLowerAscii(char ch) {
        return (ch >= 'A' && ch <= 'Z') ? (char) (ch + ('a' - 'A')) : ch;
    }

    /** Stores the pending token if it starts with a letter, then empties the buffer. */
    private static void flushToken(StringBuilder token) {
        if (token.length() == 0) {
            return;
        }
        char first = token.charAt(0);
        // Tokens such as "123good" start with a digit and are not words.
        if (first >= 'a' && first <= 'z') {
            words.add(token.toString());
        }
        token.setLength(0);
    }

    public static void main(String[] args) throws IOException {
        System.out.println("请输入需要统计的个数:");
        nn = sc.nextInt();
        Readfile();
        Statistics();
        Sorting();
        show();
    }
}
运行结果
课堂测试3
要求三
输出文件中所有不重复的单词,按照出现次数由多到少排列,出现次数同样多的,以字典序排列。 英文字母:A-Z,a-z 字母数字符号:A-Z,a-z,0-9 分割符:空格,非字母数字符号 例:good123是一个单词,123good不是一个单词。good,Good和GOOD是同一个单词
源代码:
package classTest;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Lists every distinct English word of a text file, most frequent first, on the
 * console and in {@code t1.txt}.
 *
 * <p>A word is a maximal run of ASCII letters and digits that starts with a letter;
 * every other character, including a line break, separates words. Words are
 * compared and printed in lower case. Words with the same count are listed
 * alphabetically.
 */
public class wordcount3 {

    /** Reader that {@link #cun()} consumes; only set while {@link #Readfile()} runs. */
    private static BufferedReader cin = null;

    /** Every word occurrence, in reading order. */
    private static final List<String> words = new ArrayList<>();

    /** One entry per distinct word with its number of occurrences. */
    private static final List<Map.Entry<String, Integer>> entries = new ArrayList<>();

    /**
     * Splits everything remaining in the open reader into words and appends them to
     * the word list.
     *
     * @throws IOException if reading fails
     * @throws IllegalStateException if no reader is open, i.e. the method was not
     *         reached through {@link #Readfile()}
     */
    public static void cun() throws IOException {
        if (cin == null) {
            throw new IllegalStateException("no input is open; call Readfile() instead");
        }
        StringBuilder token = new StringBuilder();
        String line;
        while ((line = cin.readLine()) != null) {
            for (int i = 0; i < line.length(); i++) {
                char ch = line.charAt(i);
                if (isWordChar(ch)) {
                    token.append(toLowerAscii(ch));
                } else {
                    flushToken(token);
                }
            }
            // A line break also ends a word, so words never run across lines.
            flushToken(token);
        }
    }

    /** Orders the entries by descending count; equal counts are ordered alphabetically. */
    public static void Sorting() {
        entries.sort((x, y) -> {
            int byCount = Integer.compare(y.getValue(), x.getValue());
            return byCount != 0 ? byCount : x.getKey().compareTo(y.getKey());
        });
    }

    /** Rebuilds the entry list from the word list: one entry per distinct word, in first-seen order. */
    public static void Statistics() {
        Map<String, Integer> counts = new LinkedHashMap<>();
        for (String word : words) {
            counts.merge(word, 1, Integer::sum);
        }
        entries.clear();
        entries.addAll(counts.entrySet());
    }

    /**
     * Reads {@code Harry Potter and the Sorcerer's Stone.txt} (UTF-8) from the working
     * directory into the word list, replacing any words read earlier. A read failure
     * is reported on the console and leaves the words read so far in place.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        words.clear();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            cin = reader;
            cun();
        } catch (IOException e) {
            System.out.println("读取失败!");
            e.printStackTrace();
        } finally {
            cin = null;
        }
    }

    /**
     * Appends one line per entry to {@code t1.txt}: running number, word, count and
     * the word's share of all word occurrences in percent.
     *
     * <p>NOTE(review): the file is opened in append mode, as before, so repeated runs
     * accumulate reports in the same file.
     *
     * @throws IOException if the file cannot be written
     */
    public static void Writefile() throws IOException {
        File file = new File("t1.txt");
        int totalWords = words.size();
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file, true))) {
            for (int i = 0; i < entries.size(); i++) {
                Map.Entry<String, Integer> entry = entries.get(i);
                out.write("这是第" + (i + 1) + "个: ");
                // Share of all occurrences; dividing by the number of distinct words
                // would produce percentages that add up to more than 100.
                double f4 = (double) entry.getValue() / totalWords * 100;
                out.write(entry.getKey() + "\t" + entry.getValue() + "\t" + f4);
                out.write("\r\n");
            }
        }
    }

    /** Prints every entry as {@code word<TAB> <TAB><TAB>count}, one per line. */
    public static void show1() {
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.print(entry.getKey() + "\t \t\t" + entry.getValue() + "\n");
        }
    }

    /** True for the characters a word may contain: ASCII letters and digits. */
    private static boolean isWordChar(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9');
    }

    /** Lower-cases an ASCII capital letter and returns every other character unchanged. */
    private static char toLowerAscii(char ch) {
        return (ch >= 'A' && ch <= 'Z') ? (char) (ch + ('a' - 'A')) : ch;
    }

    /** Stores the pending token if it starts with a letter, then empties the buffer. */
    private static void flushToken(StringBuilder token) {
        if (token.length() == 0) {
            return;
        }
        char first = token.charAt(0);
        // Tokens such as "123good" start with a digit and are not words.
        if (first >= 'a' && first <= 'z') {
            words.add(token.toString());
        }
        token.setLength(0);
    }

    public static void main(String[] args) throws IOException {
        Readfile();
        Statistics();
        Sorting();
        System.out.println("程序中所有不重复的单词!");
        show1();
        Writefile();
    }
}
因为全部单词的输出超出了控制台缓冲区所能显示的范围,所以我同时把结果写入文件(t1.txt)保存:
课堂测试4
要求四:
指定文件目录,对目录下的每一个文件执行功能1(要求一)的操作。
源代码:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Collects the regular files found directly inside one directory and prints each
 * distinct path with the number of times it was collected and its share in percent.
 *
 * <p>NOTE(review): the contents of the collected files are never read here; only
 * their paths are counted. {@link #Readfile()} is not part of the pipeline that
 * {@link #main(String[])} runs.
 */
public class wordcount4 {

    /** Directory whose entries are listed when the class is loaded. */
    private static final String ROOT = "F:\\JAVA";

    /** Absolute paths collected by {@link #rode1(File[])}, in discovery order. */
    private static final List<String> a = new ArrayList<>();

    /** One entry per distinct path with the number of times it was collected. */
    private static final List<Map.Entry<String, Integer>> entries = new ArrayList<>();

    /** Entries of {@link #ROOT}; {@code null} when the directory cannot be listed. */
    static File[] list = new File(ROOT).listFiles();

    /** Orders the entries by descending count; entries with equal counts keep their order. */
    public static void Sorting() {
        entries.sort((x, y) -> Integer.compare(y.getValue(), x.getValue()));
    }

    /** Rebuilds the entry list from the collected paths: one entry per distinct path, in first-seen order. */
    public static void Statistics() {
        Map<String, Integer> counts = new LinkedHashMap<>();
        for (String path : a) {
            counts.merge(path, 1, Integer::sum);
        }
        entries.clear();
        entries.addAll(counts.entrySet());
    }

    /**
     * Opens {@code Harry Potter and the Sorcerer's Stone.txt} (UTF-8) from the working
     * directory and closes it again without reading anything. A failure is reported
     * on the console.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            // Intentionally empty: the original only opened and closed the file.
        } catch (IOException e) {
            System.out.println("读取失败!");
            e.printStackTrace();
        }
    }

    /** Prints every entry as path, count and percentage of all collected paths (two decimals). */
    public static void show1() {
        int total = a.size();
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.print(entry.getKey() + "\t \t\t" + entry.getValue() + " ");
            System.out.printf("%.2f", (double) entry.getValue() / total * 100);
            System.out.print("%");
            System.out.println("");
        }
    }

    /**
     * Adds the absolute path of every regular file in {@code list} to the collected
     * paths. Sub-directories are skipped.
     *
     * @param list directory entries to scan; {@code null} (what
     *             {@link File#listFiles()} returns for an unreadable or missing
     *             directory) is treated as empty
     */
    public static void rode1(File[] list) {
        if (list == null) {
            return;
        }
        for (File file : list) {
            if (file.isFile()) {
                a.add(file.getAbsolutePath());
            }
        }
    }

    public static void main(String[] args) throws IOException {
        if (list == null) {
            System.err.println("Cannot list directory: " + ROOT);
        }
        rode1(list);
        Statistics();
        Sorting();
        show1();
    }
}
运行结果
课堂测试5
要求五:
指定文件目录, 但是会递归遍历目录下的所有子目录,每个文件执行功能1的操作。
源代码:
package classTest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Collects the regular files found in a directory and all of its sub-directories
 * and prints each distinct path with the number of times it was collected and its
 * share in percent.
 *
 * <p>NOTE(review): the contents of the collected files are never read here; only
 * their paths are counted. {@link #Readfile()} is not part of the pipeline that
 * {@link #main(String[])} runs.
 */
public class wordcount5 {

    /** Directory whose entries are listed when the class is loaded. */
    private static final String ROOT = "F:\\Java (学习用)";

    /** Absolute paths collected by {@link #rode1(File[])}, in discovery order. */
    private static final List<String> a = new ArrayList<>();

    /** One entry per distinct path with the number of times it was collected. */
    private static final List<Map.Entry<String, Integer>> entries = new ArrayList<>();

    /** Entries of {@link #ROOT}; {@code null} when the directory cannot be listed. */
    static File[] list = new File(ROOT).listFiles();

    /** Orders the entries by descending count; entries with equal counts keep their order. */
    public static void Sorting() {
        entries.sort((x, y) -> Integer.compare(y.getValue(), x.getValue()));
    }

    /** Rebuilds the entry list from the collected paths: one entry per distinct path, in first-seen order. */
    public static void Statistics() {
        Map<String, Integer> counts = new LinkedHashMap<>();
        for (String path : a) {
            counts.merge(path, 1, Integer::sum);
        }
        entries.clear();
        entries.addAll(counts.entrySet());
    }

    /**
     * Opens {@code Harry Potter and the Sorcerer's Stone.txt} (UTF-8) from the working
     * directory and closes it again without reading anything. A failure is reported
     * on the console.
     */
    public static void Readfile() {
        File file = new File("Harry Potter and the Sorcerer's Stone.txt");
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            // Intentionally empty: the original only opened and closed the file.
        } catch (IOException e) {
            System.out.println("读取失败!");
            e.printStackTrace();
        }
    }

    /** Prints every entry as path, count and percentage of all collected paths (two decimals). */
    public static void show1() {
        int total = a.size();
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.print(entry.getKey() + "\t \t\t" + entry.getValue() + " ");
            System.out.printf("%.2f", (double) entry.getValue() / total * 100);
            System.out.print("%");
            System.out.println("");
        }
    }

    /**
     * Adds the absolute path of every regular file in {@code list} to the collected
     * paths and descends into every sub-directory, depth first.
     *
     * @param list directory entries to scan; {@code null} (what
     *             {@link File#listFiles()} returns for an unreadable or missing
     *             directory) is treated as empty, so such directories are skipped
     */
    public static void rode1(File[] list) {
        if (list == null) {
            return;
        }
        for (File file : list) {
            if (file.isFile()) {
                a.add(file.getAbsolutePath());
            } else if (file.isDirectory()) {
                rode1(new File(file.getAbsolutePath()).listFiles());
            }
        }
    }

    public static void main(String[] args) throws IOException {
        if (list == null) {
            System.err.println("Cannot list directory: " + ROOT);
        }
        rode1(list);
        Statistics();
        Sorting();
        show1();
    }
}
运行结果