统计文本中重复的内容
1.统计一个文本中重复的内容
package count;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

/**
 * Reports the lines that occur more than once in a single text file.
 */
public class countWord {

    public static void main(String[] args) {
        count("F:\\A\\B.xml");
    }

    /**
     * Reads the file at {@code filepath} as UTF-8, counts how often each
     * distinct line occurs, and prints the lines that occur more than once.
     *
     * <p>Lines are compared as whole strings. If the path does not exist,
     * {@code "file not exist"} is printed and nothing else happens. Any
     * exception raised while reading is caught here and its stack trace is
     * printed; it is never propagated to the caller.
     *
     * @param filepath path of the text file to analyse
     */
    public static void count(String filepath) {
        try {
            File file = new File(filepath);
            if (!file.exists()) {
                System.out.println("file not exist");
                return;
            }

            // Occurrence count per distinct line.
            Map<String, Integer> occurrences = new HashMap<String, Integer>();

            // try-with-resources closes the reader (and the underlying file
            // stream) on both the normal and the exceptional path.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    Integer seen = occurrences.get(line);
                    occurrences.put(line, seen == null ? 1 : seen + 1);
                }
            }

            showMap(occurrences);
        } catch (Exception ex) {
            // Top-level utility: report the failure and return without a result.
            ex.printStackTrace();
        }
    }

    /**
     * Prints every line whose count is greater than one, followed by the
     * number of such lines.
     *
     * @param map occurrence count per line; a {@code null} map prints nothing
     */
    private static void showMap(Map<String, Integer> map) {
        if (map == null) {
            return;
        }
        int count = 0;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            // Only lines that occur more than once are reported.
            if (entry.getValue() > 1) {
                System.out.println(entry.getKey() + "......" + entry.getValue());
                count++;
            }
        }
        System.out.println("重复两次的数据:" + count);
    }
}
2.统计两个文本中重复的内容
package count;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
/**
 * Reports the lines of one text file that also occur in a second text file.
 */
public class countWordTowFile {

    public static void main(String[] args) {
        count("F:\\A\\B.xml","C:\\D\\E.txt");
    }

    /**
     * Prints every distinct line of the second file that also occurs in the
     * first file, followed by the number of such lines.
     *
     * <p>Both files are decoded as UTF-8 and lines are compared as whole
     * strings. Each reported line is printed with the value 2 (one for each
     * file); occurrences within a single file are not tallied. If either path
     * does not exist, {@code "file not exist"} is printed and nothing else
     * happens. Any exception raised while reading is caught here and its
     * stack trace is printed; it is never propagated to the caller.
     *
     * @param filepath  path of the first (reference) file
     * @param filepath2 path of the second file, whose lines are looked up in
     *                  the first
     */
    public static void count(String filepath, String filepath2) {
        try {
            File file = new File(filepath);
            File file2 = new File(filepath2);
            if (!file.exists() || !file2.exists()) {
                System.out.println("file not exist");
                return;
            }

            // Distinct lines of the first file, each mapped to 1.
            Map<String, Integer> firstFileLines = new HashMap<String, Integer>();
            // Lines of the second file that were also found in the first file.
            Map<String, Integer> shared = new HashMap<String, Integer>();

            // try-with-resources closes both readers (and their underlying
            // file streams) on both the normal and the exceptional path.
            try (BufferedReader bufReader = new BufferedReader(
                         new InputStreamReader(new FileInputStream(file), "UTF-8"));
                 BufferedReader bufReader2 = new BufferedReader(
                         new InputStreamReader(new FileInputStream(file2), "UTF-8"))) {
                String line;

                // Read the contents of the first file.
                while ((line = bufReader.readLine()) != null) {
                    firstFileLines.put(line, 1);
                }

                // Read the second file and keep only the lines seen in the first.
                while ((line = bufReader2.readLine()) != null) {
                    Integer inFirst = firstFileLines.get(line);
                    if (inFirst != null) {
                        shared.put(line, inFirst + 1);
                    }
                }
            }

            showMap(shared);
        } catch (Exception ex) {
            // Top-level utility: report the failure and return without a result.
            ex.printStackTrace();
        }
    }

    /**
     * Prints every entry of {@code map} as {@code key......value}, followed
     * by the number of entries printed.
     *
     * @param map lines to report; a {@code null} map prints nothing
     */
    private static void showMap(Map<String, Integer> map) {
        if (map == null) {
            return;
        }
        int count = 0;
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            System.out.println(entry.getKey() + "......" + entry.getValue());
            count++;
        }
        System.out.println("重复两次的数据:" + count);
    }
}