package experiment10.exp2;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.common.Term;
import experiment9.FileName;

import java.io.*;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;

public class ReverseIndex {
    public static void main(String[] args) throws IOException {
        BufferedReader bufferedReader1 = new BufferedReader(new FileReader(FileName.fileName10_1, Charset.forName("utf-8")));
        BufferedReader bufferedReader2 = new BufferedReader(new FileReader(FileName.fileName10_2, Charset.forName("utf-8")));
        BufferedReader bufferedReader3 = new BufferedReader(new FileReader(FileName.fileName10_3, Charset.forName("utf-8")));

        // write the output in UTF-8 as well, so the encoding matches the readers
        BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(FileName.fileNameOut10_1, Charset.forName("utf-8")));

        HashMap<String, Integer> countMap1 = new HashMap<>();
        HashMap<String, Integer> countMap2 = new HashMap<>();
        HashMap<String, Integer> countMap3 = new HashMap<>();

        // word -> posting list ("file->frequency" entries)
        HashMap<String, WordNode> map = new HashMap<>();

        String line = "";

        // the first book seeds the index, so every word is new here
        countWords(bufferedReader1, countMap1, line);
        for (String word : countMap1.keySet()) {
            WordNode node = new WordNode(word);
            node.addToList("倚天屠龙记.txt->" + countMap1.get(word));
            map.put(word, node);
        }

        // the second and third books are merged: append to an existing posting list or create a new entry
        countWords(bufferedReader2, countMap2, line);
        for (String word : countMap2.keySet()) {
            if (!map.containsKey(word)) {
                WordNode node = new WordNode(word);
                node.addToList("鹿鼎记.txt->" + countMap2.get(word));
                map.put(word, node);
            } else {
                map.get(word).addToList("鹿鼎记.txt->" + countMap2.get(word));
            }
        }

        countWords(bufferedReader3, countMap3, line);
        for (String word : countMap3.keySet()) {
            if (!map.containsKey(word)) {
                WordNode node = new WordNode(word);
                node.addToList("笑傲江湖.txt->" + countMap3.get(word));
                map.put(word, node);
            } else {
                map.get(word).addToList("笑傲江湖.txt->" + countMap3.get(word));
            }
        }

        // one output line per word: word<TAB>file->count entries
        StringBuilder stringBuilder = new StringBuilder();
        for (String item : map.keySet()) {
            stringBuilder.append(map.get(item));
        }
        System.out.println(stringBuilder);
        bufferedWriter.write(stringBuilder.toString());
        bufferedWriter.flush();
        bufferedReader1.close();
        bufferedReader2.close();
        bufferedReader3.close();
        bufferedWriter.close();
    }

    static void countWords(BufferedReader bufferedReader, HashMap<String, Integer> countMap, String line) throws IOException {
        while ((line = bufferedReader.readLine()) != null) {
            // segment the line with HanLP and count every word
            List<Term> segment = HanLP.segment(line);
            for (Term x : segment) {
                // use the word itself as the key; x.toString() would also append the part-of-speech tag
                String keyString = x.word;
                if (countMap.containsKey(keyString)) {
                    countMap.put(keyString, countMap.get(keyString) + 1);
                } else {
                    countMap.put(keyString, 1);
                }
            }
        }
    }
}
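The FileName class is imported from experiment9 and not listed here; it presumably just exposes the input and output paths as string constants. A minimal sketch under that assumption (the field names match the code above, the paths are placeholders, not the original values):

package experiment9;

public class FileName {
    // hypothetical paths; point these at the three novels and the output file
    public static final String fileName10_1 = "data/倚天屠龙记.txt";
    public static final String fileName10_2 = "data/鹿鼎记.txt";
    public static final String fileName10_3 = "data/笑傲江湖.txt";
    public static final String fileNameOut10_1 = "data/reverse_index.txt";
}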
A helper class, WordNode, does the bookkeeping: it stores a word together with its posting list, i.e. the "file->frequency" entries for every book the word appears in. A short usage sketch follows the class.
package experiment10.exp2;

import java.util.ArrayList;
import java.util.List;

public class WordNode {
    String word;
    List<String> list = new ArrayList<>();

    public WordNode(String word) {
        this.word = word;
    }

    public String getWord() {
        return word;
    }

    public List<String> getList() {
        return list;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public void addToList(String fileNameAndFrequency) {
        list.add(fileNameAndFrequency);
    }

    @Override
    public String toString() {
        StringBuilder buffer = new StringBuilder();
        for (String phrase : list) {
            buffer.append(phrase).append("\t");
        }
        return word + "\t" + buffer + "\n";
    }
}
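For example (with made-up counts), the entry for a word that occurs in two of the books is built up like this, and toString() renders it as one tab-separated index line:

WordNode node = new WordNode("武功");
node.addToList("倚天屠龙记.txt->1523");
node.addToList("笑傲江湖.txt->1688");
// prints: 武功<TAB>倚天屠龙记.txt->1523<TAB>笑傲江湖.txt->1688<TAB> followed by a newline
System.out.print(node);

The second version of ReverseIndex below builds the same index with one thread per book: segmentation and counting run in parallel, and only the merge into the shared map is serialized.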
package experiment10.exp2;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.common.Term;
import experiment9.FileName;

import java.io.*;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;

public class ReverseIndex {
    public static void main(String[] args) throws IOException, InterruptedException {
        BufferedReader bufferedReader1 = new BufferedReader(new FileReader(FileName.fileName10_1, Charset.forName("utf-8")));
        BufferedReader bufferedReader2 = new BufferedReader(new FileReader(FileName.fileName10_2, Charset.forName("utf-8")));
        BufferedReader bufferedReader3 = new BufferedReader(new FileReader(FileName.fileName10_3, Charset.forName("utf-8")));

        // write the output in UTF-8 as well, so the encoding matches the readers
        BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(FileName.fileNameOut10_1, Charset.forName("utf-8")));

        HashMap<String, Integer> countMap1 = new HashMap<>();
        HashMap<String, Integer> countMap2 = new HashMap<>();
        HashMap<String, Integer> countMap3 = new HashMap<>();

        // shared index; every access from the three threads is guarded by synchronized (map)
        HashMap<String, WordNode> map = new HashMap<>();

        String line = "";

        // one thread per book: counting runs in parallel, only the merge is serialized
        Thread thread1 = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    countWords(bufferedReader1, countMap1, line);
                    bufferedReader1.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                synchronized (map) {
                    // unlike the single-threaded version, this thread cannot assume the map
                    // is still empty: the three threads finish in any order
                    for (String word : countMap1.keySet()) {
                        if (!map.containsKey(word)) {
                            WordNode node = new WordNode(word);
                            node.addToList("倚天屠龙记.txt->" + countMap1.get(word));
                            map.put(word, node);
                        } else {
                            map.get(word).addToList("倚天屠龙记.txt->" + countMap1.get(word));
                        }
                    }
                }
            }
        });
        thread1.start();

        Thread thread2 = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    countWords(bufferedReader2, countMap2, line);
                    bufferedReader2.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                synchronized (map) {
                    for (String word : countMap2.keySet()) {
                        if (!map.containsKey(word)) {
                            WordNode node = new WordNode(word);
                            node.addToList("鹿鼎记.txt->" + countMap2.get(word));
                            map.put(word, node);
                        } else {
                            map.get(word).addToList("鹿鼎记.txt->" + countMap2.get(word));
                        }
                    }
                }
            }
        });
        thread2.start();

        Thread thread3 = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    countWords(bufferedReader3, countMap3, line);
                    bufferedReader3.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
                synchronized (map) {
                    for (String word : countMap3.keySet()) {
                        if (!map.containsKey(word)) {
                            WordNode node = new WordNode(word);
                            node.addToList("笑傲江湖.txt->" + countMap3.get(word));
                            map.put(word, node);
                        } else {
                            map.get(word).addToList("笑傲江湖.txt->" + countMap3.get(word));
                        }
                    }
                }
            }
        });
        thread3.start();

        // wait for all three books before writing the index
        thread1.join();
        thread2.join();
        thread3.join();

        StringBuilder stringBuilder = new StringBuilder();
        for (String item : map.keySet()) {
            stringBuilder.append(map.get(item));
        }

        System.out.println(stringBuilder);
        bufferedWriter.write(stringBuilder.toString());
        bufferedWriter.flush();

        bufferedWriter.close();
    }

    static void countWords(BufferedReader bufferedReader, HashMap<String, Integer> countMap, String line) throws IOException {
        while ((line = bufferedReader.readLine()) != null) {
            List<Term> segment = HanLP.segment(line);
            for (Term x : segment) {
                // use the word itself as the key; x.toString() would also append the part-of-speech tag
                String keyString = x.word;
                if (countMap.containsKey(keyString)) {
                    countMap.put(keyString, countMap.get(keyString) + 1);
                } else {
                    countMap.put(keyString, 1);
                }
            }
        }
    }
}
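The threads above serialize their merges with synchronized (map). An alternative sketch (not what the exercise used) is to make the shared map a ConcurrentHashMap and merge with computeIfAbsent, which creates the WordNode for a missing word atomically:

package experiment10.exp2;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public class ConcurrentMergeSketch {
    // shared index; computeIfAbsent on a ConcurrentHashMap is atomic per key
    static final Map<String, WordNode> map = new ConcurrentHashMap<>();

    // called by each counting thread once its own countMap is complete
    static void merge(Map<String, Integer> countMap, String fileName) {
        for (Map.Entry<String, Integer> e : countMap.entrySet()) {
            // create the WordNode only if the word is new; no explicit lock needed here
            map.computeIfAbsent(e.getKey(), WordNode::new)
               .addToList(fileName + "->" + e.getValue());
        }
    }
}

For this to be fully safe, WordNode.addToList would also need to be synchronized (or the list swapped for a concurrent collection), because two threads can still reach the same WordNode at the same time.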