java算法面试题:从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序 ;读取docx 读取doc 使用poi 相关jar包提集提供下载

从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序

1,张三,28

2,李四,35

3,张三,28

4,王五,35

5,张三,28

6,李四,35

7,赵六,28

8,田七,35

package com.swift;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

public class IO_Sort_Select {

    public static void main(String[] args) throws IOException {
        /*
         * 从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序
         */
        File file_s = new File("e:\\neck\\data.docx");
        File dir = new File("e:\\neck");
        if (!dir.exists() && dir.isDirectory()) {
            System.out.println("目录不存在,即将创建...");
            dir.mkdirs();
        }
        File file_t = new File(dir, "data.txt");

        List<String> list = new ArrayList<String>();
        String text = readFromDocx(file_s);
        String[] hang = text.split("\\n");
        for (int i = 0; i < hang.length; i++) {
            String[] lie = hang[i].split("\\,");
            for (int j = 0; j < lie.length; j++) {
                if (j == 1) {
                    list.add(lie[j]);
                }
            }

        }
        printList(list);

        Map<String, Integer> map = new TreeMap<String, Integer>();
        for (String str : list) {
            map.put(str, 0);
        }
        for(String str:list) {
            if(map.containsKey(str)) {
                int num=map.get(str);
                num++;
                map.remove(str);
                map.put(str, num);
            }
        }
        List<Entry<String,Integer>> listMap=new ArrayList<Entry<String,Integer>>(map.entrySet());
        Collections.sort(listMap, new Comparator<Entry<String,Integer>>(){

            @Override
            public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) {
                int num=arg1.getValue()-arg0.getValue();
                return num==0?arg0.getKey().compareTo(arg1.getKey()):num;
            }
        });
        
        for(Entry<String, Integer> entry:listMap) {
            System.out.println("重复的姓名是 :"+entry.getKey()+" 重复的次数是:"+entry.getValue());
        }
    }

    private static void printList(List<String> list) {
        for (String str : list) {
            System.out.println(str);
        }
    }

    public static String readFromDocx(File file) throws IOException {

        FileInputStream fis = new FileInputStream(file);
        XWPFDocument docx = new XWPFDocument(fis);
        XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
        String text = extractor.getText();
        return text;

    }

}

 上面代码读取的是docx的word文件,直接读取会出现乱码,因word中不仅有文本还有压缩的其他属性等内容,所以要使用poi的jar包进行解析

解析的jar包如下图

这个应该比较全面了,也可以解析excel操作或写入表格等

 

下载地址:

链接: https://pan.baidu.com/s/1htYPKLA 密码: e36e

 下面是读取doc的方法

package com.swift;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

public class IO_Sort_Select {

    public static void main(String[] args) throws IOException {
        /*
         * 从类似如下的文本文件中读取出所有的姓名,并打印出重复的姓名和重复的次数,并按重复次数排序
         */
        File file_s = new File("e:\\neck\\data.doc");
        File dir = new File("e:\\neck");
        if (!dir.exists() && dir.isDirectory()) {
            System.out.println("目录不存在,即将创建...");
            dir.mkdirs();
        }

        List<String> list = new ArrayList<String>();
        String text = readFromDoc(file_s);
        String[] hang = text.split("\\r");
        for (int i = 0; i < hang.length; i++) {
            System.out.println(hang.length);
            System.out.println(hang[i]);
            String[] lie = hang[i].split("\\,");
            for (int j = 0; j < lie.length; j++) {
                if (j == 1) {
                    list.add(lie[j]);
                }
            }
        }
        printList(list);

        Map<String, Integer> map = new TreeMap<String, Integer>();
        for (String str : list) {
            map.put(str, 0);
        }
        for(String str:list) {
            if(map.containsKey(str)) {
                int num=map.get(str);
                num++;
                map.remove(str);
                map.put(str, num);
            }
        }
        List<Entry<String,Integer>> listMap=new ArrayList<Entry<String,Integer>>(map.entrySet());
        Collections.sort(listMap, new Comparator<Entry<String,Integer>>(){

            @Override
            public int compare(Entry<String, Integer> arg0, Entry<String, Integer> arg1) {
                int num=arg1.getValue()-arg0.getValue();
                return num==0?arg0.getKey().compareTo(arg1.getKey()):num;
            }
        });
        
        for(Entry<String, Integer> entry:listMap) {
            System.out.println("重复的姓名是 :"+entry.getKey()+" 重复的次数是:"+entry.getValue());
        }
    }

    private static void printList(List<String> list) {
        for (String str : list) {
            System.out.println(str);
        }
    }

    public static String readFromDocx(File file) throws IOException {

        FileInputStream fis = new FileInputStream(file);
        XWPFDocument docx = new XWPFDocument(fis);
        XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
        String text = extractor.getText();
        return text;
    }
    public static String readFromDoc(File file) throws IOException {
        
        FileInputStream fis = new FileInputStream(file);
        HWPFDocument doc = new HWPFDocument(fis);
        String text = doc.getDocumentText();
        return text;
    }

    
}

doc读取出来的文本还不能用"\\n"进行行分割,用的是"\\r"才行,要不然只能分割出1行,这点注意,要不会觉得程序莫名其妙不安自己的思路走

posted @ 2018-01-25 21:47  Advancing-Swift  阅读(992)  评论(0编辑  收藏  举报