java读取解析endnote文件

  有些项目会要求用代码解析endnote文献资料,从中获取一些标准信息。例如XX在某著名期刊上发表了某篇文章,期刊或文献数据库通常会给作者提供一个endnote文件,用来记录文章名称、作者、期刊名称、出版社等信息。

这些信息如果要记录在某个系统中,用户不愿意自己填写那些信息,他更愿意上传endnote文件让系统自己解析数据。网上找了一堆也没发现一个写的好的,找了一个自己改了改清晰多了,贡献给大家。

下面直接上代码看java如何解析endnote文件

一、依赖

pom文件:

 <!--
   Dependency list from the article's demo project.
   NOTE(review): the Java sources shown in this article import only java.io and
   java.util, so the POI, commons-* and dom4j entries do not appear to be needed
   by the EndNote parser itself; confirm before copying them into another project.
 -->
 <!-- Spring Boot starters: no <version> is declared here, so it is presumably
      inherited from a parent POM or dependencyManagement section (not shown). -->
 <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter</artifactId>
        </dependency>

        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>

  
        <!-- NOTE(review): poi and poi-scratchpad are pinned to 3.9 while poi-ooxml
             is 3.15; Apache POI modules are normally kept on one version. Confirm. -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.9</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.15</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.9</version>
        </dependency>

        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>commons-fileupload</groupId>
            <artifactId>commons-fileupload</artifactId>
            <version>1.3.2</version>
        </dependency>

        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>1.6.1</version>
        </dependency>
View Code

二、相关的bean

package com.example.demo.endNote;

import java.util.List;
/**
 * Basic bibliographic information about a single document.
 *
 * <p>A plain mutable bean: every property starts out as {@code null} and is
 * populated through its setter.
 */
public class DocInfo {

    /** Single author name. Not included in {@link #toString()}. */
    private String author;
    /** Document type. */
    private String type;
    /** Title. */
    private String title;
    /** Author names; there may be several. */
    private List<String> authors;
    /** Source / journal name. */
    private String journal;
    /** Volume. */
    private String volume;
    /** Issue. */
    private String issue;
    /** Page range. */
    private String pageScope;
    /** International standard serial number. */
    private String issn;
    /** Publication year. */
    private String year;
    /** Publisher. */
    private String publisher;

    /** @return the single author name, or {@code null} if never set */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the single author name */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the document type, or {@code null} if never set */
    public String getType() {
        return this.type;
    }

    /** @param type the document type */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the title, or {@code null} if never set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the author list exactly as stored (no copy), or {@code null} if never set */
    public List<String> getAuthors() {
        return this.authors;
    }

    /** @param authors the author list; the reference is stored as-is */
    public void setAuthors(List<String> authors) {
        this.authors = authors;
    }

    /** @return the journal name, or {@code null} if never set */
    public String getJournal() {
        return this.journal;
    }

    /** @param journal the journal name */
    public void setJournal(String journal) {
        this.journal = journal;
    }

    /** @return the volume, or {@code null} if never set */
    public String getVolume() {
        return this.volume;
    }

    /** @param volume the volume */
    public void setVolume(String volume) {
        this.volume = volume;
    }

    /** @return the issue, or {@code null} if never set */
    public String getIssue() {
        return this.issue;
    }

    /** @param issue the issue */
    public void setIssue(String issue) {
        this.issue = issue;
    }

    /** @return the page range, or {@code null} if never set */
    public String getPageScope() {
        return this.pageScope;
    }

    /** @param pageScope the page range */
    public void setPageScope(String pageScope) {
        this.pageScope = pageScope;
    }

    /** @return the ISSN, or {@code null} if never set */
    public String getISSN() {
        return this.issn;
    }

    /** @param issn the ISSN */
    public void setISSN(String issn) {
        this.issn = issn;
    }

    /** @return the publication year, or {@code null} if never set */
    public String getYear() {
        return this.year;
    }

    /** @param year the publication year */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return the publisher, or {@code null} if never set */
    public String getPublisher() {
        return this.publisher;
    }

    /** @param publisher the publisher */
    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    /**
     * Renders every property except the single {@code author} field in the
     * form {@code DocInfo{Type='...', Title='...', Authors=[...], ...}}.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder("DocInfo{");
        out.append("Type='").append(this.type).append('\'');
        out.append(", Title='").append(this.title).append('\'');
        // The list is printed without surrounding quotes.
        out.append(", Authors=").append(this.authors);
        out.append(", Journal='").append(this.journal).append('\'');
        out.append(", Volume='").append(this.volume).append('\'');
        out.append(", Issue='").append(this.issue).append('\'');
        out.append(", PageScope='").append(this.pageScope).append('\'');
        out.append(", ISSN='").append(this.issn).append('\'');
        out.append(", Year='").append(this.year).append('\'');
        out.append(", Publisher='").append(this.publisher).append('\'');
        out.append('}');
        return out.toString();
    }
}
View Code

三、实现方法和主类

package com.example.demo.endNote;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;



/**
 * Reads an EndNote tagged export file (one {@code %X value} field per line)
 * and maps the recognised tags onto a {@link DocInfo}.
 */
public class EndNoteProcessor {

    /** Character set used to decode the input file. */
    private static final String ENCODING = "utf-8";

    /** Length of a field tag such as {@code %T}: the percent sign plus one character. */
    private static final int TAG_LENGTH = 2;

    /** Byte-order mark as it appears after UTF-8 decoding; Java does not strip it. */
    private static final char BOM = '\uFEFF';

    /**
     * Demo entry point: parses a fixed sample file and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        EndNoteProcessor p = new EndNoteProcessor();
        DocInfo d = p.process("E:\\test.enw");
        System.out.println(d.toString());
    }

    /**
     * Parses the given EndNote file.
     *
     * <p>Problems are reported on standard output rather than thrown: a
     * missing file or a read error yields a {@link DocInfo} holding whatever
     * had been parsed up to that point (possibly nothing).
     *
     * @param fileName path of the file to read
     * @return the parsed information; never {@code null}
     */
    public DocInfo process(String fileName) {
        DocInfo d = new DocInfo();

        try {
            File file = new File(fileName);
            if (!file.isFile()) { // isFile() is false for missing paths and directories alike
                System.out.println("找不到指定的文件");
                return d;
            }

            // Attach the list once up front; authors added later are visible
            // through the same reference even if reading fails part-way.
            List<String> authors = new ArrayList<String>();
            d.setAuthors(authors);

            // try-with-resources closes the reader (and the underlying stream)
            // on both the normal and the exceptional path.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), ENCODING))) {
                boolean firstLine = true;
                String line;
                while ((line = reader.readLine()) != null) {
                    if (firstLine) {
                        firstLine = false;
                        // A leading BOM would otherwise hide the first tag from the parser.
                        if (!line.isEmpty() && line.charAt(0) == BOM) {
                            line = line.substring(1);
                        }
                    }
                    applyLine(d, authors, line);
                }
            }
        } catch (Exception e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return d;
    }

    /**
     * Applies one input line to {@code d}; lines that do not start with a
     * recognised tag are ignored.
     *
     * <p>The value is everything after the tag and the single separator
     * character that follows it. A line holding only the tag yields an empty
     * value instead of failing.
     */
    private static void applyLine(DocInfo d, List<String> authors, String line) {
        if (line.length() < TAG_LENGTH || line.charAt(0) != '%') {
            return;
        }
        String value = line.length() > TAG_LENGTH + 1 ? line.substring(TAG_LENGTH + 1) : "";

        switch (line.charAt(1)) {
            case '0': // document type
                d.setType(value);
                break;
            case 'T': // title
                d.setTitle(value);
                break;
            case 'A': // author; may occur several times
                authors.add(value);
                break;
            case 'J': // journal name
                d.setJournal(value);
                break;
            case 'V': // volume
                d.setVolume(value);
                break;
            case 'N': // issue number
                d.setIssue(value);
                break;
            case 'P': // page range
                d.setPageScope(value);
                break;
            case '@': // ISSN
                d.setISSN(value);
                break;
            case 'D': // publication year
                d.setYear(value);
                break;
            case 'I': // publisher
                d.setPublisher(value);
                break;
            default:
                // Unrecognised tag: nothing to record.
                break;
        }
    }

}
View Code

 四、代码中E:\\test.enw中的内容

%0 Journal Article
%T 题目测试
%A 王丽文
%A 李四
%J 江苏商论
%V 第一卷
%N 06
%P 87-89+103
%@ 1009-0061
%D 2021
%I 测试出版社
View Code

                                                     

posted @ 2021-06-23 19:16  万笑佛  阅读(123)  评论(0)  编辑  收藏  举报