java读取解析endnote文件
有些项目会要求用代码解析endnote文献资料,从中获取一些标准信息。例如XX在某著名期刊上发表了某篇文章,针对这次发表,通常会给作者一个endnote文件,用来记录文章名称、作者、期刊名称、出版社等信息。
这些信息如果要记录在某个系统中,用户往往不愿意自己逐项填写,而更愿意上传endnote文件,让系统自己解析数据。网上找了一堆,也没发现一个写得好的;于是找了一个自己改了改,清晰多了,贡献给大家。
下面直接上代码看java如何解析endnote文件
一、依赖
pom文件:
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-test</artifactId>
    <scope>test</scope>
</dependency>
<!-- All Apache POI artifacts must come from the same release; mixing
     poi 3.9 with poi-ooxml 3.15 leads to NoSuchMethodError /
     NoClassDefFoundError at runtime, so they are aligned on 3.15. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.4</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.4</version>
</dependency>
<dependency>
    <groupId>commons-fileupload</groupId>
    <artifactId>commons-fileupload</artifactId>
    <version>1.3.2</version>
</dependency>
<dependency>
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.6.1</version>
</dependency>
二、相关的bean
package com.example.demo.endNote;

import java.util.List;

/**
 * Basic bibliographic information about a single document.
 *
 * <p>A plain mutable bean: every property starts out as {@code null} and is
 * filled in through its setter.
 */
public class DocInfo {

    /** Author. */
    private String author;
    /** Document type. */
    private String type;
    /** Title. */
    private String title;
    /** Authors. */
    private List<String> authors;
    /** Source of the document / journal name. */
    private String journal;
    /** Volume number. */
    private String volume;
    /** Issue number. */
    private String issue;
    /** Page range. */
    private String pageScope;
    /** International standard serial number. */
    private String issn;
    /** Year of publication. */
    private String year;
    /** Publisher. */
    private String publisher;

    public String getAuthor() {
        return this.author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getType() {
        return this.type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getTitle() {
        return this.title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public List<String> getAuthors() {
        return this.authors;
    }

    public void setAuthors(List<String> authors) {
        this.authors = authors;
    }

    public String getJournal() {
        return this.journal;
    }

    public void setJournal(String journal) {
        this.journal = journal;
    }

    public String getVolume() {
        return this.volume;
    }

    public void setVolume(String volume) {
        this.volume = volume;
    }

    public String getIssue() {
        return this.issue;
    }

    public void setIssue(String issue) {
        this.issue = issue;
    }

    public String getPageScope() {
        return this.pageScope;
    }

    public void setPageScope(String pageScope) {
        this.pageScope = pageScope;
    }

    public String getISSN() {
        return this.issn;
    }

    public void setISSN(String ISSN) {
        this.issn = ISSN;
    }

    public String getYear() {
        return this.year;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public String getPublisher() {
        return this.publisher;
    }

    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    /**
     * Renders the bean as {@code DocInfo{Type='...', Title='...', ...}}.
     * The single {@code author} property is not part of the output.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("DocInfo{");
        text.append("Type='").append(this.type).append('\'');
        text.append(", Title='").append(this.title).append('\'');
        text.append(", Authors=").append(this.authors);
        text.append(", Journal='").append(this.journal).append('\'');
        text.append(", Volume='").append(this.volume).append('\'');
        text.append(", Issue='").append(this.issue).append('\'');
        text.append(", PageScope='").append(this.pageScope).append('\'');
        text.append(", ISSN='").append(this.issn).append('\'');
        text.append(", Year='").append(this.year).append('\'');
        text.append(", Publisher='").append(this.publisher).append('\'');
        text.append('}');
        return text.toString();
    }
}
三、实现方法和主类
package com.example.demo.endNote;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a tagged EndNote export file (one {@code %X value} field per line)
 * and copies the recognised fields into a {@link DocInfo}.
 */
public class EndNoteProcessor {

    /** Number of characters in a field tag such as {@code %T}. */
    private static final int TAG_LENGTH = 2;

    /** Byte-order mark that may precede the first line of a UTF-8 file. */
    private static final char BOM = '\uFEFF';

    /** Demo entry point: parses a fixed sample file and prints the result. */
    public static void main(String[] args) {
        EndNoteProcessor p = new EndNoteProcessor();
        DocInfo d = p.process("E:\\test.enw");
        System.out.println(d.toString());
    }

    /**
     * Parses the EndNote file at {@code fileName}, decoding it as UTF-8.
     *
     * <p>This method never throws. If the file does not exist, a message is
     * printed and an empty {@link DocInfo} is returned. If reading fails,
     * a message and the stack trace are printed and whatever was parsed up
     * to that point is returned.
     *
     * @param fileName path of the EndNote file to read
     * @return the parsed information; never {@code null}. When the file could
     *         be opened, the authors list is non-null (possibly empty).
     */
    public DocInfo process(String fileName) {
        DocInfo d = new DocInfo();
        try {
            File file = new File(fileName);
            // isFile() is only true for an existing regular file, so a
            // separate exists() check is not needed.
            if (!file.isFile()) {
                System.out.println("找不到指定的文件");
                return d;
            }

            // Attach the list up front: it is filled in place, so authors read
            // before a mid-file I/O error are still visible to the caller.
            List<String> authors = new ArrayList<String>();
            d.setAuthors(authors);

            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
                boolean firstLine = true;
                String line;
                while ((line = reader.readLine()) != null) {
                    // Echo every line that was read to stdout.
                    System.out.println(line);
                    if (firstLine) {
                        firstLine = false;
                        // A leading BOM would otherwise hide the first tag.
                        if (!line.isEmpty() && line.charAt(0) == BOM) {
                            line = line.substring(1);
                        }
                    }
                    applyLine(d, authors, line);
                }
            }
        } catch (Exception e) {
            // Broad catch is deliberate: callers rely on this method not throwing.
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
        return d;
    }

    /** Stores the value of one tagged line into {@code doc}; other lines are ignored. */
    private static void applyLine(DocInfo doc, List<String> authors, String line) {
        if (line.length() < TAG_LENGTH || line.charAt(0) != '%') {
            return;
        }
        String value = tagValue(line);
        switch (line.charAt(1)) {
            case '0': // document type
                doc.setType(value);
                break;
            case 'T': // title
                doc.setTitle(value);
                break;
            case 'A': // author; may occur several times
                authors.add(value);
                break;
            case 'J': // journal name
                doc.setJournal(value);
                break;
            case 'V': // volume
                doc.setVolume(value);
                break;
            case 'N': // issue
                doc.setIssue(value);
                break;
            case 'P': // page range
                doc.setPageScope(value);
                break;
            case '@': // ISSN
                doc.setISSN(value);
                break;
            case 'D': // publication date
                doc.setYear(value);
                break;
            case 'I': // publisher
                doc.setPublisher(value);
                break;
            default:
                break;
        }
    }

    /**
     * Returns the text after the two-character tag, skipping at most one
     * separating whitespace character. A bare tag yields the empty string
     * instead of an out-of-range substring.
     */
    private static String tagValue(String line) {
        int start = TAG_LENGTH;
        if (start < line.length() && Character.isWhitespace(line.charAt(start))) {
            start++;
        }
        return line.substring(start);
    }
}
四、代码中E:\\test.enw中的内容
%0 Journal Article
%T 题目测试
%A 王丽文
%A 李四
%J 江苏商论
%V 第一卷
%N 06
%P 87-89+103
%@ 1009-0061
%D 2021
%I 测试出版社