Lucene创建索引与搜索索引试手
由于仿写的源码的版本是Lucene2.1.0,我用的Lucene已经是4.5.0了,所以像创建IndexWriter、IndexSearcher的时候,源码里的写法已经不能用了,只好自己查api摸索,所以有个老师在旁边指导该多好。
首先我创建的是中文的索引。
CJKAnalyzer是:对中文汉字,每两个字作为一个词条
StandardAnalyzer是:单个汉字作为一个词条
所以如果要查询像“大禹”这样两个字的词条时,用CJKAnalyzer;查询像“水”这样单个字的词条时,需要改用StandardAnalyzer。注意建立索引和搜索时必须使用同一种Analyzer,否则两边分词方式不一致,就会查不到结果。我在这里纠结了很久不知道哪里错了。
还有就是StringField和TextField的区别。api的解释分别是:
TextField:A field that is indexed and tokenized, without term vectors. For example this would be used on a 'body' field, that contains the bulk of a document's text.
StringField:A field that is indexed but not tokenized: the entire String value is indexed as a single token. For example this might be used for a 'country' field or an 'id' field, or any field that you intend to use for sorting or access through the field cache.
现在看看也没很多错的地方,但是写了仨小时。期间各种查api啊,还是那句话,有个老师指点一下的话,我就能少走很多弯路,节省很多时间了。唉。。。
package org.apache.lucene; import java.awt.BorderLayout; import java.awt.Container; import java.awt.GridLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.text.SimpleDateFormat; import java.util.Date; import javax.swing.JButton; import javax.swing.JFileChooser; import javax.swing.JFrame; import javax.swing.JLabel; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JScrollPane; import javax.swing.JTextArea; import javax.swing.JTextField; import javax.swing.SwingUtilities; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import com.wb.tool.FileList; import com.wb.tool.FileText; public class LuceneIndexer { private JTextField jtfa; private JButton jba; private JTextField jtfb; private JButton jbb; private JButton jbc; private static JTextArea jta; private void createAndShowGUI() { // 设置跨平台外观感觉 //String lf=UIManager.getCrossPlatformLookAndFeelClassName(); //GTK //String lf="com.sun.java.swing.plaf.gtk.GTKLookAndFeel"; //System //String lf=UIManager.getSystemLookAndFeelClassName(); //windows //String lf="com.sun.java.swing.plaf.windows.WindowsLookAndFeel"; //metal //String lf="javax.swing.plaf.metal.MetalLookAndFeel"; /**common use try { UIManager.setLookAndFeel(lf); } catch(Exception ce) { JOptionPane.showMessageDialog(null,"无法设定外观感觉!"); } **/ //Java感觉 JFrame.setDefaultLookAndFeelDecorated(true); JFrame frame=new JFrame("TEST"); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); final JFileChooser fc=new JFileChooser(); 
fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); Container con= frame.getContentPane(); con.setLayout(new BorderLayout()); JPanel jpup=new JPanel(); jpup.setLayout(new GridLayout(3,2)); jtfa=new JTextField(30); jba=new JButton("选择被索引的文件存放路径"); jba.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { int r=fc.showOpenDialog(null); if(r==JFileChooser.APPROVE_OPTION) { jtfa.setText(fc.getSelectedFile().getPath()); jbc.setEnabled(true); } } } ); jtfb=new JTextField(30); JButton jbb=new JButton("选择索引的存放路径"); jbb.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { int r=fc.showOpenDialog(null); if(r==JFileChooser.APPROVE_OPTION) { jtfb.setText(fc.getSelectedFile().getPath()); jbc.setEnabled(true); } } } ); JLabel jl=new JLabel(""); jbc=new JButton("建立索引"); jbc.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { try { LuceneIndexerTool.index(jtfa.getText(),jtfb.getText()); //jbc.setEnabled(false); } catch(Exception ee) { ee.printStackTrace(); jbc.setEnabled(true); JOptionPane.showMessageDialog(null,"索引创建失败!"); System.out.println(ee.getMessage()); } } } ); jpup.add(jtfa); jpup.add(jba); jpup.add(jtfb); jpup.add(jbb); jpup.add(jl); jpup.add(jbc); jta=new JTextArea(10,60); JScrollPane jsp=new JScrollPane(jta); con.add(jpup,BorderLayout.NORTH); con.add(jsp,BorderLayout.CENTER); frame.setSize(200,100); frame.pack(); frame.setVisible(true); } public static void main(String[] args) { SwingUtilities.invokeLater( new Runnable() { public void run() { new LuceneIndexer().createAndShowGUI(); } } ); } static class LuceneIndexerTool { public static void index(String filePath, String indexPath) throws IOException { Path path = Paths.get(indexPath); Directory dir = FSDirectory.open(path); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(dir, config); String s[] = 
FileList.getFiles(filePath); int len = s.length; for(int i=0; i<len; i++) { File file = new File(s[i]); String ext = getExt(file); if((ext.equalsIgnoreCase("htm")) || (ext.equalsIgnoreCase("html"))) { Document doc = new Document(); Field field; String fileName = file.getName(); field = new TextField("fileName", fileName, Field.Store.YES); doc.add(field); String uri = file.getPath(); field = new TextField("uri", uri, Field.Store.YES); doc.add(field); Date dt = new Date(file.lastModified()); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-mm-dd"); String date = sdf.format(dt); field = new TextField("date", date, Field.Store.YES); doc.add(field); double l = file.length(); String size = ""; if(l>1024) size = String.valueOf(Math.floor(l/1024)) + "K"; else size = String.valueOf(size) + "Bytes"; field = new TextField("size", size, Field.Store.YES); doc.add(field); String text = FileText.getText(file); field = new TextField("text", text, Field.Store.YES); doc.add(field); String digest = ""; if(text.length() > 200) digest = text.substring(0, 200); else digest = text; field = new TextField("digest", digest, Field.Store.YES); doc.add(field); writer.addDocument(doc); jta.setText(jta.getText() + "已经加入索引:" + file + "\n"); } } writer.close(); } public static String getExt(File file) { String s = file.getName(); s = s.substring(s.lastIndexOf(".") + 1); return s; } } }
</pre><pre name="code" class="java"><pre name="code" class="java">package org.apache.lucene; import java.awt.BorderLayout; import java.awt.Container; import java.awt.FlowLayout; import java.awt.GridLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.nio.file.Path; import java.nio.file.Paths; import javax.swing.JButton; import javax.swing.JFileChooser; import javax.swing.JFrame; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.JScrollPane; import javax.swing.JTextArea; import javax.swing.JTextField; import javax.swing.SwingUtilities; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; public class LuceneSearcher { private JTextField jtfa; private JButton jba; private JTextField jtfb; private JButton jbb; private JButton jbc; private static JTextArea jta; private JTextField jtfc; private JButton jbd; private JButton jbe; private void createAndShowGUI() { // 设置跨平台外观感觉 //String lf=UIManager.getCrossPlatformLookAndFeelClassName(); //GTK //String lf="com.sun.java.swing.plaf.gtk.GTKLookAndFeel"; //System //String lf=UIManager.getSystemLookAndFeelClassName(); //windows //String lf="com.sun.java.swing.plaf.windows.WindowsLookAndFeel"; //metal //String lf="javax.swing.plaf.metal.MetalLookAndFeel"; /**common use 
try { UIManager.setLookAndFeel(lf); } catch(Exception ce) { JOptionPane.showMessageDialog(null,"无法设定外观感觉!"); } **/ //Java感觉 JFrame.setDefaultLookAndFeelDecorated(true); JFrame frame=new JFrame("Tianen Searcher! yutianen@163.com"); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); final JFileChooser fc=new JFileChooser(); fc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY); Container con= frame.getContentPane(); con.setLayout(new BorderLayout()); JPanel jpup=new JPanel(); jpup.setLayout(new GridLayout(2,2)); jtfa=new JTextField(30); jba=new JButton("选择索引的存放路径"); jba.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { int r=fc.showOpenDialog(null); if(r==JFileChooser.APPROVE_OPTION) { jtfa.setText(fc.getSelectedFile().getPath()); } } } ); jtfb=new JTextField(30); JButton jbb=new JButton("搜索"); jbb.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { try { String indexPath=jtfa.getText(); String phrase=jtfb.getText(); new LuceneSearcherTool().search(phrase,indexPath); System.out.println("123"); } catch(Exception ex) { JOptionPane.showMessageDialog(null,"搜索失败!","提示",JOptionPane.ERROR_MESSAGE); } } } ); jpup.add(jtfa); jpup.add(jba); jpup.add(jtfb); jpup.add(jbb); jta=new JTextArea(10,30); JScrollPane jsp=new JScrollPane(jta); JPanel jpdown=new JPanel(); jpdown.setLayout(new FlowLayout()); jtfc=new JTextField(35); jbd=new JButton("设定导出路径"); fc.setFileSelectionMode(JFileChooser.FILES_AND_DIRECTORIES); jbd.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { int r=fc.showOpenDialog(null); if(r==JFileChooser.APPROVE_OPTION) { jtfc.setText(fc.getSelectedFile().getPath()); } } } ); jbe=new JButton("导出搜索结果"); jbe.addActionListener ( new ActionListener() { public void actionPerformed(ActionEvent e) { try { File f=new File(jtfc.getText()); FileWriter fw=new FileWriter(f); PrintWriter pw=new PrintWriter(fw); pw.write(jta.getText()); pw.flush(); pw.close(); 
JOptionPane.showMessageDialog(null,"写入文件成功!","提示",JOptionPane.INFORMATION_MESSAGE); } catch(IOException ioe) { JOptionPane.showMessageDialog(null,"写入文件失败!","提示",JOptionPane.ERROR_MESSAGE); } } } ); jpdown.add(jtfc); jpdown.add(jbd); jpdown.add(jbe); con.add(jpup,BorderLayout.NORTH); con.add(jsp,BorderLayout.CENTER); con.add(jpdown,BorderLayout.SOUTH); frame.setSize(200,100); frame.pack(); frame.setVisible(true); } public static void main(String[] args) { SwingUtilities.invokeLater( new Runnable() { public void run() { new LuceneSearcher().createAndShowGUI(); } } ); } static class LuceneSearcherTool { public void search(String phrase, String indexPath) throws IOException, ParseException { Path path = Paths.get(indexPath); Directory dir = FSDirectory.open(path); IndexReader ir = DirectoryReader.open(dir); IndexSearcher is = new IndexSearcher(ir); Analyzer analyzer = new StandardAnalyzer(); QueryParser parser = new QueryParser("text", analyzer); Query query = parser.parse(phrase); TopDocs hits = is.search(query, 10); for(ScoreDoc scoreDoc: hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); if(doc == null) continue; Field field = (Field) doc.getField("fileName"); String fileName = field.stringValue(); field = (Field) doc.getField("uri"); String uri = field.stringValue(); field = (Field) doc.getField("date"); String date = field.stringValue(); field = (Field) doc.getField("digest"); String digest = field.stringValue(); StringBuffer sb = new StringBuffer(); sb.append("URI:" + uri + "\n"); sb.append("filename:" + fileName + "\n"); sb.append("date:" + date + "\n"); sb.append("digest:" + digest + "\n"); sb.append("------------------------------------\n"); jta.setText(jta.getText() + sb.toString()); } ir.close(); dir.close(); } } }