solr

tomcat上安装solr
***
新建一个空目录D:\server,把apache-tomcat-8.0.11和solr-4.9.0拷贝到该目录下
 
在D:\server\apache-tomcat-8.0.11\conf\Catalina\localhost下(目录不存在时先手动创建)新建solr.xml,如下:
 
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Tomcat context descriptor that deploys the Solr web application.
  docBase points at the solr.war inside the Solr 4.9.0 distribution, and the
  solr/home environment entry carries the path of the Solr home directory.
  The legacy debug attribute is intentionally omitted: the Tomcat 8 Context
  element has no such property.
-->
<Context docBase="D:\server\solr-4.9.0\example\webapps\solr.war" crossContext="true">
  <Environment name="solr/home"
               type="java.lang.String"
               value="D:\server\solr-4.9.0\example\solr"
               override="true"/>
</Context>
 
从solr-4.9.0\example\lib\ext复制所有的jar到tomcat/lib下,并复制solr-4.9.0\example\resources\log4j.properties到tomcat/lib下(有关日志的说明,见http://wiki.apache.org/solr/SolrLogging)。须知,从Solr 4.3起solr.war不再自带日志打印组件,因此如果不执行这个步骤,启动时可能引起“org.apache.catalina.core.StandardContext filterStart SEVERE: Exception starting filter SolrRequestFilter org.apache.solr.common.SolrException: Could not find necessary SLF4j logging jars.”异常。
 
 完成上述步骤以后,启动tomcat,访问http://<host>:8080/solr/admin确认是否配置正常。
***
 
solr中文分词
***
schema.xml中添加:
<!-- Field type for Chinese text: the whole analysis chain is delegated to
     the IKAnalyzer class named in the analyzer element. -->
<fieldType name="text_zh" class="solr.TextField">
  <analyzer class="org.wltea.analyzer.lucene.IKAnalyzer"/>
</fieldType>
新建目录D:\server\solr-4.9.0\example\solr\lib,把IKAnalyzer2012FF_u1.jar拷贝到该目录下
***
 
 
solr suggestion
***
在solrconfig.xml中设置如下
<!-- Spell-check search component. Each "spellchecker" list below declares
     one named spellchecker; only the one named "default" is active, the
     others are commented-out templates. -->
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
 
    <!-- Field type used for query analysis; text_zh is the IKAnalyzer-based
         type added to schema.xml. -->
    <str name="queryAnalyzerFieldType">text_zh</str>
 
    <!-- Multiple "Spell Checkers" can be declared and used by this
         component.
      -->
 
    <!-- A spellchecker built from a field of the main index. -->
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">text</str>
      <str name="classname">solr.DirectSolrSpellChecker</str>
      
      <str name="distanceMeasure">internal</str>
      <!-- Minimum accuracy needed to be considered a valid spellcheck suggestion. -->
      <float name="accuracy">0.5</float>
      <!-- The maximum number of edits considered when enumerating terms: can be 1 or 2. -->
      <int name="maxEdits">2</int>
      <!-- The minimum shared prefix when enumerating terms. -->
      <int name="minPrefix">1</int>
      <!-- Maximum number of inspections per result. -->
      <int name="maxInspections">5</int>
      <!-- Minimum length of a query term to be considered for correction.
           NOTE(review): with a Chinese analyzer most terms are presumably
           shorter than 4 characters; confirm this value is intended. -->
      <int name="minQueryLength">4</int>
      <!-- Maximum threshold of documents a query term can appear in to be considered for correction. -->
      <float name="maxQueryFrequency">0.01</float>
      <!-- Uncomment this to require suggestions to occur in 1% of the documents:
      <float name="thresholdTokenFrequency">.01</float>
      -->
    </lst>
    
    <!-- A spellchecker that can break or combine words. It is disabled here,
         so request handlers must not ask for the "wordbreak" dictionary
         unless this block is uncommented. -->
    <!--<lst name="spellchecker">
      <str name="name">wordbreak</str>
      <str name="classname">solr.WordBreakSolrSpellChecker</str>      
      <str name="field">name</str>
      <str name="combineWords">true</str>
      <str name="breakWords">true</str>
      <int name="maxChanges">10</int>
    </lst>
-->
    <!-- A spellchecker that uses a different distance measure. -->
    <!--
       <lst name="spellchecker">
         <str name="name">jarowinkler</str>
         <str name="field">spell</str>
         <str name="classname">solr.DirectSolrSpellChecker</str>
         <str name="distanceMeasure">
           org.apache.lucene.search.spell.JaroWinklerDistance
         </str>
       </lst>
     -->
 
    <!-- A spellchecker that uses an alternate comparator.
 
         comparatorClass can be one of:
          1. score (default)
          2. freq (Frequency first, then score)
          3. A fully qualified class name
      -->
    <!--
       <lst name="spellchecker">
         <str name="name">freq</str>
         <str name="field">lowerfilt</str>
         <str name="classname">solr.DirectSolrSpellChecker</str>
         <str name="comparatorClass">freq</str>
       </lst>
      -->
 
    <!-- A spellchecker that reads the list of words from a file. -->
    <!--
       <lst name="spellchecker">
         <str name="classname">solr.FileBasedSpellChecker</str>
         <str name="name">file</str>
         <str name="sourceLocation">spellings.txt</str>
         <str name="characterEncoding">UTF-8</str>
         <str name="spellcheckIndexDir">spellcheckerFile</str>
       </lst>
      -->
  </searchComponent>
  
  <!-- A request handler for demonstrating the spellcheck component.  
 
       NOTE: This is purely as an example.  The whole purpose of the
       SpellCheckComponent is to hook it into the request handler that
       handles your normal user queries so that a separate request is
       not needed to get suggestions.
 
       IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS
       NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!
       
       See http://wiki.apache.org/solr/SpellCheckComponent for details
       on the request parameters.
    -->
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <!-- Solr will use suggestions from both the 'default' spellchecker
           and from the 'wordbreak' spellchecker and combine them.
           collations (re-written queries) can include a combination of
           corrections from both spellcheckers -->
      <str name="spellcheck.dictionary">default</str>
      <str name="spellcheck.dictionary">wordbreak</str>
      <str name="spellcheck">on</str>
      <str name="spellcheck.extendedResults">true</str>       
      <str name="spellcheck.count">10</str>
      <str name="spellcheck.alternativeTermCount">5</str>
      <str name="spellcheck.maxResultsForSuggest">5</str>       
      <str name="spellcheck.collate">true</str>
      <str name="spellcheck.collateExtendedResults">true</str>  
      <str name="spellcheck.maxCollationTries">10</str>
      <str name="spellcheck.maxCollations">5</str>         
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>
 
  <searchComponent name="suggest" class="solr.SpellCheckComponent">
  <lst name="spellchecker">
    <str name="name">suggest</str>
    <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
    <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
    <str name="field">text</str>
    <float name="threshold">0.005</float>
    <str name="buildOnCommit">true</str>
    <!--<str name="storeDir">spellchecker</str>-->
  </lst>
</searchComponent>
 
 <requestHandler name="/suggest" class="org.apache.solr.handler.component.SearchHandler">
  <lst name="defaults">
    <str name="spellcheck">true</str>
    <str name="spellcheck.dictionary">suggest</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.onlyMorePopular">true</str>
    <str name="spellcheck.collate">true</str>
  </lst>
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>
 
注意:原文中以红色标出的部分(应为queryAnalyzerFieldType的取值text_zh,以及各个field的取值text)分别表示所用的字段类型和索引字段,需要与schema.xml中的定义保持一致
***
 
 
 
从mysql中导入数据index
***
在solrconfig.xml中加入下面内容
<!-- Registers the DataImportHandler at /dataimport; its import rules are
     read from the file named by the "config" default. -->
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>
 
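把D:\server\solr-4.9.0\dist下的solr-dataimporthandler-4.9.0.jar拷贝到D:\server\apache-tomcat-8.0.11\lib下;由于data-config.xml中使用了com.mysql.jdbc.Driver,还需要把MySQL的JDBC驱动jar(mysql-connector-java)一并放到该目录下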
 
在solr home下的collection1\conf目录(按前文的配置即D:\server\solr-4.9.0\example\solr\collection1\conf)下新建文件data-config.xml
 
<dataConfig>
  <!-- JDBC connection to the MySQL database that is indexed.
       NOTE(review): the credentials are stored in plain text; use a
       restricted account outside of local experiments. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/xplusplus"
              user="root"
              password="root"/>
  <document name="xplusplus">
    <!--
      pk               : primary key column of the entity, used by delta
                         imports.
      query            : statement used for a full import.
      deltaQuery       : selects the ids of the rows modified since the last
                         import. mod_timestamp is written without single
                         quotes so that it is read as a column; quoted, it
                         is a string literal and the comparison no longer
                         depends on the row.
      deltaImportQuery : loads one modified row. It filters on id because
                         dataimporter.delta.id carries the id value returned
                         by deltaQuery.
    -->
    <entity name="article"
            pk="id"
            query="select * from c_article"
            deltaQuery="select id from c_article where mod_timestamp &gt; '${dataimporter.last_index_time}'"
            deltaImportQuery="select * from c_article where id='${dataimporter.delta.id}'">
      <!-- Column-to-field mapping; every column keeps its own name. -->
      <field column="id" name="id"/>
      <field column="item_num" name="item_num"/>
      <field column="image" name="image"/>
      <field column="thumbnail" name="thumbnail"/>
      <field column="fine_line_cat" name="fine_line_cat"/>
      <field column="fine_line_scat" name="fine_line_scat"/>
      <field column="ucode" name="ucode"/>
      <field column="en_desc" name="en_desc"/>
      <field column="cn_desc" name="cn_desc"/>
      <field column="pos_desc" name="pos_desc"/>
      <field column="size" name="size"/>
      <field column="color" name="color"/>
      <field column="store_price" name="store_price"/>
      <field column="was_price" name="was_price"/>
      <field column="cost" name="cost"/>
    </entity>
  </document>
</dataConfig>
 
query:全量导入(full-import)时使用的查询语句
deltaImportQuery:增量导入(delta-import)时用来取出变更记录的查询语句,其中需要注意的是dataimporter.delta这个前缀一定要带
deltaQuery:用来查询需要被更新的记录的主键,以便deltaImportQuery使用
***
posted @ 2014-08-29 08:51  pacozhong  阅读(183)  评论(0编辑  收藏  举报