Solr 自定义DIH导入字段处理

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.dataimport;

import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.tianditu.tools.CreateUUID;
/**
 * <p>
 * A {@link Transformer} for the DataImportHandler that rewrites column values
 * according to a per-field {@code namemethod} attribute declared in the
 * data-config.
 * </p>
 * <p>
 * Supported {@code namemethod} values:
 * </p>
 * <ul>
 * <li>{@code 0} (also used when the attribute is absent) - the value is
 * replaced by the result of {@code CreateUUID.getUUID(value)}, unless that
 * result is {@code null};</li>
 * <li>{@code 1} - the value is cleaned up by {@code normalizeName} and the
 * literal suffix {@code "我不好!"} is appended;</li>
 * <li>{@code 2} - the literal suffix {@code "你好!"} is appended.</li>
 * </ul>
 * <p>
 * Any other numeric value leaves the column untouched. Columns whose value is
 * {@code null} in the row are skipped.
 * </p>
 *
 * @see Transformer
 */
public class NameTransforms extends Transformer {
    private static final Logger LOG = LoggerFactory.getLogger(NameTransforms.class);
    private static final String NAMEMETHOD = "namemethod";

    /**
     * Applies the configured {@code namemethod} to every entity field that has
     * a non-null value in {@code row}.
     *
     * @param row     the current row; modified in place
     * @param context the DIH context, used to read the entity's field definitions
     * @return the same {@code row} instance after modification
     * @throws NumberFormatException if a field's {@code namemethod} is not an integer
     */
    @Override
    public Object transformRow(Map<String, Object> row, Context context) {
        List<Map<String, String>> fields = context.getAllEntityFields();
        for (Map<String, String> field : fields) {
            // Name of the source column this field definition refers to.
            String col = field.get(DataImporter.COLUMN);
            String namemethod = context.replaceTokens(field.get(NAMEMETHOD));
            // Current value of that column in the row.
            Object colValue = row.get(col);
            if (colValue == null) {
                continue;
            }

            // A missing namemethod attribute falls back to method 0.
            int method = (namemethod != null) ? nameJudge(namemethod) : 0;
            String value = colValue.toString();

            switch (method) {
            case 0: {
                // docmd5
                Object uuid = CreateUUID.getUUID(value);
                LOG.debug("namemethod=0 column={} result={}", col, uuid);
                if (uuid != null) {
                    row.put(col, uuid);
                }
                break;
            }
            case 1: {
                String transformed = normalizeName(value) + "我不好!";
                LOG.debug("namemethod=1 column={} result={}", col, transformed);
                row.put(col, transformed);
                break;
            }
            case 2: {
                String transformed = value + "你好!";
                LOG.debug("namemethod=2 column={} result={}", col, transformed);
                row.put(col, transformed);
                break;
            }
            default:
                break;
            }
        }
        return row;
    }

    /**
     * Parses the {@code namemethod} attribute into its numeric code.
     *
     * @throws NumberFormatException if {@code name} is not a parsable integer
     */
    private int nameJudge(String name) {
        return Integer.parseInt(name);
    }

    /**
     * Cleans up a name: removes the marker {@code (旧)}, removes spaces when the
     * name contains no English/digit/space/asterisk character, turns
     * {@code ***} into a single space, trims, and finally passes a non-empty
     * result through {@code StringTool.CharStandardization}.
     */
    private String normalizeName(String name) {
        String names = name.replace("(旧)", "");
        if (!containsEnglishChar(name)) {
            names = names.replace(" ", "");
        }
        names = names.replace("***", " ").trim();
        if (!names.isEmpty()) {
            names = StringTool.CharStandardization(names);
        }
        return names;
    }

    /**
     * Returns {@code true} if {@code text} contains at least one ASCII letter,
     * ASCII digit, space or {@code '*'}.
     */
    private boolean containsEnglishChar(String text) {
        // Scan every character, including the last one (the previous loop
        // stopped at length - 1 and never looked at the final character).
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == ' '
                    || (c >= '0' && c <= '9') || c == '*') {
                return true;
            }
        }
        return false;
    }
}

<dataConfig>

<!-- JDBC data source the import reads from (Oracle thin driver). -->
<dataSource driver="oracle.jdbc.driver.OracleDriver" url="jdbc:oracle:thin:@//localhost:1521/ORCL" user="POI" password="POI" />   

  <!-- Each row returned by the entity query is handed to the NameTransforms transformer. -->
  <document name="products">         <entity pk="NID"  name="POIBASEINFO" query="select * from POIBASEINFO3"  transformer="NameTransforms">  

  <!-- namemethod="0": NameTransforms replaces the value with CreateUUID.getUUID(value). -->
  <field column="CITYCODENEW" name="id"  namemethod="0" />  

  <!-- namemethod="2": NameTransforms appends the literal suffix "你好!" to the value. -->
  <field column="NID" name="poi_docmd5" namemethod="2" />    

  <!-- namemethod="1": NameTransforms cleans up the name and appends the literal suffix "我不好!". -->
  <field column="NAME" name="poi_name" namemethod="1" /> 

  </entity>

  </document>
</dataConfig> 

{
        "poi_name": [ "蒲我不好!"
        ],
        "poi_keyword": [
          "蒲我不好!"
        ],
        "id": "40FE6006B666AB94",
        "poi_docmd5": "1026675535你好!",
        "_version_": 1512451070406688700
      },

 

posted @ 2015-09-16 15:05  王南辉  阅读(359)  评论(0)  编辑  收藏  举报