Solr 自定义DIH导入字段处理
/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler.dataimport; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.tianditu.tools.CreateUUID; /** * <p> * A {@link Transformer} implementation which uses Regular Expressions to * extract, split and replace data in fields. * </p> * <p/> * <p> * Refer to <a * href="http://wiki.apache.org/solr/DataImportHandler">http://wiki.apache * .org/solr/DataImportHandler</a> for more details. 
* </p> * <p/> * <b>This API is experimental and may change in the future.</b> * * @since solr 1.3 * @see Pattern */ public class NameTransforms extends Transformer { private static final Logger LOG = LoggerFactory.getLogger(NameTransforms.class); private static final String NAMEMETHOD = "namemethod"; @Override public Object transformRow(Map<String, Object> row, Context context) { List<Map<String, String>> nameMap = context.getAllEntityFields(); for (Map<String, String> map : nameMap) { Integer locate = 0; String col = map.get(DataImporter.COLUMN);//获取行名 String namemethod = context.replaceTokens(map.get(NAMEMETHOD)); Object colValue = row.get(col); //获取列明对应的值 if(colValue == null ){ continue; } if(namemethod != null){ locate = nameJudge(namemethod); } switch (locate) { //docmd5 case 0: String valString0 = colValue.toString(); Object nameFiled0 = CreateUUID.getUUID((String) valString0); System.out.println(nameFiled0); if(nameFiled0 != null){ row.put(col, nameFiled0); } break; case 1: String valString = colValue.toString(); Object nameFiled = nametransfer(valString , null); nameFiled = nameFiled + "我不好!"; System.out.println(nameFiled); if(nameFiled != null){ row.put(col, nameFiled); } break; case 2: String valString1 = colValue.toString(); Object nameFiled1 = valString1+"你好!"; System.out.println(nameFiled1); if(nameFiled1 != null){ row.put(col, nameFiled1); } break; default: break; } } return row; } private Integer nameJudge(String name) { Integer loca = Integer.valueOf(name); return loca; } private String nametransfer(String name , String [] arr){ String names = name; names = names.replaceAll("\\(旧\\)", ""); boolean flg = IsEnglish(name); if (!flg) names = names.replaceAll(" ", ""); else ; names = names.replace("***", " "); names = names.trim(); if (!names.equalsIgnoreCase("")) { names = StringTool.CharStandardization(names); } return names; } private boolean IsEnglish(String strRecord) { int length = strRecord.length(); for (int i = 0; i < length - 1; i++) { char c = 
strRecord.charAt(i); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == ' ' || (c >= '0' && c <= '9') || c == '*') { //continue; return true; } else { continue; } } return false; } }
<dataConfig>
<!-- JDBC source for the import. NOTE(review): the credentials are stored in plain text here; confirm this is acceptable outside a local test setup. -->
<dataSource driver="oracle.jdbc.driver.OracleDriver" url="jdbc:oracle:thin:@//localhost:1521/ORCL" user="POI" password="POI" />
<!-- Every row returned by the query is passed through the NameTransforms transformer. The custom "namemethod" attribute on each field selects the switch case that NameTransforms.transformRow applies to that column. -->
<document name="products"> <entity pk="NID" name="POIBASEINFO" query="select * from POIBASEINFO3" transformer="NameTransforms">
<!-- namemethod="0": the column value is replaced by CreateUUID.getUUID(value). -->
<field column="CITYCODENEW" name="id" namemethod="0" />
<!-- namemethod="2": the suffix "你好!" is appended to the column value. -->
<field column="NID" name="poi_docmd5" namemethod="2" />
<!-- namemethod="1": the value is cleaned by nametransfer and the suffix "我不好!" is appended. -->
<field column="NAME" name="poi_name" namemethod="1" />
</entity>
</document>
</dataConfig>
{
"poi_name": [
"蒲我不好!"
],
"poi_keyword": [
"蒲我不好!"
],
"id": "40FE6006B666AB94",
"poi_docmd5": "1026675535你好!",
"_version_": 1512451070406688700
},