Solr5 DataImport 处理1对多关系
“问题” 和“派发处理” 是2个对象。 每个问题可以被派发N次, 最后结束问题。
实际查询中数据量可能很大,同时既可以从问题查派发部门,也可以从派发情况查询问题(如本部门处理的问题)。
使用的技巧就是:增加doc_id, doc_type字段。 实现多Entity查询。
schema.xml
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema for a single index that holds two kinds of documents:
  petitions and dispatches. Every document carries doc_id (the uniqueKey)
  and doc_type, so both kinds can live side by side and be filtered by type.
-->
<schema name="uum" version="1.2">
  <types>
    <fieldType name="boolean" class="solr.BoolField"/>
    <fieldType name="date" class="solr.TrieDateField"/>
    <fieldType name="float" class="solr.TrieFloatField"/>
    <fieldType name="int" class="solr.TrieIntField"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="string" class="solr.StrField"/>
    <fieldType name="url" class="solr.StrField" indexed="false" stored="true" />

    <!-- Tokenized, lower-cased text used for the free-text fields -->
    <fieldType name="simpletext"
               class="solr.TextField"
               positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="ignored" class="solr.StrField"
               indexed="false" stored="false" />

    <fieldType name="date_l" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>

  </types>

  <fields>

    <!--
      FIELDS THAT ARE IN DOCS OF MULTIPLE TYPES
    -->

    <!-- this will be our uniqueKey, so it has to be distinct across
         all types of documents
    -->
    <field name="doc_id" type="string" />

    <!-- the type (or domain) of our document -->
    <field name="doc_type" type="string" />

    <!-- external URLs -->
    <dynamicField name="*_url" type="url" multiValued="false" />
    <dynamicField name="*_urls" type="url" multiValued="true"/>

    <!-- dates -->
    <dynamicField name="*_dt" type="date" />

    <!-- numeric values that might come in handy for relevancy biasing
         (they all relate to popularity)
    -->
    <dynamicField name="*_count" type="int" multiValued="false"/>

    <field name="_version_" type="long" indexed="true" stored="true"/>
    <field name="_root_" type="string" indexed="true" stored="false"/>

    <!-- Field used by Suggester for autocompletion -->
    <field name="autocomplete"
           type="simpletext"
           stored="false"
           multiValued="true" />

    <!-- quick search field -->
    <field name="catchall"
           type="simpletext"
           stored="false"
           omitNorms="true"
           multiValued="true" />
    <field name="ID" type="string" multiValued="false"/>

    <!--
      PETITION
    -->
    <field name="TenantId" type="string" multiValued="false"/>
    <field name="PetitionId" type="string" multiValued="false"/>
    <field name="PetitionNumber" type="string" multiValued="false"/>
    <field name="Title" type="simpletext" multiValued="false"/>
    <field name="Content" type="simpletext" multiValued="false"/>
    <field name="Tel" type="string" multiValued="false"/>
    <field name="EventAddress" type="simpletext" multiValued="false"/>
    <field name="DutyGridName" type="string" multiValued="false"/>
    <field name="ComplaintType" type="string" multiValued="false"/>
    <field name="IsVoid" type="boolean" multiValued="false"/>
    <field name="IsEnd" type="boolean" multiValued="false"/>
    <field name="GridAddress" type="simpletext" multiValued="false"/>
    <field name="CategoryName" type="string" multiValued="false"/>
    <field name="Category" type="string" multiValued="false"/>

    <field name="Status" type="string" multiValued="false"/>
    <field name="RegisterOn" type="date" multiValued="false"/>
    <field name="DeadLine" type="date" multiValued="false"/>
    <field name="ReportOn" type="date" multiValued="false"/>
    <field name="EndCaseOn" type="date" multiValued="false"/>
    <field name="CreatedBy" type="string" multiValued="false"/>
    <field name="SourceWay" type="string" multiValued="false"/>
    <field name="ISWGXTSB" type="string" multiValued="false"/>
    <field name="RegisterOffice" type="string" multiValued="false"/>
    <field name="EventLevel" type="string" multiValued="false"/>
    <field name="ImportantLevel" type="string" multiValued="false"/>

    <!--
      PETITION/DISPATCH
      Multi-valued: one petition document collects the offices of all of
      its dispatch rows.
    -->
    <field name="DispatchOffices" type="string" multiValued="true"/>
    <!--<field name="DispatchOfficeNames" />-->
    <field name="ReceiveOffices" type="string" multiValued="true"/>
    <field name="ReceiveOfficeNames" type="string" multiValued="true"/>


    <!--
      PETITION/PARTICIPANT
    -->
    <field name="OrgUnits" type="string" multiValued="true"/>
    <field name="Participants" type="string" multiValued="true"/>

    <!--
      DISPATCH
    -->

    <field name="Dispatcher" type="string" multiValued="false"/>
    <field name="DispatchOn" type="date" multiValued="false"/>
    <field name="DispatchOffice" type="string" multiValued="false"/>
    <field name="DispatchOfficeName" type="string" multiValued="false"/>
    <field name="ReceiveOffice" type="string" multiValued="false"/>
    <field name="ReceiveOfficeName" type="string" multiValued="false"/>
    <field name="StartOn" type="date" multiValued="false"/>
    <field name="DealWay" type="string" multiValued="false"/>
    <field name="FeedBackType" type="string" multiValued="false"/>
    <field name="FeedBackPeople" type="string" multiValued="false"/>
    <field name="FeedBackOn" type="date" multiValued="false"/>
    <field name="FeedBackMsg" type="simpletext" multiValued="false"/>
    <field name="NoPublicOpinion" type="simpletext" multiValued="false"/>
    <field name="IsPublic" type="boolean" multiValued="false"/>
    <field name="IsAlreadyReply" type="boolean" multiValued="false"/>
    <field name="IsAlreadyContact" type="boolean" multiValued="false"/>

  </fields>


  <!-- copy author names and title titles to a field to autocomplete
  <copyField source="canonical_name" dest="autocomplete"/>
  <copyField source="title" dest="autocomplete"/> -->

  <!-- copy everything into one big field for easy searching -->
  <copyField source="PetitionNumber" dest="catchall"/>
  <copyField source="Title" dest="catchall"/>
  <copyField source="Content" dest="catchall"/>
  <copyField source="Tel" dest="catchall"/>
  <copyField source="EventAddress" dest="catchall"/>

  <!-- A unique Key field isn't necessary, but it's the only way Solr -->
  <!-- can automatically replace docs when they change -->
  <!-- DataImportHandler is also very unhappy if you don't have one -->
  <uniqueKey>doc_id</uniqueKey>

  <!-- It's a *very* good idea to have a default search field -->
  <!-- NOTE(review): defaultSearchField is deprecated in newer Solr releases
       in favor of the "df" request parameter; kept here so existing
       behavior does not change. -->
  <defaultSearchField>catchall</defaultSearchField>

</schema>
db-data-config.xml
<!--
  DataImportHandler configuration with two root entities that feed the same
  index:
    petition: one document per VW_HIS_EventInfo row  (doc_type = PE)
    dispatch: one document per VW_HIS_DispatchInfo row (doc_type = DIS)
  doc_id is built from a type prefix plus the row ID so the uniqueKey stays
  distinct across both document types.

  NOTE(review): every delta query passes last_index_time through
  NEW_TIME(..., 'PST', 'GMT'); presumably the indexing host clock is PST while
  the database timestamps are GMT. Confirm for your deployment.
-->
<dataConfig>
  <!-- NOTE(review): url/user/password look like placeholders; replace them. -->
  <dataSource type="JdbcDataSource"
              driver="oracle.jdbc.driver.OracleDriver"
              url="jdbc:oracle:thin:@192.168.0.0:1521:test"
              user="user"
              password="pwd"
  />
  <document>
    <!-- Root entity 1: petitions, enriched with their dispatch offices and participants -->
    <entity name="petition"
            pk="ID"
            transformer="TemplateTransformer"
            query="select * from VW_HIS_EventInfo"
            deltaImportQuery="select t.* from VW_HIS_EventInfo t where t.id='${dataimporter.delta.ID}'"
            deltaQuery="select t.* from VW_HIS_EventInfo t where
                        RegistorOn> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or ReportOn> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or endcaseon> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">

      <field column="doc_id" template="PE_${petition.ID}" />
      <field column="doc_type" template="PE" />

      <!--<field column="LATLON" name="LatLon_p"/>-->
      <field column="TENANTID" name="TenantId" />
      <field column="ID" name="PetitionId" />
      <field column="PETITIONNUMBER" name="PetitionNumber" />
      <field column="TITLE" name="Title" />
      <field column="CONTENT" name="Content" />
      <field column="TEL" name="Tel" />
      <field column="EVENTADDRESS" name="EventAddress" />
      <!--<field column="AREANAME" name="AreaName" />-->
      <field column="DUTYGRIDNAME" name="DutyGridName" />
      <field column="GRIDADDRESS" name="GridAddress" />
      <field column="COMPLAINTQUALITYNAME" name="ComplaintType" />
      <field column="ISVOID" name="IsVoid" />
      <field column="ISEND" name="IsEnd" />
      <field column="CATEGORYNAME" name="CategoryName" />
      <field column="CATEGORYCODE" name="Category" />
      <field column="STATUS" name="Status" />
      <field column="REGISTORON" name="RegisterOn" />
      <field column="DEADLINE" name="DeadLine" />
      <field column="CREATEDBY" name="CreatedBy" />
      <field column="REPORTON" name="ReportOn" />
      <field column="SOURCEWAY" name="SourceWay" />
      <field column="ISWGXTSB" name="ISWGXTSB" />
      <field column="REGISTOROFFICE" name="RegisterOffice" />
      <!--<field column="TOOFFICENAME" name="ToOfficeName" />-->
      <field column="EVENTLEVEL" name="EventLevel" />
      <field column="IMPORTANTLEVEL" name="ImportantLevel" />
      <field column="ENDCASEON" name="EndCaseOn" />
      <!--<field column="ENDOPINION" name="EndOpinion" />-->

      <!-- Child rows: all dispatches of this petition, folded into multi-valued fields.
           The deltaQuery also returns PETITIONID because parentDeltaQuery
           resolves ${petition_dispatch.PETITIONID} from the deltaQuery row. -->
      <entity name="petition_dispatch"
              pk="ID"
              query="select * from VW_HIS_DispatchInfo where petitionid='${petition.ID}'"
              deltaQuery="select ID, PETITIONID from VW_HIS_DispatchInfo where
                          DISPATCHON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                          or FEEDBACKON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petition_dispatch.PETITIONID}'">
        <!--<field column="DISPATCHER" name="Dispatchers" />-->

        <field column="DISPATCHOFFICE" name="DispatchOffices" />
        <!--<field column="DISPATCHOFFICENAME" name="DispatchOfficeNames" />-->
        <field column="RECEIVEOFFICE" name="ReceiveOffices" />
        <field column="RECEIVEOFFICENAME" name="ReceiveOfficeNames" />
      </entity>

      <!-- Child rows: participants of this petition.
           As above, the deltaQuery returns PETITIONID so that
           ${petiton_participant.PETITIONID} in parentDeltaQuery has a value. -->
      <entity name="petiton_participant"
              pk="PARTICIPANT"
              query="select distinct ORGUNIT,PARTICIPANT from vw_his_participant where petitionid='${petition.ID}'"
              deltaQuery="select PARTICIPANT, PETITIONID from vw_his_participant where HandleOn > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petiton_participant.PETITIONID}'">
        <!--<field column="HANDLEON" name="HandleOns"/>-->
        <field column="ORGUNIT" name="OrgUnits"/>
        <!--<field column="PARTICIPANTNAME" name="ParticipantNames"/>-->
        <field column="PARTICIPANT" name="Participants"/>
        <!--<field column="PARTICIPANTTYPE" name="ParticipantTypes"/>-->
      </entity>

    </entity>

    <!-- Root entity 2: dispatches, enriched with fields of the owning petition -->
    <entity name="dispatch"
            pk="ID"
            transformer="TemplateTransformer"
            query="select * from VW_HIS_DispatchInfo"
            deltaImportQuery="select t.* from VW_HIS_DispatchInfo t where t.id='${dataimporter.delta.ID}'"
            deltaQuery="select ID from VW_HIS_DispatchInfo where DISPATCHON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
                        or FEEDBACKON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">

      <field column="doc_id" template="DIS_${dispatch.ID}" />
      <field column="doc_type" template="DIS" />

      <field column="TENANTID" name="TenantId" />
      <field column="PETITIONID" name="PetitionId" />
      <field column="DISPATCHER" name="Dispatcher" />
      <field column="DISPATCHON" name="DispatchOn" />
      <field column="DISPATCHOFFICE" name="DispatchOffice" />
      <field column="DISPATCHOFFICENAME" name="DispatchOfficeName" />
      <field column="RECEIVEOFFICE" name="ReceiveOffice" />
      <field column="RECEIVEOFFICENAME" name="ReceiveOfficeName" />
      <field column="STARTON" name="StartOn" />
      <field column="DEADLINE" name="DeadLine" />
      <field column="DEALWAY" name="DealWay" />
      <field column="STATUS" name="Status" />
      <field column="FEEDBACKTYPE" name="FeedBackType" />
      <field column="FEEDBACKPEOPLE" name="FeedBackPeople" />
      <field column="FEEDBACKON" name="FeedBackOn" />
      <field column="FEEDBACKMSG" name="FeedBackMsg" />
      <field column="NOPUBLICOPINION" name="NoPublicOpinion" />
      <field column="ISPUBLIC" name="IsPublic" />
      <field column="ISALREADYREPLY" name="IsAlreadyReply" />
      <field column="ISALREADYCONCAT" name="IsAlreadyContact" />

      <!-- Parent petition of this dispatch; a petition whose case was closed
           since the last index run triggers re-indexing of its dispatches. -->
      <entity name="dispatch_petition"
              pk="ID"
              query="select * from VW_HIS_EventInfo where id='${dispatch.PETITIONID}'"
              deltaQuery="select ID from VW_HIS_EventInfo where endcaseon> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
              parentDeltaQuery="select ID from VW_HIS_DispatchInfo where PETITIONID='${dispatch_petition.ID}'">
        <field column="PETITIONNUMBER" name="PetitionNumber" />
        <field column="TITLE" name="Title" />
        <field column="CONTENT" name="Content" />
        <field column="TEL" name="Tel" />
        <field column="EVENTADDRESS" name="EventAddress" />
        <!--<field column="AREANAME" name="AreaName" />-->
        <field column="DUTYGRIDNAME" name="DutyGridName" />
        <field column="GRIDADDRESS" name="GridAddress" />
        <field column="COMPLAINTQUALITYNAME" name="ComplaintType" />
        <field column="CATEGORYNAME" name="CategoryName" />
        <field column="CATEGORYCODE" name="Category" />
      </entity>
    </entity>

  </document>
</dataConfig>