solr创建索引源码解析

先说下创建索引源码流程：

源码类：
1.CommonHttpSolrServer (SolrServer的子类)
2.SolrServer（抽象类）
3.SolrRequest (基类)
4.AbstractUpdateRequest (抽象类、SolrRequest的子类)
5.UpdateRequest (AbstractUpdateRequest的子类)
6.SolrInputDocument （设置需要索引的名称和值、这个应该放在第一位）

创建索引代码：

查询数据库数据，或者其他文档数据进行索引
[java] view plain copy
1. private void updateBook(String sql, String url, String idColumn,
2. String timeColumn,BufferedWriter dataFile) throws Exception {
3. long start = System.currentTimeMillis();
4. SolrUtil solrUtil = new SolrUtil(url);//初始化索引
5. SolrDocument doc = SqlSh.getSolrMaxDoc(solrUtil, idColumn, timeColumn);
6. if (doc == null) {
7. CommonLogger.getLogger().error("solr no data.");
8. return;
9. }
10. int maxId = Integer.parseInt(doc.get(idColumn).toString());
11. long maxTime = Long.parseLong(doc.get(timeColumn).toString())*1000;
12. Date maxDate = new Date(maxTime);
14. DateFormat dateFormat2 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
15. //获取数据库需要索引的数据
16. ResultSet rs = stmt_m.executeQuery(String.format(sql,
17. dateFormat2.format(maxDate)));
18. //获取需要创建索引的key
19. initColumeMeta(rs.getMetaData());
21. //解析数据并索引
22. parseRs(rs, solrUtil);
24. rs.close();
26. //优化索引
27. solrUtil.server.optimize();
29. CommonLogger.getLogger().info(
30. "update book time:" + (System.currentTimeMillis() - start)
31. / 1000 + "s");
32. }
咱们看下上面代码的parseRs方法
[java] view plain copy
1. //下面是简单的解析数据方法并写索引
[java] view plain copy
1. private void parseRs(ResultSet rs, SolrUtil solrUtil) throws Exception {
2. Collection<SolrInputDocument> docs=new ArrayList<SolrInputDocument>();
3. SolrInputDocument doc = null;
4. int locBk = 0;
5. boolean flag=true;
6. StringBuilder sb=null;
7. String vl=null;
8. try {
9. while (rs.next()) {
10. doc = new SolrInputDocument();
11. for (int i = 0; i < ToolMain.columnNames.length; i++) {
12. doc.addField(
13. ToolMain.columnNames[i],
14. getColumnValue(
15. rs.getObject(ToolMain.columnNames[i]),
16. ToolMain.columnTypes[i]));//此方法为设置一个域，可以添加一个参数来设置权重
17. }
18. docs.add(doc);
19. locBk++;
20. if (docs.size() >= 1000) {
21. solrUtil.addDocList(docs);//创建索引和提交索引操作都在这里面
22. docs.clear();
23. }
24. }
25. if (docs.size() > 0) {
26. solrUtil.addDocList(docs);
27. docs.clear();
28. }
29. } catch (Exception e) {
30. throw e;
31. } finally {
32. docs.clear();
33. docs = null;
34. }
35. }
3.下面来说明下SolrUtil类，此类主要是封装了CommonHttpSolrServer
[java] view plain copy
1. import java.util.Collection;
3. import log.CommonLogger;
5. import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
6. import org.apache.solr.common.SolrInputDocument;
8. public class SolrUtil {
9. public CommonsHttpSolrServer server = null;
11. public String url = "";//url为solr服务的地址
12. public String shards = "";
14. public SolrUtil(String url) {
15. this.url = url;
16. initSolr();
17. }
18. public SolrUtil(String url,String shards) {
19. this.url = url;
20. this.shards=shards;
21. initSolr();
22. }
23. //初始化Server
24. private void initSolr() {
25. try {
26. server = new CommonsHttpSolrServer(url);
27. server.setSoTimeout(60*1000);
28. server.setConnectionTimeout(60*1000);
29. server.setDefaultMaxConnectionsPerHost(1000);
30. server.setMaxTotalConnections(1000);
31. server.setFollowRedirects(false);
32. server.setAllowCompression(true);
33. } catch (Exception e) {
34. e.printStackTrace();
35. System.exit(-1);
36. }
37. }
38. //封装了add、commit
39. public void addDocList(Collection<SolrInputDocument> docs) {
40. try {
41. server.add(docs);
42. server.commit();
43. docs.clear();//释放
44. } catch (Exception e) {
45. CommonLogger.getLogger().error("addDocList error.", e);
46. }
47. }
49. public void deleteDocByQuery(String query) throws Exception {
50. try {
51. server.deleteByQuery(query);
52. server.commit();
53. } catch (Exception e) {
54. CommonLogger.getLogger().error("deleteDocByQuery error.", e);
55. throw e;
56. }
57. }
58. }
4.现在来看看solr创建索引的源码

其实源码执行的操作无非是生成请求request 返回response

1.上面代码中的SolrInputDocument 类所做的操作
[java] view plain copy
1. public class SolrInputDocument implements Map<String,SolrInputField>, Iterable<SolrInputField>, Serializable //实现了Map和Iterable的接口并且实现了接口中的方法，其主要的类为SolrInputFiled类
[java] view plain copy
1. public class SolrInputField implements Iterable<Object>, Serializable //类中只有三个属性，String key，Object value，还包括评分 float boost = 1.0f; 默认是1.0f（如果做权重的话可以设置这个值）
再来看下执行的CommonHttpSolrServer类所做的操作（表现形式在SolrUtil中的addDocList）

2.添加文档方法
[java] view plain copy
1. public UpdateResponse add(Collection<SolrInputDocument> docs ) throws SolrServerException, IOException {
[java] view plain copy
1. UpdateRequest req = new UpdateRequest();//创建一个request
[java] view plain copy
1. req.add(docs);//调用UpdateRequest的add方法，添加索引文档
2. return req.process(this);//亲重点是这个方法（返回的是response）
3. }
[java] view plain copy
1. //再看下UpdateRequest的add方法
2. private List<SolrInputDocument> documents = null;
3. public UpdateRequest add( final Collection<SolrInputDocument> docs )
4. {
5. if( documents == null ) {
6. documents = new ArrayList<SolrInputDocument>( docs.size()+1 );
7. }
8. documents.addAll( docs );
9. return this;
10. }
3.提交方法 commit，调用的是SolrServer类中的
[java] view plain copy
1. public UpdateResponse commit( boolean waitFlush, boolean waitSearcher ) throws Solr ServerException, IOException {
2. return new UpdateRequest().setAction( UpdateRequest.ACTION.COMMIT, waitFlush, waitSearcher ).process( this );//看到了吗？
3. <pre class="brush:java; toolbar: true; auto-links: false;"> setAction都是为了对对象ModifiableSolrParams（这个对象在最终CommonHttpSolrServerrequest的request方法中用的到）</pre>
4. 在提交索引的时候也是调用的process方法
5. }
4.优化索引
[java] view plain copy
1. public UpdateResponse optimize(boolean waitFlush, boolean waitSearcher, int maxSegments ) throws SolrServerException, IOException {
2. sp; return new UpdateRequest().setAction( UpdateRequest.ACTION.OPTIMIZE, waitFlush, waitSearcher, maxSegments ).process( this );//同样调用process，通过setAction参数，在CommonHttpSolrServer类方法request（）中主要执行的是合并和压缩 setAction都是为了对对象ModifiableSolrParams（这个对象在最终CommonHttpSolrServer的request方法中用的到）进行赋值
3. }
5.既然上面都提到了process方法，那我们来看看
[java] view plain copy
1. @Override
2. public UpdateResponse process( SolrServer server ) throws SolrServerException, IOException
3. {
4. long startTime = System.currentTimeMillis();
5. UpdateResponse res = new UpdateResponse();
6. res.setResponse( server.request( this ) );//这里面这个方法可是重点之重啊，这是调用了 CommonHttpSolrServer类中的request方法
7. res.setElapsedTime( System.currentTimeMillis()-startTime );
8. return res;
9. }
6.最终的方法是SolrServer的子类CommonHttpSolrServer类的request方法，咋再来看看这个方法是怎么工作的
[java] view plain copy
1. public NamedList<Object> request(final SolrRequest request, ResponseParser processor ) throws SolrServerException, IOException {
3. HttpMethod method = null;
4. InputStream is = null;
5. SolrParams params = request.getParams();
6. Collection<ContentStream> streams = requestWriter.getContentStreams(request);
7. String path = requestWriter.getPath(request);
9. //创建索引进来的是/update /select 为查询
10. if( path == null || !path.startsWith( "/" ) ) {
11. path = "/select";
12. }
14. ResponseParser parser = request.getResponseParser();
15. if( parser == null ) {
16. parser = _parser;
17. }
19. // The parser 'wt=' and 'version=' params are used instead of the original params
20. ModifiableSolrParams wparams = new ModifiableSolrParams();
21. wparams.set( CommonParams.WT, parser.getWriterType() );
22. wparams.set( CommonParams.VERSION, parser.getVersion());
23. if( params == null ) {
24. params = wparams;
25. }
26. else {
27. params = new DefaultSolrParams( wparams, params );
28. }
30. if( _invariantParams != null ) {
31. params = new DefaultSolrParams( _invariantParams, params );
32. }
34. int tries = _maxRetries + 1;
35. try {
36. while( tries-- > 0 ) {
37. // Note: since we aren't do intermittent time keeping
38. // ourselves, the potential non-timeout latency could be as
39. // much as tries-times (plus scheduling effects) the given
40. // timeAllowed.
41. try {//通过使用查看solr源码，在使用UpdateRequest对象时会自动设置为Post
42. if( SolrRequest.METHOD.GET == request.getMethod() ) {
43. if( streams != null ) {
44. throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "GET can't send streams!" );
45. }
46. method = new GetMethod( _baseURL + path + ClientUtils.toQueryString( params, false ) );
47. }
48. else if( SolrRequest.METHOD.POST == request.getMethod() ) {//所以我们直接看
50. String url = _baseURL + path;
51. boolean isMultipart = ( streams != null && streams.size() > 1 );
53. if (streams == null || isMultipart) {
54. PostMethod post = new PostMethod(url);//设置post，包括request头部、内容、参数、等等一些操作
55. post.getParams().setContentCharset("UTF-8");
56. if (!this.useMultiPartPost && !isMultipart) {
57. post.addRequestHeader("Content-Type",
58. "application/x-www-form-urlencoded; charset=UTF-8");
59. }
61. List<Part> parts = new LinkedList<Part>();
62. Iterator<String> iter = params.getParameterNamesIterator();
63. while (iter.hasNext()) {
64. String p = iter.next();
65. String[] vals = params.getParams(p);
66. if (vals != null) {
67. for (String v : vals) {
68. if (this.useMultiPartPost || isMultipart) {
69. parts.add(new StringPart(p, v, "UTF-8"));
70. } else {
71. post.addParameter(p, v);
72. }
73. }
74. }
75. }
77. if (isMultipart) {
78. int i = 0;
79. for (ContentStream content : streams) {
80. final ContentStream c = content;
82. String charSet = null;
83. PartSource source = new PartSource() {
84. public long getLength() {
85. return c.getSize();
86. }
87. public String getFileName() {
88. return c.getName();
89. }
90. public InputStream createInputStream() throws IOException {
91. return c.getStream();
92. }
93. };
95. parts.add(new FilePart(c.getName(), source,
96. c.getContentType(), charSet));
97. }
98. }
99. if (parts.size() > 0) {
100. post.setRequestEntity(new MultipartRequestEntity(parts
101. .toArray(new Part[parts.size()]), post.getParams()));
102. }
104. method = post;
105. }
106. // It is has one stream, it is the post body, put the params in the URL
107. else {
108. String pstr = ClientUtils.toQueryString(params, false);
109. PostMethod post = new PostMethod(url + pstr);
111. // Single stream as body
112. // Using a loop just to get the first one
113. final ContentStream[] contentStream = new ContentStream[1];
114. for (ContentStream content : streams) {
115. contentStream[0] = content;
116. break;
117. }
118. if (contentStream[0] instanceof RequestWriter.LazyContentStream) {
119. post.setRequestEntity(new RequestEntity() {
120. public long getContentLength() {
121. return -1;
122. }
124. public String getContentType() {
125. return contentStream[0].getContentType();
126. }
128. public boolean isRepeatable() {
129. return false;
130. }
132. public void writeRequest(OutputStream outputStream) throws IOException {
133. ((RequestWriter.LazyContentStream) contentStream[0]).writeTo(outputStream);
134. }
135. }
136. );
138. } else {
139. is = contentStream[0].getStream();
140. post.setRequestEntity(new InputStreamRequestEntity(is, contentStream[0].getContentType()));
141. }
142. method = post;
143. }
144. }
145. else {
146. throw new SolrServerException("Unsupported method: "+request.getMethod() );
147. }
148. }
149. catch( NoHttpResponseException r ) {
150. // This is generally safe to retry on
151. method.releaseConnection();
152. method = null;
153. if(is != null) {
154. is.close();
155. }
156. // If out of tries then just rethrow (as normal error).
157. if( ( tries < 1 ) ) {
158. throw r;
159. }
160. //log.warn( "Caught: " + r + ". Retrying..." );
161. }
162. }
163. }
164. catch( IOException ex ) {
165. throw new SolrServerException("error reading streams", ex );
166. }
168. method.setFollowRedirects( _followRedirects );
169. method.addRequestHeader( "User-Agent", AGENT );
170. if( _allowCompression ) {
171. method.setRequestHeader( new Header( "Accept-Encoding", "gzip,deflate" ) );
172. }
174. try {
175. // Execute the method.
176. //System.out.println( "EXECUTE:"+method.getURI() );
177. //执行请求，返回状态码，然后组装response 最后返回
178. int statusCode = _httpClient.executeMethod(method);
179. if (statusCode != HttpStatus.SC_OK) {
180. StringBuilder msg = new StringBuilder();
181. msg.append( method.getStatusLine().getReasonPhrase() );
182. msg.append( "\n\n" );
183. msg.append( method.getStatusText() );
184. msg.append( "\n\n" );
185. msg.append( "request: "+method.getURI() );
186. throw new SolrException(statusCode, java.net.URLDecoder.decode(msg.toString(), "UTF-8") );
187. }
189. // Read the contents
190. String charset = "UTF-8";
191. if( method instanceof HttpMethodBase ) {
192. charset = ((HttpMethodBase)method).getResponseCharSet();
193. }
194. InputStream respBody = method.getResponseBodyAsStream();
195. // Jakarta Commons HTTPClient doesn't handle any
196. // compression natively. Handle gzip or deflate
197. // here if applicable.
198. if( _allowCompression ) {
199. Header contentEncodingHeader = method.getResponseHeader( "Content-Encoding" );
200. if( contentEncodingHeader != null ) {
201. String contentEncoding = contentEncodingHeader.getValue();
202. if( contentEncoding.contains( "gzip" ) ) {
203. //log.debug( "wrapping response in GZIPInputStream" );
204. respBody = new GZIPInputStream( respBody );
205. }
206. else if( contentEncoding.contains( "deflate" ) ) {
207. //log.debug( "wrapping response in InflaterInputStream" );
208. respBody = new InflaterInputStream(respBody);
209. }
210. }
211. else {
212. Header contentTypeHeader = method.getResponseHeader( "Content-Type" );
213. if( contentTypeHeader != null ) {
214. String contentType = contentTypeHeader.getValue();
215. if( contentType != null ) {
216. if( contentType.startsWith( "application/x-gzip-compressed" ) ) {
217. //log.debug( "wrapping response in GZIPInputStream" );
218. respBody = new GZIPInputStream( respBody );
219. }
220. else if ( contentType.startsWith("application/x-deflate") ) {
221. //log.debug( "wrapping response in InflaterInputStream" );
222. respBody = new InflaterInputStream(respBody);
223. }
224. }
225. }
226. }
227. }
228. return processor.processResponse(respBody, charset);
229. }
230. catch (HttpException e) {
231. throw new SolrServerException( e );
232. }
233. catch (IOException e) {
234. throw new SolrServerException( e );
235. }
236. finally {
237. method.releaseConnection();
238. if(is != null) {
239. is.close();
240. }
241. }
242. }