elasticsearch 导入基础数据并索引之 geo_point

elasticsearch 中的地理信息存储, 有geo_point形式和geo_shape两种形式

此篇只叙述geo_point, 

地理位置需要声明为特殊的类型, 不显示在mapping中定义的话, 需要

{
    "pin" : {
        "location" : {
            "lat" : 40.12,
            "lon" : -71.34
        },
        "tag" : ["food", "family"],
        "text" : "my favorite family restaurant"
    }
}

如果仍然要显示的在mapping中定义, 则需要将其声明为 geo_point格式

{
    "pin" : {
        "properties" : {
            "location" : {
                "type" : "geo_point"
            }
        }
    }
}

es的类型有: string, long, date, geo_point, 以后知道了在补充, text(for binary), 

  range(integer_range, float_range, long_range, double_range, date_range)

  boolean, geo_point, geo_shape, ip, keyword, nested, token_count.. 可以参见这儿

多说一句: location的数据存放有3种形式: 

  

    1), location: lat + "," + lon    // 最开始用的这个, 但是做 geoHashCellQuery查询测试时, 报错了
  2) location: {

      "lat": ..., 
      "lon": ...    
    }               // 这个是我使用的导入方式
  3), location: [lon, lat]      // 这个没用, 没测试, 没发言权     

 

 1, 导入查询数据, 使用的建立mapping的方式, 因为需要声明ik分词器

package com.iwhere.geosearch;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.junit.Before;
import org.junit.Test;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.yaml.snakeyaml.tokens.StreamStartToken;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * 从json文件中导入数据到es中
 * @author 231
 *
 */
public final class ImportData {

    private TransportClient client;
    
    @Test
    public void test1() throws Exception {
        createMapping("test", "catchModel");
        importData("test", "catchModel");
        System.out.println("success");
    }
    
    /**
     * 插入https://www.elastic.co/blog/geo-location-and-search的测试数据
     * @throws Exception 
     */
    @Test
    public void test2() throws Exception {
        String index = "geo";
        String type = "test";
        BulkRequestBuilder prepareBulk = client.prepareBulk();
        for (int i = 0; i < 50; i++) {
            XContentBuilder source = getJson(40 + i, -71.34 + i, "my favorite family restaurant");
            prepareBulk.add(client.prepareIndex(index, type).setSource(source));
        }
        BulkResponse response = prepareBulk.execute().actionGet();
    }
    
    public XContentBuilder getJson(double lat, double lon, String text) throws IOException {
        return XContentFactory.jsonBuilder()
                .startObject()
                    .startObject("pin")
                        .startObject("location").field("lat", lat).field("lon", lon).endObject()
                        .field("tag", "food", "family")
                        .field("text", text)
                    .endObject()
                .endObject();
    }
    
    
    /**
     * 导入数据
     * @throws Exception 
     */
    public void importData(String index, String type) throws Exception {
        BufferedReader br = new BufferedReader(new FileReader(new File("D://catchModel.json")));
        StringBuilder sb = new StringBuilder();
        String line = null;
        while((line = br.readLine()) != null) {
            sb.append(line);
        }
        
        BulkRequestBuilder prepareBulk = client.prepareBulk();
        JSONArray parseArray = JSON.parseArray(sb.toString());
        for (Object object : parseArray) {
            // 强转为map, 否则报错  the number of object passed must be even
            Map<String, Object> source = (Map<String, Object>) object;
//            IndexResponse response = client.prepareIndex(index, type).setSource(source).get();
            XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
                .startObject()
                    .field("taskName", source.get("taskName"))
                    .field("sessionId", source.get("sessionId"))
                    .field("geoNum", source.get("geoNum"))
                    .field("geoLevel", source.get("geoLevel"))
                    .field("createTime", source.get("createTime"))
                    .field("endTime", source.get("endTime"))
                    .startObject("location").field("lat", source.get("lbLat"))
                            .field("lon", source.get("lbLng"))
//                    .field("location", source.get("lbLat") + "," + source.get("lbLng"))
//                            XContentFactory.jsonBuilder()
//                                        .startObject()
//                                            .field("lat", source.get("lbLat"))
//                                            .field("lon", source.get("lblng"))
//                                        .endObject())
                    .endObject();
            prepareBulk.add(client.prepareIndex(index, type).setSource(xContentBuilder));
        }
        BulkResponse response = prepareBulk.get();
        System.out.println(response);
    }
    
    
    /**
     * 创建mapping, 添加ik分词器等, 相当于创建数据库表
     * 索引库名: indices
     * 类型      : mappingType
     * field("indexAnalyzer", "ik"): 字段分词ik索引
     * field("searchAnalyzer", "ik"): ik分词查询
     * @throws Exception 
     */
    public void createMapping(String indices, String type) throws Exception {
        
        // 创建index
        Map<String, Object> settings = new HashMap<>();
        settings.put("number_of_shards", 4);    // 分片数量
        settings.put("number_of_replicas", 0);    // 复制数量, 导入时最好为0, 之后2-3即可
        settings.put("refresh_interval", "10s");// 刷新时间
        
        CreateIndexRequestBuilder prepareCreate = client.admin().indices().prepareCreate(indices);
        prepareCreate.setSettings(settings);
        
        // 创建mapping
        XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
                .startObject(type)
//                    .startObject("_ttl")//有了这个设置,就等于在这个给索引的记录增加了失效时间,  
//                    //ttl的使用地方如在分布式下,web系统用户登录状态的维护.  
//                        .field("enabled", true)//默认的false的  
//                        .field("default", "5m")//默认的失效时间,d/h/m/s 即天/小时/分钟/秒  
//                        .field("store", "yes")  
//                        .field("index", "not_analyzed")  
//                    .endObject() 
//                     .startObject("_timestamp")//这个字段为时间戳字段.即你添加一条索引记录后,自动给该记录增加个时间字段(记录的创建时间),搜索中可以直接搜索该字段.  
//                        .field("enabled", true)  
//                        .field("store", "no")  
//                        .field("index", "not_analyzed")  
//                    .endObject() 
                .startObject("properties")
                .startObject("taskName").field("type", "string").field("analyzer", "ik").field("searchAnalyzer", "ik").endObject()
                .startObject("sessionId").field("type", "string").endObject()
                .startObject("geoNum").field("type", "string").endObject()
                .startObject("grandPaGeoNum").field("type", "string").endObject()
                .startArray("sonGeoNum").endArray()
                .startObject("geoLevel").field("type", "long").endObject()
                .startObject("state").field("type", "long").endObject()
                .startObject("createTime").field("type", "date").endObject()
                .startObject("endTime").field("type", "date").endObject()
                .startObject("location")
                    .field("type", "geo_point").field("geohash_prefix", true).field("geohash_precision", "1km").endObject()/*.field("lat_lon", true)*/.endObject()
            .endObject().endObject();
        System.out.println(mapping.string());
//        PutMappingResponse actionGet = client.admin().|indices().preparePutMapping(indices).setType(indices).setSource(mapping).execute().actionGet();
        prepareCreate.addMapping(type, mapping);
        CreateIndexResponse response = prepareCreate.execute().actionGet();
        System.out.println(response);
    }
    
    @Before
    public void before() {
        ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext("classpath:spring/applicationContxt-escluster.xml");
        client = context.getBean(TransportClient.class);
    } 
}

2, 地理位置查询

package com.iwhere.geosearch;

import java.net.InetSocketAddress;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.geo.GeoDistance;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.geo.builders.ShapeBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.index.query.GeoBoundingBoxQueryBuilder;
import org.elasticsearch.index.query.GeoDistanceQueryBuilder;
import org.elasticsearch.index.query.GeoDistanceRangeQueryBuilder;
import org.elasticsearch.index.query.GeoPolygonQueryBuilder;
import org.elasticsearch.index.query.GeoShapeQueryBuilder;
import org.elasticsearch.index.query.GeohashCellQuery.Builder;
import org.elasticsearch.index.query.QueryBuilders;
import org.junit.Before;
import org.junit.Test;

/**
 * 使用dsl查询
 * @author 231
 */
public class JavaESGEO {

    private TransportClient client;
    
    /**
     * Caused by: [test] QueryParsingException[Field [location] is not a geo_shape]
   * 报错, 没运行出来
*/ @Test public void testGeoShapeQuery() { GeoShapeQueryBuilder geoShapeQuery = QueryBuilders.geoShapeQuery("location", ShapeBuilder.newMultiPoint() .point(0, 0) .point(0, 10) .point(10, 10) .point(10, 0) .point(0, 0) , ShapeRelation.WITHIN); System.out.println(geoShapeQuery); SearchResponse response = client.prepareSearch("geo") .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setQuery(geoShapeQuery).get(); } /** * Caused by: java.lang.IllegalStateException: Shape with name [AVqw3mb-kOe4Yke4p-lh] found but missing shape field */ @Test public void testGeoShapeQuery1() { GeoShapeQueryBuilder queryBuilder = QueryBuilders.geoShapeQuery( "model.location", // field "AVqxrMyikOe4Yke4p_Wx", // id of document "catchModel", ShapeRelation.WITHIN) // type, relation .indexedShapeIndex("test") // name of index .indexedShapePath("location"); // filed specified as path SearchResponse response = client.prepareSearch() .setQuery(queryBuilder).execute().actionGet(); String string = response.getHits().getHits().toString(); System.out.println(string); } /** * 使用 BoundingBoxQuery进行查询 */ @Test public void testGeoBoundingBoxQuery( ){ GeoBoundingBoxQueryBuilder queryBuilder = QueryBuilders.geoBoundingBoxQuery("location") .topRight(40.0, 117) .bottomLeft(39.9, 116); SearchResponse searchResponse = client.prepareSearch("test") .setQuery(queryBuilder).get(); System.out.println(searchResponse); System.err.println(searchResponse.getHits().totalHits()); } /** * distance query 查询 */ @Test public void testDistanceQuery() { GeoDistanceQueryBuilder queryBuilder = QueryBuilders.geoDistanceQuery("location") .point(40, 116.5) .distance(20, DistanceUnit.KILOMETERS) .optimizeBbox("memory") .geoDistance(GeoDistance.ARC); SearchResponse response = client.prepareSearch("geo", "test") .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) .setQuery(queryBuilder).execute().actionGet(); System.out.println(response); System.err.println(response.getHits().totalHits()); } /** * 环形查询 */ @Test public void testDistanceRangeQuery() { GeoDistanceRangeQueryBuilder queryBuilder = QueryBuilders.geoDistanceRangeQuery("location") .point(40, 116.5) // 中心点 .from("20km") // 内环 .to("25km") //外环 .includeLower(true) // 包含上届 .includeUpper(true) // 包含下届 .optimizeBbox("memory") // 边界框 .geoDistance(GeoDistance.SLOPPY_ARC); SearchResponse response = client.prepareSearch("test") .setSearchType(SearchType.DFS_QUERY_AND_FETCH) .setQuery(queryBuilder).execute().actionGet(); System.out.println(response); System.out.println(response.getHits().totalHits()); } /** * 多边形查询 */ @Test public void testPolygonQuery() { GeoPolygonQueryBuilder queryBuilder = QueryBuilders.geoPolygonQuery("location") .addPoint(39, 116) .addPoint(39, 117) .addPoint(40, 117); SearchResponse response = client.prepareSearch("test", "geo") .setQuery(queryBuilder).get(); System.out.println(response); System.err.println(response.getHits().totalHits()); } /** * geoHash查询 * 要使用, 需要先开启 * "location": { "type": "geo_point", "geohash_prefix": true, "geohash_precision": "1km" // 精度, 可在mapping中指定, 也可在代码中指定 */ @Test public void testGeoHashCellQuery() { Builder precision = QueryBuilders.geoHashCellQuery("location", new GeoPoint(39.9, 116)) .neighbors(true) .precision(3); SearchResponse response = client.prepareSearch("test") .setQuery(precision).get(); System.out.println(response); System.err.println(response.getHits().totalHits()); } @Before public void testBefore() { Settings settings = Settings.settingsBuilder().put("cluster.name", "wenbronk_escluster") .put("client.transport.sniff", true).build(); client = TransportClient.builder().settings(settings).build() .addTransportAddress(new InetSocketTransportAddress(new InetSocketAddress("192.168.50.37", 9300))); System.out.println("success to connect escluster"); } }

其他配置信息见: spring整合java一章

 基础数据从mongodb中拷贝来的, 在github有一个小量的数据

posted @ 2017-03-09 15:35  bronk  阅读(16158)  评论(0编辑  收藏  举报