一、Maven依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.sdu.lucene</groupId>
    <artifactId>lucene-learn</artifactId>
    <version>1.0-SNAPSHOT</version>

    <!-- Dependency versions, referenced below as ${...} so every Lucene module stays on the same release -->
    <properties>
        <lucene.version>6.1.0</lucene.version>
        <spatial4j.version>0.6</spatial4j.version>
        <guava.version>19.0</guava.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>${guava.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>${lucene.version}</version>
        </dependency>

        <!-- Lucene analyzers (tokenization) -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>${lucene.version}</version>
        </dependency>


        <!-- Geographic coordinate support -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-spatial</artifactId>
            <version>${lucene.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-spatial-extras</artifactId>
            <version>${lucene.version}</version>
        </dependency>

        <!--
            Spatial4j is a general-purpose, ASL-licensed, open-source Java library for spatial/geospatial work.
            Its core capabilities fall into three areas:
            1 : Provides common shapes that work in both Euclidean and geodesic (surface of a sphere) world models
            2 : Provides distance calculations and other math
            3 : Reads shapes from WKT-formatted strings
        -->
        <dependency>
            <groupId>org.locationtech.spatial4j</groupId>
            <artifactId>spatial4j</artifactId>
            <version>${spatial4j.version}</version>
        </dependency>
    </dependencies>

    <build>
        <finalName>lucene-learn</finalName>
        <plugins>
            <!-- Compile with source/target level 1.8 and read source files as UTF-8 -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

 

二、Lucene Spatial

/**
 * Demonstrates geo-spatial indexing and searching with Lucene Spatial and Spatial4j.
 *
 * <p>The methods must be called in order: {@link #init()}, then {@link #createIndex(List)},
 * then {@link #search()}; the fields are only assigned by {@code init()}.
 *
 * @author hanhan.zhang
 */
public class LuceneSpatial {

    /**
     * Spatial4j context.
     * <ol>
     *   <li>A SpatialContext can be configured and created through SpatialContextFactory.</li>
     *   <li>SpatialContext properties:
     *     <ul>
     *       <li>DistanceCalculator (defaults to GeodesicSphereDistCalc.Haversine, which treats the
     *           earth as a perfect sphere)</li>
     *       <li>ShapeFactory (defaults to ShapeFactoryImpl)</li>
     *       <li>Rectangle (world bounds in lon/lat: RectangleImpl(-180, 180, -90, 90, this))</li>
     *       <li>BinaryCodec</li>
     *     </ul>
     *   </li>
     * </ol>
     */
    private SpatialContext ctx;

    /**
     * Strategy that defines how shapes are indexed and queried.
     */
    private SpatialStrategy strategy;

    /**
     * Directory that stores the index.
     */
    private Directory directory;

    /**
     * Sets up the spatial context, the prefix-tree strategy on field {@code "location"} and an
     * in-memory index directory.
     */
    protected void init() {
        // A SpatialContext can also be built through the SpatialContextFactory factory class.
        this.ctx = SpatialContext.GEO;

        /*
         * Grid with at most 11 levels, i.e. the geohash precision.
         * 1: the maximum geohash precision supported by the prefix tree is 24
         * 2: GeohashUtils provides the shared lon/lat <-> geohash conversion helpers
         */
        SpatialPrefixTree spatialPrefixTree = new GeohashPrefixTree(ctx, 11);

        /*
         * Indexing/search strategy; the two main implementations are:
         * 1: RecursivePrefixTreeStrategy (indexes and searches any Shape)
         * 2: TermQueryPrefixTreeStrategy (Point shapes only)
         * Both extend PrefixTreeStrategy (which uses a cache).
         */
        this.strategy = new RecursivePrefixTreeStrategy(spatialPrefixTree, "location");
        // Keep the index in memory.
        this.directory = new RAMDirectory();
    }

    /**
     * Indexes one document per city into {@link #directory}.
     *
     * @param cityGeoInfos cities to index
     * @throws Exception if the index cannot be written
     */
    protected void createIndex(List<CityGeoInfo> cityGeoInfos) throws Exception {
        // try-with-resources: the writer (and the directory write lock it holds) is released even
        // when addDocuments fails; the analyzer is closed after the writer.
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            indexWriter.addDocuments(newSampleDocument(ctx, strategy, cityGeoInfos));
        }
    }

    /**
     * Builds the Lucene documents to index, one per city, in input order.
     *
     * @param ctx          spatial context used to create the point shapes
     * @param strategy     strategy that produces the indexable spatial fields
     * @param cityGeoInfos cities to convert
     * @return the documents, one per element of {@code cityGeoInfos}
     */
    protected List<Document> newSampleDocument(SpatialContext ctx, SpatialStrategy strategy, List<CityGeoInfo> cityGeoInfos) {
        return cityGeoInfos.stream()
                           .map(cgi -> toDocument(ctx, strategy, cgi))
                           .collect(Collectors.toList());
    }

    /**
     * Converts a single city into a document holding its id, name and location.
     */
    private Document toDocument(SpatialContext ctx, SpatialStrategy strategy, CityGeoInfo cgi) {
        Document doc = new Document();
        // "id" is added twice on purpose: stored so it can be read back from a hit,
        // and as doc values so results can be sorted by it.
        doc.add(new StoredField("id", cgi.getCityId()));
        doc.add(new NumericDocValuesField("id", cgi.getCityId()));
        doc.add(new StringField("city", cgi.getName(), Field.Store.YES));

        // x = longitude, y = latitude.
        Point pt = ctx.getShapeFactory().pointXY(cgi.getLnt(), cgi.getLat());
        // The strategy emits the indexed (not stored) fields for the point's geohash cells.
        for (Field field : strategy.createIndexableFields(pt)) {
            doc.add(field);
        }
        // Store the raw coordinates as "x,y" so printDocument can parse them back.
        doc.add(new StoredField(strategy.getFieldName(), pt.getX() + ","+ pt.getY()));
        return doc;
    }

    /**
     * Runs two sample geo searches and prints their hits: a 100 km radius query around Qingdao
     * sorted by id, and a match-all query sorted by distance from Yantai.
     *
     * @throws Exception if the index cannot be read or searched
     */
    public void search() throws Exception {
        // try-with-resources: the reader is closed even when a query or printing fails.
        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // Sort by id, ascending.
            Sort idSort = new Sort(new SortField("id", SortField.Type.INT));

            // Everything within 100 km of the current position (120.33, 36.07), i.e. Qingdao:
            // a circle centred there with a radius of 100 km expressed in degrees.
            SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
                                               ctx.getShapeFactory().circle(120.33, 36.07, DistanceUtils.dist2Degrees(100, DistanceUtils.EARTH_MEAN_RADIUS_KM)));
            Query query = strategy.makeQuery(args);
            TopDocs topDocs = indexSearcher.search(query, 10, idSort);
            // Print the hits.
            printDocument(topDocs, indexSearcher, args.getShape().getCenter());

            System.out.println("==========================华丽的分割线=========================");

            // Point (x, y) = (longitude, latitude): the user's current location (Yantai).
            Point pt = ctx.getShapeFactory().pointXY(121.39,37.52);

            // Distance between the user's point and each indexed point in field "location";
            // the DEG_TO_KM multiplier converts the result from degrees to kilometres.
            ValueSource valueSource = strategy.makeDistanceValueSource(pt, DistanceUtils.DEG_TO_KM);

            // getSortField(reverse): false keeps the natural ascending order, so the nearest
            // cities come first; pass true for farthest-first.
            Sort distSort = new Sort(valueSource.getSortField(false)).rewrite(indexSearcher);
            TopDocs topdocs = indexSearcher.search(new MatchAllDocsQuery(), 10, distSort);
            printDocument(topdocs, indexSearcher, pt);
        }
    }

    /**
     * Prints each hit together with its distance in kilometres from {@code point}.
     *
     * @param topDocs       search result to print
     * @param indexSearcher searcher used to load the stored fields of each hit
     * @param point         reference point the distance is measured from
     * @throws Exception if a stored document cannot be loaded
     */
    protected void printDocument(TopDocs topDocs, IndexSearcher indexSearcher, Point point) throws Exception {
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int docId = scoreDoc.doc;
            Document document = indexSearcher.doc(docId);
            int cityId = document.getField("id").numericValue().intValue();
            String city = document.getField("city").stringValue();
            // The location was stored as "x,y" at index time.
            String[] locations = document.getField(strategy.getFieldName()).stringValue().split(",");
            double xPoint = Double.parseDouble(locations[0]);
            double yPoint = Double.parseDouble(locations[1]);
            // calcDistance yields degrees; convert to kilometres for display.
            double distDEG = ctx.calcDistance(point, xPoint, yPoint);
            double distanceKm = DistanceUtils.degrees2Dist(distDEG, DistanceUtils.EARTH_MEAN_RADIUS_KM);
            System.out.println("docId=" + docId + "\tcityId=" + cityId + "\tcity=" + city + "\tdistance=" + distanceKm + "KM");
        }
    }

    public static void main(String[] args) throws Exception {
        LuceneSpatial luceneSpatial = new LuceneSpatial();
        luceneSpatial.init();
        luceneSpatial.createIndex(GeoHelper.getCityGeoInfo("/Users/hanhan.zhang/Downloads/geo.txt"));
        luceneSpatial.search();
    }

}

 

三、地理信息文件

招远:  120.38,37.35
舟山:  122.207216,29.985295
齐齐哈尔:123.97,47.33
盐城:  120.13,33.38
赤峰:  118.87,42.28
青岛:  120.33,36.07
乳山:  121.52,36.89
金昌:  102.188043,38.520089
泉州:  118.58,24.93
莱西:  120.53,36.86
日照:  119.46,35.42
胶南:  119.97,35.88
南通:  121.05,32.08
拉萨:  91.11,29.97
云浮:  112.02,22.93
梅州:  116.1,24.55
文登:  122.05,37.2
上海:  121.48,31.22
攀枝花: 101.718637,26.582347
威海:  122.1,37.5
承德:  117.93,40.97
厦门:  118.1,24.46
汕尾:  115.375279,22.786211
潮州:  116.63,23.68
丹东:  124.37,40.13
太仓:  121.1,31.45
曲靖:  103.79,25.51
烟台:  121.39,37.52
福州:  119.3,26.08
瓦房店: 121.979603,39.627114
即墨:  120.45,36.38
抚顺:  123.97,41.97
玉溪:  102.52,24.35
张家口: 114.87,40.82
阳泉:  113.57,37.85
莱州:  119.942327,37.177017
湖州:  120.1,30.86
汕头:  116.69,23.39
昆山:  120.95,31.39
宁波:  121.56,29.86
湛江:  110.359377,21.270708
揭阳:  116.35,23.55
荣成:  122.41,37.16
连云港: 119.16,34.59
葫芦岛: 120.836932,40.711052
常熟:  120.74,31.64
东莞:  113.75,23.04
河源:  114.68,23.73
淮安:  119.15,33.5
泰州:  119.9,32.49
南宁:  108.33,22.84
营口:  122.18,40.65
惠州:  114.4,23.09
江阴:  120.26,31.91
蓬莱:  120.75,37.8
韶关:  113.62,24.84
嘉峪关: 98.289152,39.77313
广州:  113.23,23.16
延安:  109.47,36.6
太原:  112.53,37.87
清远:  113.01,23.7
中山:  113.38,22.52
昆明:  102.73,25.04
寿光:  118.73,36.86
盘锦:  122.070714,41.119997
长治:  113.08,36.18
深圳:  114.07,22.62
珠海:  113.52,22.3
宿迁:  118.3,33.96
咸阳:  108.72,34.36
铜川:  109.11,35.09
平度:  119.97,36.77
佛山:  113.11,23.05
海口:  110.35,20.02
江门:  113.06,22.61
章丘:  117.53,36.72
肇庆:  112.44,23.05
大连:  121.62,38.92
临汾:  111.5,36.08
吴江:  120.63,31.16
石嘴山: 106.39,39.04
沈阳:  123.38,41.8
苏州:  120.62,31.32
茂名:  110.88,21.68
嘉兴:  120.76,30.77
长春:  125.35,43.88
胶州:  120.03336,36.264622
银川:  106.27,38.47
张家港: 120.555821,31.875428
三门峡: 111.19,34.76
锦州:  121.15,41.13
南昌:  115.89,28.68
柳州:  109.4,24.33
三亚:  109.511909,18.252847
自贡:  104.778442,29.33903
吉林:  126.57,43.87
阳江:  111.95,21.85
泸州:  105.39,28.91
西宁:  101.74,36.56
宜宾:  104.56,29.77
呼和浩特:111.65,40.82
成都:  104.06,30.67
大同:  113.3,40.12
镇江:  119.44,32.2
桂林:  110.28,25.29
张家界: 110.479191,29.117096
宜兴:  119.82,31.36
北海:  109.12,21.49
西安:  108.95,34.27
金坛:  119.56,31.74
东营:  118.49,37.46
牡丹江: 129.58,44.6
遵义:  106.9,27.7
绍兴:  120.58,30.01
扬州:  119.42,32.39
常州:  119.95,31.79
潍坊:  119.1,36.62
重庆:  106.54,29.59
台州:  121.420757,28.656386
南京:  118.78,32.04
滨州:  118.03,37.36
贵阳:  106.71,26.57
无锡:  120.29,31.59
本溪:  123.73,41.3
克拉玛依:84.77,45.59
渭南:  109.5,34.52
马鞍山: 118.48,31.56
宝鸡:  107.15,34.38
焦作:  113.21,35.24
句容:  119.16,31.95
北京:  116.46,39.92
徐州:  117.2,34.26
衡水:  115.72,37.72
包头:  110,40.58
绵阳:  104.73,31.48
乌鲁木齐:87.68,43.77
枣庄:  117.57,34.86
杭州:  120.19,30.26
淄博:  118.05,36.78
鞍山:  122.85,41.12
溧阳:  119.48,31.43
库尔勒: 86.06,41.68
安阳:  114.35,36.1
开封:  114.35,34.79
济南:  117,36.65
德阳:  104.37,31.13
温州:  120.65,28.01
九江:  115.97,29.71
邯郸:  114.47,36.6
临安:  119.72,30.23
兰州:  103.73,36.03
沧州:  116.83,38.33
临沂:  118.35,35.05
南充:  106.110698,30.837793
天津:  117.2,39.13
富阳:  119.95,30.07
泰安:  117.13,36.18
诸暨:  120.23,29.71
郑州:  113.65,34.76
哈尔滨: 126.63,45.75
聊城:  115.97,36.45
芜湖:  118.38,31.33
唐山:  118.02,39.63
平顶山: 113.29,33.75
邢台:  114.48,37.05
德州:  116.29,37.45
济宁:  116.59,35.38
荆州:  112.239741,30.335165
宜昌:  111.3,30.7
义乌:  120.06,29.32
丽水:  119.92,28.45
洛阳:  112.44,34.7
秦皇岛: 119.57,39.95
株洲:  113.16,27.83
石家庄: 114.48,38.03
莱芜:  117.67,36.19
常德:  111.69,29.05
保定:  115.48,38.85
湘潭:  112.91,27.87
金华:  119.64,29.12
岳阳:  113.09,29.37
长沙:  113,28.21
衢州:  118.88,28.97
廊坊:  116.7,39.53
菏泽:  115.480656,35.23375
合肥:  117.27,31.86
武汉:  114.31,30.52
大庆:  125.03,46.5

 

posted on 2016-08-16 16:11  韩要奋斗  阅读(4813)  评论(0)  编辑  收藏  举报