flink 实现三角枚举EnumTriangles算法详解

1、三角枚举,从所有无向边对中找到相互连接的三角形

/**
 * @Author: xu.dm
 * @Date: 2019/7/4 21:31
 * @Description: 三角枚举算法
 * 三角枚举是在图(数据结构)中找到紧密连接的部分的预处理步骤。三角形由三条边连接,三条边相互连接。
 *
 * 该算法的工作原理如下:它将所有共享一个共同顶点的边(edge)分组,并构建三元组,即由两条边连接的顶点三元组。
 * 最后,通过join操作连接原数据和三元组,过滤所有三元组,去除不存在第三条边(闭合三角形)的三元组。
 *
 * 输入文件是纯文本文件,必须格式如下:
 *
 * 边缘表示为顶点ID的对,由空格字符分隔。边线由换行符分隔。
 * 例如,"1 2\n2 12\n1 12\n42 63"给出包括三角形的四个(无向)边(1) - (2),(2) - (12),(1) - (12)和(42) - (63)
 *     (1)
 *     /   \
 *  (2)-(12)
 * 用法:EnumTriangleBasic --edges <path> --output <path>
 * 如果未提供参数,则使用{@link EnumTrianglesData}中的默认数据运行程序。
 *
 */
public class EnumTriangles {
    public static void main(String args[]) throws Exception{
        // Checking input parameters
        final ParameterTool params = ParameterTool.fromArgs(args);

        // set up execution environment
        final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // make parameters available in the web interface
        env.getConfig().setGlobalJobParameters(params);

        // read input data
        DataSet<EnumTrianglesDataTypes.Edge> edges;
        if (params.has("edges")) {
            edges = env.readCsvFile(params.get("edges"))
                    .fieldDelimiter(" ")
                    .includeFields(true, true)
                    .types(Integer.class, Integer.class)
                    .map(new TupleEdgeConverter());
        } else {
            System.out.println("Executing EnumTriangles example with default edges data set.");
            System.out.println("Use --edges to specify file input.");
            edges = EnumTrianglesData.getDefaultEdgeDataSet(env);
        }

        // project edges by vertex id
        // 对Edge里V1,V2排序,让小的在前
        DataSet<EnumTrianglesDataTypes.Edge> edgesById = edges
                .map(new EdgeByIdProjector());

        DataSet<EnumTrianglesDataTypes.Triad> triangles = edgesById
                // build triads
                .groupBy(EnumTrianglesDataTypes.Edge.V1)
                .sortGroup(EnumTrianglesDataTypes.Edge.V2, Order.ASCENDING)
                .reduceGroup(new TriadBuilder())
                //通过Triad的第二和第三字段与Edge的第一和第二字段join,找出有第二和第三有路径的对子。
                .join(edgesById)
                .where(EnumTrianglesDataTypes.Triad.V2, EnumTrianglesDataTypes.Triad.V3)
                .equalTo(EnumTrianglesDataTypes.Edge.V1, EnumTrianglesDataTypes.Edge.V2)
                // filter triads
                .with(new TriadFilter());

        // emit result
        if (params.has("output")) {
            triangles.writeAsCsv(params.get("output"), "\n", ",");
            // execute program
            env.execute("Basic Triangle Enumeration Example");
        } else {
            System.out.println("Printing result to stdout. Use --output to specify output path.");
            triangles.print();
        }
    }

    //转换Tuple2类型到Edge类型
    @FunctionAnnotation.ForwardedFields("0;1")
    public static class TupleEdgeConverter implements MapFunction<Tuple2<Integer,Integer>, EnumTrianglesDataTypes.Edge>{
        private final EnumTrianglesDataTypes.Edge outEdge = new EnumTrianglesDataTypes.Edge();
        @Override
        public EnumTrianglesDataTypes.Edge map(Tuple2<Integer, Integer> value) throws Exception {
            outEdge.copyVerticesFromTuple2(value);
            return outEdge;
        }
    }

    /**
     * Projects an edge (pair of vertices) such that the id of the first is smaller than the id of the second.
     * 转换一对边,如果first大于second,则互相交换
     * */
    private static class EdgeByIdProjector implements MapFunction<EnumTrianglesDataTypes.Edge, EnumTrianglesDataTypes.Edge>{
        @Override
        public EnumTrianglesDataTypes.Edge map(EnumTrianglesDataTypes.Edge value) throws Exception {

            //flip vertices 反转顶点
            if(value.getFirstVertex()>value.getSecondVertex())
            {
                value.flipVertices();
            }
            return value;
        }
    }

    /**
     *  Builds triads (triples of vertices) from pairs of edges that share a vertex.
     *  The first vertex of a triad is the shared vertex, the second and third vertex are ordered by vertexId.
     *  Assumes that input edges share the first vertex and are in ascending order of the second vertex.
     *  构造triads(三位一体结构),即经过分组排序后,拥有共同顶点的edge,按顺序(分组后顶点需要升序)组合成三体
     *  构成Triad的第一个顶点是分组id,也是分组里所有对子(edge)共享的id,第二和第三个顶点是分组的对子(edge)里的第二个顶点,edge他们是按升序排列。
     *  分组结构类似如下,s1就是共享的顶点,f1到f3是按升序排列的对子(s1-f1),(s1-f2),(f1-f3)。
     *       f1
     *     /
     *  s1 一 f2
     *     \
     *     f3
     */
    @FunctionAnnotation.ForwardedFields("0")
    private static class TriadBuilder
            implements GroupReduceFunction<EnumTrianglesDataTypes.Edge, EnumTrianglesDataTypes.Triad>{
        private final List<Integer> vertices = new ArrayList<>();
        private final EnumTrianglesDataTypes.Triad outTriad = new EnumTrianglesDataTypes.Triad();

        @Override
        public void reduce(Iterable<EnumTrianglesDataTypes.Edge> values, Collector<EnumTrianglesDataTypes.Triad> out) throws Exception {
            final Iterator<EnumTrianglesDataTypes.Edge> edges = values.iterator();

            // clear vertex list
            vertices.clear();

            // read first edge
            EnumTrianglesDataTypes.Edge firstEdge = edges.next();
            outTriad.setFirstVertex(firstEdge.getFirstVertex());

            vertices.add(firstEdge.getSecondVertex());

            // build and emit triads
            if(edges.hasNext()){
                Integer higherVertexId = edges.next().getSecondVertex();

                //combine vertex with all previously read vertices
                for(Integer lowVertexId:vertices){
                    outTriad.setSecondVertex(lowVertexId);
                    outTriad.setThirdVertex(higherVertexId);
                    out.collect(outTriad);
                }
                vertices.add(higherVertexId);
            }
        }
    }

    /**
     * Filters triads (three vertices connected by two edges) without a closing third edge.
     * 过滤没有闭合边的三体
     * */
    private static class TriadFilter
            implements JoinFunction<EnumTrianglesDataTypes.Triad, EnumTrianglesDataTypes.Edge, EnumTrianglesDataTypes.Triad>{
        @Override
        public EnumTrianglesDataTypes.Triad join(EnumTrianglesDataTypes.Triad first, EnumTrianglesDataTypes.Edge second) throws Exception {
            return first;
        }
    }

}

2、三角枚举所需要数据结构等

public class EnumTrianglesDataTypes {
    /**
     * A POJO storing two vertex IDs.
     */
    public static class Edge extends Tuple2<Integer, Integer> {
        private static final long serialVersionUID = 1L;

        public static final int V1 = 0;
        public static final int V2 = 1;

        public Edge() {}

        public Edge(final Integer v1, final Integer v2) {
            this.setFirstVertex(v1);
            this.setSecondVertex(v2);
        }

        public Integer getFirstVertex() {
            return this.getField(V1);
        }

        public Integer getSecondVertex() {
            return this.getField(V2);
        }

        public void setFirstVertex(final Integer vertex1) {
            this.setField(vertex1, V1);
        }

        public void setSecondVertex(final Integer vertex2) {
            this.setField(vertex2, V2);
        }

        public void copyVerticesFromTuple2(Tuple2<Integer, Integer> t) {
            this.setFirstVertex(t.f0);
            this.setSecondVertex(t.f1);
        }

        public void copyVerticesFromEdgeWithDegrees(EdgeWithDegrees ewd) {
            this.setFirstVertex(ewd.getFirstVertex());
            this.setSecondVertex(ewd.getSecondVertex());
        }

        public void flipVertices() {
            Integer tmp = this.getFirstVertex();
            this.setFirstVertex(this.getSecondVertex());
            this.setSecondVertex(tmp);
        }
    }

    /**
     * A POJO storing three vertex IDs.
     */
    public static class Triad extends Tuple3<Integer, Integer, Integer> {
        private static final long serialVersionUID = 1L;

        public static final int V1 = 0;
        public static final int V2 = 1;
        public static final int V3 = 2;

        public Triad() {}

        public void setFirstVertex(final Integer vertex1) {
            this.setField(vertex1, V1);
        }

        public void setSecondVertex(final Integer vertex2) {
            this.setField(vertex2, V2);
        }

        public void setThirdVertex(final Integer vertex3) {
            this.setField(vertex3, V3);
        }
    }

    /**
     * A POJO storing two vertex IDs with degree.
     */
    public static class EdgeWithDegrees extends Tuple4<Integer, Integer, Integer, Integer> {
        private static final long serialVersionUID = 1L;

        public static final int V1 = 0;
        public static final int V2 = 1;
        public static final int D1 = 2;
        public static final int D2 = 3;

        public EdgeWithDegrees() { }

        public Integer getFirstVertex() {
            return this.getField(V1);
        }

        public Integer getSecondVertex() {
            return this.getField(V2);
        }

        public Integer getFirstDegree() {
            return this.getField(D1);
        }

        public Integer getSecondDegree() {
            return this.getField(D2);
        }

        public void setFirstVertex(final Integer vertex1) {
            this.setField(vertex1, V1);
        }

        public void setSecondVertex(final Integer vertex2) {
            this.setField(vertex2, V2);
        }

        public void setFirstDegree(final Integer degree1) {
            this.setField(degree1, D1);
        }

        public void setSecondDegree(final Integer degree2) {
            this.setField(degree2, D2);
        }

        public void copyFrom(final EdgeWithDegrees edge) {
            this.setFirstVertex(edge.getFirstVertex());
            this.setSecondVertex(edge.getSecondVertex());
            this.setFirstDegree(edge.getFirstDegree());
            this.setSecondDegree(edge.getSecondDegree());
        }
    }
}

3、测试数据

public class EnumTrianglesData {
    public static final Object[][] EDGES = {
            {1, 2},
            {1, 3},
            {1, 4},
            {1, 5},
            {2, 3},
            {2, 5},
            {3, 4},
            {3, 5},
            {3, 7},
            {3, 8},
            {5, 6},
            {7, 8}
    };

    public static DataSet<EnumTrianglesDataTypes.Edge> getDefaultEdgeDataSet(ExecutionEnvironment env) {

        List<EnumTrianglesDataTypes.Edge> edges = new ArrayList<EnumTrianglesDataTypes.Edge>();
        for (Object[] e : EDGES) {
            edges.add(new EnumTrianglesDataTypes.Edge((Integer) e[0], (Integer) e[1]));
        }

        return env.fromCollection(edges);
    }
}

 

 
posted @ 2019-07-06 17:39  我是属车的  阅读(511)  评论(0编辑  收藏  举报