1. 数据库结构

 

2. insert 测试

insert 的测试包括

1) 批量拼接values()插入

2) 有事务for循环插入

3) 无事务for循环插入

测试 SQL:

 <!-- 普通 insert -->
    <insert id="insert"
            parameterType="com.qunar.mybatistask.bean.Post"
            keyProperty="id">
        <![CDATA[

            insert into post (
                title,
                content,
                author,
                status,
                created
            ) values (
                #{title},
                #{content},
                #{author},
                #{status},
                #{created}
            )

        ]]>
    </insert>

    <!-- 拼接values insert -->
    <insert id="batchInsert"
            parameterType="java.util.List">
        <![CDATA[
            insert into post (
                title,
                content,
                author,
                status,
                created
            ) values
         ]]>
        <foreach collection="list" item="post" separator=",">
            (
            #{post.title},
            #{post.content},
            #{post.author},
            #{post.status},
            #{post.created}
            )
        </foreach>
    </insert>

测试代码:

service

    /**
     * 批量拼接VALUES() insert
     *
     * @param postList
     * @return
     */
    @Override
    @Transactional(propagation = Propagation.REQUIRED)
    public int batchInsert(List<Post> postList) {
        int singleNum = 1000;
        int affectedRows = 0;
        for (int i = 0; i < Math.ceil((double)(postList.size() / singleNum)); i++) {
            affectedRows += sqlSession.insert("post.batchInsert", postList.subList(i * singleNum, (i + 1) * singleNum));
        }
        return affectedRows;
    }

    /**
     * 事务内循环insert
     *
     * @param postList
     * @return
     */
    @Override
    @Transactional(propagation = Propagation.REQUIRED)
    public int createList(List<Post> postList) {
        int affectedRows = 0;
        for (Post post : postList) {
            affectedRows += sqlSession.insert("post.insert", post);
        }
        return affectedRows;
    }

test case:

    /**
     * 批量插入效率测试
     *
     * Method: batchInsert(List<Post> postList)
     * 
     */
    @Test
    public void testBatchInsert() throws Exception {
        List<Post> postList = Lists.newArrayList();
        for (int i = 0; i < 10000; i++) {
            Post post = new Post();
            post.setAuthor("test");
            post.setContent("test");
            post.setCreated(new Date());
            post.setTitle("test");
            post.setStatus(PostStatus.NORMAL);
            postList.add(post);
        }

        // 批量拼接SQL插入
        long start = System.nanoTime();
        int affectedRows = postService.batchInsert(postList);
        double duration = System.nanoTime() - start;
        System.out.format("batch: %.2f\n", duration / 1.0e9);
        System.out.println("affected rows: " + affectedRows);

        // 事务内循环插入
        start = System.nanoTime();
        affectedRows = postService.createList(postList);
        duration = System.nanoTime() - start;
        System.out.format("transaction: %.2f\n", duration / 1.0e9);
        System.out.println("affected rows: " + affectedRows);

        // 无事务直接循环插入
        start = System.nanoTime();
        affectedRows = 0;
        for (Post post : postList)
            affectedRows += postService.create(post);
        duration = System.nanoTime() - start;
        System.out.format("simple: %.2f\n", duration / 1.0e9);
        System.out.println("affected rows: " + affectedRows);
    }

结果 

batch: 1.44
affected rows: 10000
transaction: 2.87
affected rows: 10000
simple: 77.57
affected rows: 10000

 

总结:

排行

1) 使用拼接的手段,这种插入其实就是batch,只不过这是手动batch

2) 使用事务循环插入,相对于无事务快很多的原因大概是数据库连接和事务开启的次数

3) 无事务循环插入, 我想应该没人这么写

 

2. 单表循环查询与拼接in查询测试

SQL

<select id="selectById" parameterType="int" resultType="com.qunar.mybatistask.bean.Post">
         <![CDATA[

            select
                id,
                title,
                content,
                author,
                status,
                created
            from
                post
            where
                id = #{id}

        ]]>
    </select>

    <!-- 拼接where in条件查询 -->
    <select id="selectIn" parameterType="java.util.List" resultType="com.qunar.mybatistask.bean.Post">
        <![CDATA[
            select
                id,
                title,
                content,
                author,
                status,
                created
            from
                post
            where
                id in
        ]]>
        <foreach collection="list" item="id" open="(" close=")" separator=",">
            #{id}
        </foreach>
    </select>

Service

    @Override
    public Post selectById(int id) {
        return sqlSession.selectOne("post.selectById", id);
    }

    @Override
    public List<Post> selectByIds(List<Integer> ids) {
        List<Post> postList = Lists.newArrayList();
        int singleNum = 1000;
        int start;
        int end;
        for (int i = 0; i < Math.ceil(((double)ids.size() / (double)singleNum)); i++) {
            start = i * singleNum;
            end = (i + 1) * singleNum;
            end = end > ids.size() ? ids.size() : end;
            List<Post> result = sqlSession.selectList("post.selectIn", ids.subList(start, end));
            postList.addAll(result);
        }
        return postList;
    }

test case

    /**
     * 使用IN查询效率测试
     *
     * @throws Exception
     */
    @Test
    public void testInSelect() throws Exception {
        List<Integer> ids = Lists.newArrayList();
        for (int i = 1; i < 10000; i++) {
            ids.add(i);
        }

        // in 查询
        long start = System.nanoTime();
        List<Post> list = postService.selectByIds(ids);
        double duration = System.nanoTime() - start;
        System.out.format("in select: %.2f\n", duration / 1.0e9);
        System.out.println("list size: " + list.size());

        // 循环查询
        list = Lists.newArrayList();
        start = System.nanoTime();
        for (int id : ids)
            list.add(postService.selectById(id));
        duration = System.nanoTime() - start;
        System.out.format("simple select: %.2f\n", duration / 1.0e9);
        System.out.println("list size: " + list.size());
    }

结果

in select: 0.55
list size: 9999
simple select: 6.24
list size: 9999

 

总结:

我想应该没人会用for循环去做查询吧

 

3. 多表联结查询, join, form 2个table, in, exists 比较

SQL

    <!-- 用于循环查询 -->
    <select id="selectAll" resultType="com.qunar.mybatistask.bean.Comment">
        <![CDATA[
          select
            cmt.id as id,
            cmt.post_id as postId,
            cmt.content as content
          from
            cmt
        ]]>
    </select>

    <!-- join 查询 -->
    <select id="selectJoin" resultType="com.qunar.mybatistask.bean.Comment">
        <![CDATA[
          select
            cmt.id as id,
            cmt.post_id as postId,
            cmt.content as content
          from
            cmt
          join
            post
          on
            post.id = cmt.post_id
        ]]>
    </select>

    <!-- from 2个table -->
    <select id="selectTowTable" resultType="com.qunar.mybatistask.bean.Comment">
        <![CDATA[
          select
            cmt.id as id,
            cmt.post_id as postId,
            cmt.content as content
          from
            cmt, post
          where cmt.post_id = post.id
        ]]>
    </select>

    <!-- in 联表查询 -->
    <select id="selectIn" resultType="com.qunar.mybatistask.bean.Comment">
      <![CDATA[
          select
            cmt.id as id,
            cmt.post_id as postId,
            cmt.content as content
          from
            cmt
          where
            cmt.post_id
          in
            (
              select
                post.id
              from
                post
            )
      ]]>
    </select>

    <!-- exists 联表查询 -->
    <select id="selectExists" resultType="com.qunar.mybatistask.bean.Comment">
      <![CDATA[
          select
            cmt.id as id,
            cmt.post_id as postId,
            cmt.content as content
          from
            cmt
          where
            exists
            (
              select
                post.id
              from
                post
              where
                post.id = cmt.id
            )
      ]]>
    </select>

service

    @Override
    public List<Comment> selectTwoTable() {
        return sqlSession.selectList("comment.selectTowTable");
    }

    @Override
    public List<Comment> selectJoin() {
        return sqlSession.selectList("comment.selectJoin");
    }

    @Override
    public List<Comment> selectIn() {
       return sqlSession.selectList("comment.selectIn");
    }

    @Override
    public List<Comment> selectExists() {
        return sqlSession.selectList("comment.selectExists");
    }

    @Override
    public List<Comment> selectAll() {
        return sqlSession.selectList("comment.selectAll");
    }

test case

    /**
     * 测试JOIN查询效率
     *
     */
    @Test
    public void testJoinSelect() {
        // join 查询
        long start = System.nanoTime();
        List<Comment> list = commentService.selectJoin();
        double duration = System.nanoTime() - start;
        System.out.format("join select: %.2f\n", duration / 1.0e9);
        System.out.println("list size: " + list.size());

        // From 两个表查询
        start = System.nanoTime();
        list = commentService.selectTwoTable();
        duration = System.nanoTime() - start;
        System.out.format("2 table select: %.2f\n", duration / 1.0e9);
        System.out.println("list size: " + list.size());

        // in多表查询
        start = System.nanoTime();
        list = commentService.selectIn();
        duration = System.nanoTime() - start;
        System.out.format("in multi table select: %.2f\n", duration / 1.0e9);
        System.out.println("list size: " + list.size());

        // exists多表查询
        start = System.nanoTime();
        list = commentService.selectExists();
        duration = System.nanoTime() - start;
        System.out.format("exists multi table select: %.2f\n", duration / 1.0e9);
        System.out.println("list size: " + list.size());

        // 分次查询, 太慢了, 忽略这种方法的测试吧
//        start = System.nanoTime();
//        list = commentService.selectAll();
//        for (Comment comment : list) {
//            postService.selectById(comment.getPostId());
//        }
//        duration = System.nanoTime() - start;
//        System.out.format("separate select: %.2f\n", duration / 1.0e9);
//        System.out.println("list size: " + list.size());
    }

结果

join select: 2.44
list size: 210000
2 table select: 2.26
list size: 210000
in multi table select: 2.03
list size: 210000
exists multi table select: 2.35
list size: 210000

总结:

21W条数据下效率都差不多,而且我们一般会使用limit去限制查询的条数,所以应该他们的效率差距应该很小,我通过观察explain发现实际上join和from 2个table的方式的查询的执行计划是一模一样的,而in和exists的执行计划也是一模一样的

这里的表结构相对简单,也基本能用上索引 post_id 和 post.id 这些primary, 具体更加复杂的情况也许会影响这几种查询方式的执行计划, 才会体现出他们直接的差距, 当然我也相信他们执行的效率很大程度上是决定于mysql的优化器的优化策略,而这个优化策略很难人为的去判断,所以也不好说

posted on 2013-08-26 23:27  ZimZz  阅读(4638)  评论(0编辑  收藏  举报