1. 数据库结构
2. insert 测试
insert 的测试包括
1) 批量拼接values()插入
2) 有事务for循环插入
3) 无事务for循环插入
测试 SQL:
<!-- 普通 insert --> <insert id="insert" parameterType="com.qunar.mybatistask.bean.Post" keyProperty="id"> <![CDATA[ insert into post ( title, content, author, status, created ) values ( #{title}, #{content}, #{author}, #{status}, #{created} ) ]]> </insert> <!-- 拼接values insert --> <insert id="batchInsert" parameterType="java.util.List"> <![CDATA[ insert into post ( title, content, author, status, created ) values ]]> <foreach collection="list" item="post" separator=","> ( #{post.title}, #{post.content}, #{post.author}, #{post.status}, #{post.created} ) </foreach> </insert>
测试代码:
service
/** * 批量拼接VALUES() insert * * @param postList * @return */ @Override @Transactional(propagation = Propagation.REQUIRED) public int batchInsert(List<Post> postList) { int singleNum = 1000; int affectedRows = 0; for (int i = 0; i < Math.ceil((double)(postList.size() / singleNum)); i++) { affectedRows += sqlSession.insert("post.batchInsert", postList.subList(i * singleNum, (i + 1) * singleNum)); } return affectedRows; } /** * 事务内循环insert * * @param postList * @return */ @Override @Transactional(propagation = Propagation.REQUIRED) public int createList(List<Post> postList) { int affectedRows = 0; for (Post post : postList) { affectedRows += sqlSession.insert("post.insert", post); } return affectedRows; }
test case:
/** * 批量插入效率测试 * * Method: batchInsert(List<Post> postList) * */ @Test public void testBatchInsert() throws Exception { List<Post> postList = Lists.newArrayList(); for (int i = 0; i < 10000; i++) { Post post = new Post(); post.setAuthor("test"); post.setContent("test"); post.setCreated(new Date()); post.setTitle("test"); post.setStatus(PostStatus.NORMAL); postList.add(post); } // 批量拼接SQL插入 long start = System.nanoTime(); int affectedRows = postService.batchInsert(postList); double duration = System.nanoTime() - start; System.out.format("batch: %.2f\n", duration / 1.0e9); System.out.println("affected rows: " + affectedRows); // 事务内循环插入 start = System.nanoTime(); affectedRows = postService.createList(postList); duration = System.nanoTime() - start; System.out.format("transaction: %.2f\n", duration / 1.0e9); System.out.println("affected rows: " + affectedRows); // 无事务直接循环插入 start = System.nanoTime(); affectedRows = 0; for (Post post : postList) affectedRows += postService.create(post); duration = System.nanoTime() - start; System.out.format("simple: %.2f\n", duration / 1.0e9); System.out.println("affected rows: " + affectedRows); }
结果
batch: 1.44
affected rows: 10000
transaction: 2.87
affected rows: 10000
simple: 77.57
affected rows: 10000
总结:
排行
1) 使用拼接的手段,这种插入其实就是batch,只不过这是手动batch
2) 使用事务循环插入,相对于无事务快很多的原因大概是数据库连接和事务开启的次数
3) 无事务循环插入, 我想应该没人这么写
2. 单表循环查询与拼接in查询测试
SQL
<select id="selectById" parameterType="int" resultType="com.qunar.mybatistask.bean.Post"> <![CDATA[ select id, title, content, author, status, created from post where id = #{id} ]]> </select> <!-- 拼接where in条件查询 --> <select id="selectIn" parameterType="java.util.List" resultType="com.qunar.mybatistask.bean.Post"> <![CDATA[ select id, title, content, author, status, created from post where id in ]]> <foreach collection="list" item="id" open="(" close=")" separator=","> #{id} </foreach> </select>
Service
@Override public Post selectById(int id) { return sqlSession.selectOne("post.selectById", id); } @Override public List<Post> selectByIds(List<Integer> ids) { List<Post> postList = Lists.newArrayList(); int singleNum = 1000; int start; int end; for (int i = 0; i < Math.ceil(((double)ids.size() / (double)singleNum)); i++) { start = i * singleNum; end = (i + 1) * singleNum; end = end > ids.size() ? ids.size() : end; List<Post> result = sqlSession.selectList("post.selectIn", ids.subList(start, end)); postList.addAll(result); } return postList; }
test case
/** * 使用IN查询效率测试 * * @throws Exception */ @Test public void testInSelect() throws Exception { List<Integer> ids = Lists.newArrayList(); for (int i = 1; i < 10000; i++) { ids.add(i); } // in 查询 long start = System.nanoTime(); List<Post> list = postService.selectByIds(ids); double duration = System.nanoTime() - start; System.out.format("in select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // 循环查询 list = Lists.newArrayList(); start = System.nanoTime(); for (int id : ids) list.add(postService.selectById(id)); duration = System.nanoTime() - start; System.out.format("simple select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); }
结果
in select: 0.55
list size: 9999
simple select: 6.24
list size: 9999
总结:
我想应该没人会用for循环去做查询吧
3. 多表联结查询, join, form 2个table, in, exists 比较
SQL
<!-- 用于循环查询 --> <select id="selectAll" resultType="com.qunar.mybatistask.bean.Comment"> <![CDATA[ select cmt.id as id, cmt.post_id as postId, cmt.content as content from cmt ]]> </select> <!-- join 查询 --> <select id="selectJoin" resultType="com.qunar.mybatistask.bean.Comment"> <![CDATA[ select cmt.id as id, cmt.post_id as postId, cmt.content as content from cmt join post on post.id = cmt.post_id ]]> </select> <!-- from 2个table --> <select id="selectTowTable" resultType="com.qunar.mybatistask.bean.Comment"> <![CDATA[ select cmt.id as id, cmt.post_id as postId, cmt.content as content from cmt, post where cmt.post_id = post.id ]]> </select> <!-- in 联表查询 --> <select id="selectIn" resultType="com.qunar.mybatistask.bean.Comment"> <![CDATA[ select cmt.id as id, cmt.post_id as postId, cmt.content as content from cmt where cmt.post_id in ( select post.id from post ) ]]> </select> <!-- exists 联表查询 --> <select id="selectExists" resultType="com.qunar.mybatistask.bean.Comment"> <![CDATA[ select cmt.id as id, cmt.post_id as postId, cmt.content as content from cmt where exists ( select post.id from post where post.id = cmt.id ) ]]> </select>
service
@Override public List<Comment> selectTwoTable() { return sqlSession.selectList("comment.selectTowTable"); } @Override public List<Comment> selectJoin() { return sqlSession.selectList("comment.selectJoin"); } @Override public List<Comment> selectIn() { return sqlSession.selectList("comment.selectIn"); } @Override public List<Comment> selectExists() { return sqlSession.selectList("comment.selectExists"); } @Override public List<Comment> selectAll() { return sqlSession.selectList("comment.selectAll"); }
test case
/** * 测试JOIN查询效率 * */ @Test public void testJoinSelect() { // join 查询 long start = System.nanoTime(); List<Comment> list = commentService.selectJoin(); double duration = System.nanoTime() - start; System.out.format("join select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // From 两个表查询 start = System.nanoTime(); list = commentService.selectTwoTable(); duration = System.nanoTime() - start; System.out.format("2 table select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // in多表查询 start = System.nanoTime(); list = commentService.selectIn(); duration = System.nanoTime() - start; System.out.format("in multi table select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // exists多表查询 start = System.nanoTime(); list = commentService.selectExists(); duration = System.nanoTime() - start; System.out.format("exists multi table select: %.2f\n", duration / 1.0e9); System.out.println("list size: " + list.size()); // 分次查询, 太慢了, 忽略这种方法的测试吧 // start = System.nanoTime(); // list = commentService.selectAll(); // for (Comment comment : list) { // postService.selectById(comment.getPostId()); // } // duration = System.nanoTime() - start; // System.out.format("separate select: %.2f\n", duration / 1.0e9); // System.out.println("list size: " + list.size()); }
结果
join select: 2.44
list size: 210000
2 table select: 2.26
list size: 210000
in multi table select: 2.03
list size: 210000
exists multi table select: 2.35
list size: 210000
总结:
21W条数据下效率都差不多,而且我们一般会使用limit去限制查询的条数,所以应该他们的效率差距应该很小,我通过观察explain发现实际上join和from 2个table的方式的查询的执行计划是一模一样的,而in和exists的执行计划也是一模一样的
这里的表结构相对简单,也基本能用上索引 post_id 和 post.id 这些primary, 具体更加复杂的情况也许会影响这几种查询方式的执行计划, 才会体现出他们直接的差距, 当然我也相信他们执行的效率很大程度上是决定于mysql的优化器的优化策略,而这个优化策略很难人为的去判断,所以也不好说