PostgreSQL在何处处理 sql查询之五十四
接前面,从 cheapeast_path 的角度,关注 query_planner 函数,对其进行简化:
void query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, double limit_tuples, Path **cheapest_path, Path **sorted_path, double *num_groups) { Query *parse = root->parse; List *joinlist; RelOptInfo *final_rel; Path *cheapestpath; Path *sortedpath; Index rti; double total_pages; ... *cheapest_path = cheapestpath; *sorted_path = sortedpath; }
再关注 cheapestpath:
Path *cheapestpath; ... /* * Pick out the cheapest-total path and the cheapest presorted path for * the requested pathkeys (if there is one). We should take the tuple * fraction into account when selecting the cheapest presorted path, but * not when selecting the cheapest-total path, since if we have to sort * then we'll have to fetch all the tuples. (But there's a special case: * if query_pathkeys is NIL, meaning order doesn't matter, then the * "cheapest presorted" path will be the cheapest overall for the tuple * fraction.) * * The cheapest-total path is also the one to use if grouping_planner * decides to use hashed aggregation, so we return it separately even if * this routine thinks the presorted path is the winner. */ cheapestpath = final_rel->cheapest_total_path;
再看 final_rel: 从final_rel的观点,对 query_planner 进行简化:
RelOptInfo *final_rel; ... /* * Ready to do the primary planning. */ final_rel = make_one_rel(root, joinlist); ... cheapestpath = final_rel->cheapest_total_path;
接着,对 make_one_rel里如何返回 cheapest_total_path 进行深入分析:
/* * make_one_rel * Finds all possible access paths for executing a query, returning a * single rel that represents the join of all base rels in the query. */ RelOptInfo * make_one_rel(PlannerInfo *root, List *joinlist) { RelOptInfo *rel; Index rti; /* * Construct the all_baserels Relids set. */ root->all_baserels = NULL; for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; /* there may be empty slots corresponding to non-baserel RTEs */ if (brel == NULL) continue; Assert(brel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (brel->reloptkind != RELOPT_BASEREL) continue; root->all_baserels = bms_add_member(root->all_baserels, brel->relid); } /* * Generate access paths for the base rels. */ set_base_rel_sizes(root); set_base_rel_pathlists(root); /* * Generate access paths for the entire join tree. */ rel = make_rel_from_joinlist(root, joinlist); /* * The result should join all and only the query's base rels. */ Assert(bms_equal(rel->relids, root->all_baserels)); return rel; }
若要进一步分析,就是这个 make_rel_from_joinlist 了:
/* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. * * See comments for deconstruct_jointree() for definition of the joinlist * data structure. */ static RelOptInfo * make_rel_from_joinlist(PlannerInfo *root, List *joinlist) { int levels_needed; List *initial_rels; ListCell *jl; /* * Count the number of child joinlist nodes. This is the depth of the * dynamic-programming algorithm we must employ to consider all ways of * joining the child nodes. */ levels_needed = list_length(joinlist); if (levels_needed <= 0) return NULL; /* nothing to do? */ /* * Construct a list of rels corresponding to the child joinlist nodes. * This may contain both base rels and rels constructed according to * sub-joinlists. */ initial_rels = NIL; foreach(jl, joinlist) { Node *jlnode = (Node *) lfirst(jl); RelOptInfo *thisrel; if (IsA(jlnode, RangeTblRef)) { int varno = ((RangeTblRef *) jlnode)->rtindex; thisrel = find_base_rel(root, varno); } else if (IsA(jlnode, List)) { /* Recurse to handle subproblem */ thisrel = make_rel_from_joinlist(root, (List *) jlnode); } else { elog(ERROR, "unrecognized joinlist node type: %d", (int) nodeTag(jlnode)); thisrel = NULL; /* keep compiler quiet */ } initial_rels = lappend(initial_rels, thisrel); } if (levels_needed == 1) { /* * Single joinlist node, so we're done. */ return (RelOptInfo *) linitial(initial_rels); } else { /* * Consider the different orders in which we could join the rels, * using a plugin, GEQO, or the regular join search code. * * We put the initial_rels list into a PlannerInfo field because * has_legal_joinclause() needs to look at it (ugly :-(). */ root->initial_rels = initial_rels; if (join_search_hook) return (*join_search_hook) (root, levels_needed, initial_rels); else if (enable_geqo && levels_needed >= geqo_threshold) return geqo(root, levels_needed, initial_rels); else return standard_join_search(root, levels_needed, initial_rels); } }
对于我的简单查询,以上这个 (levels_needed == 1) 条件得到满足,所以就直接 return (RelOptInfo *) linitial(initial_rels);
按照这个视点,简化上述的程序:
static RelOptInfo * make_rel_from_joinlist(PlannerInfo *root, List *joinlist) { int levels_needed; List *initial_rels; ListCell *jl; /* * Count the number of child joinlist nodes. This is the depth of the * dynamic-programming algorithm we must employ to consider all ways of * joining the child nodes. */ levels_needed = list_length(joinlist); ... /* * Construct a list of rels corresponding to the child joinlist nodes. * This may contain both base rels and rels constructed according to * sub-joinlists. */ initial_rels = NIL; foreach(jl, joinlist) { Node *jlnode = (Node *) lfirst(jl); RelOptInfo *thisrel; if (IsA(jlnode, RangeTblRef)) { int varno = ((RangeTblRef *) jlnode)->rtindex; thisrel = find_base_rel(root, varno); } else if (IsA(jlnode, List)) { ... } ... initial_rels = lappend(initial_rels, thisrel); } if (levels_needed == 1) { /* * Single joinlist node, so we're done. */ return (RelOptInfo *) linitial(initial_rels); } else { ... } }
这句
int varno = ((RangeTblRef *) jlnode)->rtindex;
是拿到 RangeTable 的 index。
thisrel = find_base_rel(root, varno);
是利用刚才的 index 从 root 中把这个 rel 找出来。
initial_rels = lappend(initial_rels, thisrel);
是挂出一条链条。
return (RelOptInfo *) linitial(initial_rels);
是把第一个RelOptInfo对象取出来。
可以说,虽然是 在 make_rel_from_joinlist 中取出 rel 对象,但是其 cheapest_total_path ,在此之前早就准备好了。