PostgreSQL在何处处理 sql查询之四十六
接前面,再上溯:set_base_rel_pathlists --> set_rel_pathlist
/* * set_base_rel_pathlists * Finds all paths available for scanning each base-relation entry. * Sequential scan and any available indices are considered. * Each useful path is attached to its relation's 'pathlist' field. */ static void set_base_rel_pathlists(PlannerInfo *root) { //fprintf(stderr, "set_base_rel_pathlists... by process %d\n",getpid()); Index rti; for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *rel = root->simple_rel_array[rti]; /* there may be empty slots corresponding to non-baserel RTEs */ if (rel == NULL) continue; Assert(rel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (rel->reloptkind != RELOPT_BASEREL) continue; set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); } }
再上溯:make_one_rel --> set_base_rel_pathlists
/* * make_one_rel * Finds all possible access paths for executing a query, returning a * single rel that represents the join of all base rels in the query. */ RelOptInfo * make_one_rel(PlannerInfo *root, List *joinlist) { RelOptInfo *rel; Index rti; /* * Construct the all_baserels Relids set. */ root->all_baserels = NULL; for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; /* there may be empty slots corresponding to non-baserel RTEs */ if (brel == NULL) continue; Assert(brel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (brel->reloptkind != RELOPT_BASEREL) continue; root->all_baserels = bms_add_member(root->all_baserels, brel->relid); } /* * Generate access paths for the base rels. */ set_base_rel_sizes(root); set_base_rel_pathlists(root); /* * Generate access paths for the entire join tree. */ rel = make_rel_from_joinlist(root, joinlist); /* * The result should join all and only the query's base rels. */ Assert(bms_equal(rel->relids, root->all_baserels)); return rel; }
再上溯:query_planner --> make_one_rel
/* * query_planner * Generate a path (that is, a simplified plan) for a basic query, * which may involve joins but not any fancier features. * * Since query_planner does not handle the toplevel processing (grouping, * sorting, etc) it cannot select the best path by itself. It selects * two paths: the cheapest path that produces all the required tuples, * independent of any ordering considerations, and the cheapest path that * produces the expected fraction of the required tuples in the required * ordering, if there is a path that is cheaper for this than just sorting * the output of the cheapest overall path. The caller (grouping_planner) * will make the final decision about which to use. * * Input parameters: * root describes the query to plan * tlist is the target list the query should produce * (this is NOT necessarily root->parse->targetList!) * tuple_fraction is the fraction of tuples we expect will be retrieved * limit_tuples is a hard limit on number of tuples to retrieve, * or -1 if no limit * * Output parameters: * *cheapest_path receives the overall-cheapest path for the query * *sorted_path receives the cheapest presorted path for the query, * if any (NULL if there is no useful presorted path) * *num_groups receives the estimated number of groups, or 1 if query * does not use grouping * * Note: the PlannerInfo node also includes a query_pathkeys field, which is * both an input and an output of query_planner(). The input value signals * query_planner that the indicated sort order is wanted in the final output * plan. But this value has not yet been "canonicalized", since the needed * info does not get computed until we scan the qual clauses. We canonicalize * it as soon as that task is done. (The main reason query_pathkeys is a * PlannerInfo field and not a passed parameter is that the low-level routines * in indxpath.c need to see it.) * * Note: the PlannerInfo node includes other pathkeys fields besides * query_pathkeys, all of which need to be canonicalized once the info is * available. See canonicalize_all_pathkeys. * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) * 0 < tuple_fraction < 1: expect the given fraction of tuples available * from the plan to be retrieved * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples * expected to be retrieved (ie, a LIMIT specification) * Note that a nonzero tuple_fraction could come from outer context; it is * therefore not redundant with limit_tuples. We use limit_tuples to determine * whether a bounded sort can be used at runtime. */ void query_planner(PlannerInfo *root, List *tlist, double tuple_fraction, double limit_tuples, Path **cheapest_path, Path **sorted_path, double *num_groups) { Query *parse = root->parse; List *joinlist; RelOptInfo *final_rel; Path *cheapestpath; Path *sortedpath; Index rti; double total_pages; /* Make tuple_fraction, limit_tuples accessible to lower-level routines */ root->tuple_fraction = tuple_fraction; root->limit_tuples = limit_tuples; *num_groups = 1; /* default result */ /* * If the query has an empty join tree, then it's something easy like * "SELECT 2+2;" or "INSERT ... VALUES()". Fall through quickly. */ if (parse->jointree->fromlist == NIL) { /* We need a trivial path result */ *cheapest_path = (Path *) create_result_path((List *) parse->jointree->quals); *sorted_path = NULL; /* * We still are required to canonicalize any pathkeys, in case it's * something like "SELECT 2+2 ORDER BY 1". */ root->canon_pathkeys = NIL; canonicalize_all_pathkeys(root); return; } /* * Init planner lists to empty. * * NOTE: append_rel_list was set up by subquery_planner, so do not touch * here; eq_classes and minmax_aggs may contain data already, too. */ root->join_rel_list = NIL; root->join_rel_hash = NULL; root->join_rel_level = NULL; root->join_cur_level = 0; root->canon_pathkeys = NIL; root->left_join_clauses = NIL; root->right_join_clauses = NIL; root->full_join_clauses = NIL; root->join_info_list = NIL; root->placeholder_list = NIL; root->initial_rels = NIL; /* * Make a flattened version of the rangetable for faster access (this is * OK because the rangetable won't change any more), and set up an empty * array for indexing base relations. */ setup_simple_rel_arrays(root); /* * Construct RelOptInfo nodes for all base relations in query, and * indirectly for all appendrel member relations ("other rels"). This * will give us a RelOptInfo for every "simple" (non-join) rel involved in * the query. * * Note: the reason we find the rels by searching the jointree and * appendrel list, rather than just scanning the rangetable, is that the * rangetable may contain RTEs for rels not actively part of the query, * for example views. We don't want to make RelOptInfos for them. */ add_base_rels_to_query(root, (Node *) parse->jointree); /* * Examine the targetlist and join tree, adding entries to baserel * targetlists for all referenced Vars, and generating PlaceHolderInfo * entries for all referenced PlaceHolderVars. Restrict and join clauses * are added to appropriate lists belonging to the mentioned relations. We * also build EquivalenceClasses for provably equivalent expressions. The * SpecialJoinInfo list is also built to hold information about join order * restrictions. Finally, we form a target joinlist for make_one_rel() to * work from. */ build_base_rel_tlists(root, tlist); find_placeholders_in_jointree(root); joinlist = deconstruct_jointree(root); /* * Reconsider any postponed outer-join quals now that we have built up * equivalence classes. (This could result in further additions or * mergings of classes.) */ reconsider_outer_join_clauses(root); /* * If we formed any equivalence classes, generate additional restriction * clauses as appropriate. (Implied join clauses are formed on-the-fly * later.) */ generate_base_implied_equalities(root); /* * We have completed merging equivalence sets, so it's now possible to * convert previously generated pathkeys (in particular, the requested * query_pathkeys) to canonical form. */ canonicalize_all_pathkeys(root); /* * Examine any "placeholder" expressions generated during subquery pullup. * Make sure that the Vars they need are marked as needed at the relevant * join level. This must be done before join removal because it might * cause Vars or placeholders to be needed above a join when they weren't * so marked before. */ fix_placeholder_input_needed_levels(root); /* * Remove any useless outer joins. Ideally this would be done during * jointree preprocessing, but the necessary information isn't available * until we've built baserel data structures and classified qual clauses. */ joinlist = remove_useless_joins(root, joinlist); /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a * placeholder is evaluatable at a base rel. */ add_placeholders_to_base_rels(root); /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the * query by join removal; so we can compute total_table_pages. * * Note that appendrels are not double-counted here, even though we don't * bother to distinguish RelOptInfos for appendrel parents, because the * parents will still have size zero. * * XXX if a table is self-joined, we will count it once per appearance, * which perhaps is the wrong thing ... but that's not completely clear, * and detecting self-joins here is difficult, so ignore it for now. */ total_pages = 0; for (rti = 1; rti < root->simple_rel_array_size; rti++) { RelOptInfo *brel = root->simple_rel_array[rti]; if (brel == NULL) continue; Assert(brel->relid == rti); /* sanity check on array */ if (brel->reloptkind == RELOPT_BASEREL || brel->reloptkind == RELOPT_OTHER_MEMBER_REL) total_pages += (double) brel->pages; } root->total_table_pages = total_pages; /* * Ready to do the primary planning. */ final_rel = make_one_rel(root, joinlist); if (!final_rel || !final_rel->cheapest_total_path) elog(ERROR, "failed to construct the join relation"); /* * If there's grouping going on, estimate the number of result groups. We * couldn't do this any earlier because it depends on relation size * estimates that were set up above. * * Then convert tuple_fraction to fractional form if it is absolute, and * adjust it based on the knowledge that grouping_planner will be doing * grouping or aggregation work with our result. * * This introduces some undesirable coupling between this code and * grouping_planner, but the alternatives seem even uglier; we couldn't * pass back completed paths without making these decisions here. */ if (parse->groupClause) { List *groupExprs; groupExprs = get_sortgrouplist_exprs(parse->groupClause, parse->targetList); *num_groups = estimate_num_groups(root, groupExprs, final_rel->rows); /* * In GROUP BY mode, an absolute LIMIT is relative to the number of * groups not the number of tuples. If the caller gave us a fraction, * keep it as-is. (In both cases, we are effectively assuming that * all the groups are about the same size.) */ if (tuple_fraction >= 1.0) tuple_fraction /= *num_groups; /* * If both GROUP BY and ORDER BY are specified, we will need two * levels of sort --- and, therefore, certainly need to read all the * tuples --- unless ORDER BY is a subset of GROUP BY. Likewise if we * have both DISTINCT and GROUP BY, or if we have a window * specification not compatible with the GROUP BY. */ if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) || !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) || !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys)) tuple_fraction = 0.0; /* In any case, limit_tuples shouldn't be specified here */ Assert(limit_tuples < 0); } else if (parse->hasAggs || root->hasHavingQual) { /* * Ungrouped aggregate will certainly want to read all the tuples, and * it will deliver a single result row (so leave *num_groups 1). */ tuple_fraction = 0.0; /* limit_tuples shouldn't be specified here */ Assert(limit_tuples < 0); } else if (parse->distinctClause) { /* * Since there was no grouping or aggregation, it's reasonable to * assume the UNIQUE filter has effects comparable to GROUP BY. Return * the estimated number of output rows for use by caller. (If DISTINCT * is used with grouping, we ignore its effects for rowcount * estimation purposes; this amounts to assuming the grouped rows are * distinct already.) */ List *distinctExprs; distinctExprs = get_sortgrouplist_exprs(parse->distinctClause, parse->targetList); *num_groups = estimate_num_groups(root, distinctExprs, final_rel->rows); /* * Adjust tuple_fraction the same way as for GROUP BY, too. */ if (tuple_fraction >= 1.0) tuple_fraction /= *num_groups; /* limit_tuples shouldn't be specified here */ Assert(limit_tuples < 0); } else { /* * Plain non-grouped, non-aggregated query: an absolute tuple fraction * can be divided by the number of tuples. */ if (tuple_fraction >= 1.0) tuple_fraction /= final_rel->rows; } /* * Pick out the cheapest-total path and the cheapest presorted path for * the requested pathkeys (if there is one). We should take the tuple * fraction into account when selecting the cheapest presorted path, but * not when selecting the cheapest-total path, since if we have to sort * then we'll have to fetch all the tuples. (But there's a special case: * if query_pathkeys is NIL, meaning order doesn't matter, then the * "cheapest presorted" path will be the cheapest overall for the tuple * fraction.) * * The cheapest-total path is also the one to use if grouping_planner * decides to use hashed aggregation, so we return it separately even if * this routine thinks the presorted path is the winner. */ cheapestpath = final_rel->cheapest_total_path; sortedpath = get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, root->query_pathkeys, NULL, tuple_fraction); /* Don't return same path in both guises; just wastes effort */ if (sortedpath == cheapestpath) sortedpath = NULL; /* * Forget about the presorted path if it would be cheaper to sort the * cheapest-total path. Here we need consider only the behavior at the * tuple fraction point. */ if (sortedpath) { Path sort_path; /* dummy for result of cost_sort */ if (root->query_pathkeys == NIL || pathkeys_contained_in(root->query_pathkeys, cheapestpath->pathkeys)) { /* No sort needed for cheapest path */ sort_path.startup_cost = cheapestpath->startup_cost; sort_path.total_cost = cheapestpath->total_cost; } else { /* Figure cost for sorting */ cost_sort(&sort_path, root, root->query_pathkeys, cheapestpath->total_cost, final_rel->rows, final_rel->width, 0.0, work_mem, limit_tuples); } if (compare_fractional_path_costs(sortedpath, &sort_path, tuple_fraction) > 0) { /* Presorted path is a loser */ sortedpath = NULL; } } *cheapest_path = cheapestpath; *sorted_path = sortedpath; }
接下来从 query_planner 开始再上溯:
/*-------------------- * grouping_planner * Perform planning steps related to grouping, aggregation, etc. * This primarily means adding top-level processing to the basic * query plan produced by query_planner. * * tuple_fraction is the fraction of tuples we expect will be retrieved * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) * 0 < tuple_fraction < 1: expect the given fraction of tuples available * from the plan to be retrieved * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples * expected to be retrieved (ie, a LIMIT specification) * * Returns a query plan. Also, root->query_pathkeys is returned as the * actual output ordering of the plan (in pathkey format). *-------------------- */ static Plan * grouping_planner(PlannerInfo *root, double tuple_fraction) { Query *parse = root->parse; List *tlist = parse->targetList; int64 offset_est = 0; int64 count_est = 0; double limit_tuples = -1.0; Plan *result_plan; List *current_pathkeys; double dNumGroups = 0; bool use_hashed_distinct = false; bool tested_hashed_distinct = false; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) { tuple_fraction = preprocess_limit(root, tuple_fraction, &offset_est, &count_est); /* * If we have a known LIMIT, and don't have an unknown OFFSET, we can * estimate the effects of using a bounded sort. */ if (count_est > 0 && offset_est >= 0) limit_tuples = (double) count_est + (double) offset_est; } if (parse->setOperations) { List *set_sortclauses; /* * If there's a top-level ORDER BY, assume we have to fetch all the * tuples. This might be too simplistic given all the hackery below * to possibly avoid the sort; but the odds of accurate estimates here * are pretty low anyway. */ if (parse->sortClause) tuple_fraction = 0.0; /* * Construct the plan for set operations. The result will not need * any work except perhaps a top-level sort and/or LIMIT. Note that * any special work for recursive unions is the responsibility of * plan_set_operations. */ result_plan = plan_set_operations(root, tuple_fraction, &set_sortclauses); /* * Calculate pathkeys representing the sort order (if any) of the set * operation's result. We have to do this before overwriting the sort * key information... */ current_pathkeys = make_pathkeys_for_sortclauses(root, set_sortclauses, result_plan->targetlist, true); /* * We should not need to call preprocess_targetlist, since we must be * in a SELECT query node. Instead, use the targetlist returned by * plan_set_operations (since this tells whether it returned any * resjunk columns!), and transfer any sort key information from the * original tlist. */ Assert(parse->commandType == CMD_SELECT); tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist), tlist); /* * Can't handle FOR UPDATE/SHARE here (parser should have checked * already, but let's make sure). */ if (parse->rowMarks) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT"))); /* * Calculate pathkeys that represent result ordering requirements */ Assert(parse->distinctClause == NIL); root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, true); } else { /* No set operations, do regular planning */ List *sub_tlist; double sub_limit_tuples; AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; Path *cheapest_path; Path *sorted_path; Path *best_path; long numGroups = 0; AggClauseCosts agg_costs; int numGroupCols; double path_rows; int path_width; bool use_hashed_grouping = false; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); /* A recursive query should always have setOperations */ Assert(!root->hasRecursion); /* Preprocess GROUP BY clause, if any */ if (parse->groupClause) preprocess_groupclause(root); numGroupCols = list_length(parse->groupClause); /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); /* * Locate any window functions in the tlist. (We don't need to look * anywhere else, since expressions used in ORDER BY will be in there * too.) Note that they could all have been eliminated by constant * folding, in which case we don't need to do any more work. */ if (parse->hasWindowFuncs) { wflists = find_window_functions((Node *) tlist, list_length(parse->windowClause)); if (wflists->numWindowFuncs > 0) activeWindows = select_active_windows(root, wflists); else parse->hasWindowFuncs = false; } /* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. */ sub_tlist = make_subplanTargetList(root, tlist, &groupColIdx, &need_tlist_eval); /* * Do aggregate preprocessing, if the query has any aggs. * * Note: think not that we can turn off hasAggs if we find no aggs. It * is possible for constant-expression simplification to remove all * explicit references to aggs, but we still have to follow the * aggregate semantics (eg, producing only one output row). */ if (parse->hasAggs) { /* * Collect statistics about aggregates for estimating costs. Note: * we do not attempt to detect duplicate aggregates here; a * somewhat-overestimated cost is okay for our present purposes. */ count_agg_clauses(root, (Node *) tlist, &agg_costs); count_agg_clauses(root, parse->havingQual, &agg_costs); /* * Preprocess MIN/MAX aggregates, if any. Note: be careful about * adding logic between here and the optimize_minmax_aggregates * call. Anything that is needed in MIN/MAX-optimizable cases * will have to be duplicated in planagg.c. */ preprocess_minmax_aggregates(root, tlist); } /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize * them after EquivalenceClasses have been formed. The sortClause is * certainly sort-able, but GROUP BY and DISTINCT might not be, in * which case we just leave their pathkeys empty. */ if (parse->groupClause && grouping_is_sortable(parse->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, tlist, false); else root->group_pathkeys = NIL; /* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) { WindowClause *wc = (WindowClause *) linitial(activeWindows); root->window_pathkeys = make_pathkeys_for_window(root, wc, tlist, false); } else root->window_pathkeys = NIL; if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, false); else root->distinct_pathkeys = NIL; root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, false); /* * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted * properly for grouping. Otherwise, if we have window functions to * evaluate, we try to sort for the first window. Otherwise, if * there's a sortable DISTINCT clause that's more rigorous than the * ORDER BY clause, we try to produce output that's sufficiently well * sorted for the DISTINCT. Otherwise, if there is an ORDER BY * clause, we want to sort by the ORDER BY clause. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a * superset of GROUP BY, it would be tempting to request sort by ORDER * BY --- but that might just leave us failing to exploit an available * sort order at all. Needs more thought. The choice for DISTINCT * versus ORDER BY is much easier, since we know that the parser * ensured that one is a superset of the other. */ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; else if (root->window_pathkeys) root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) root->query_pathkeys = root->distinct_pathkeys; else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL; /* * Figure out whether there's a hard limit on the number of rows that * query_planner's result subplan needs to return. Even if we know a * hard limit overall, it doesn't apply if the query has any * grouping/aggregation operations. */ if (parse->groupClause || parse->distinctClause || parse->hasAggs || parse->hasWindowFuncs || root->hasHavingQual) sub_limit_tuples = -1.0; else sub_limit_tuples = limit_tuples; /* * Generate the best unsorted and presorted paths for this Query (but * note there may not be any presorted path). query_planner will also * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); /* * Extract rowcount and width estimates for possible use in grouping * decisions. Beware here of the possibility that * cheapest_path->parent is NULL (ie, there is no FROM clause). */ if (cheapest_path->parent) { path_rows = cheapest_path->parent->rows; path_width = cheapest_path->parent->width; } else { path_rows = 1; /* assume non-set result */ path_width = 100; /* arbitrary */ } if (parse->groupClause) { /* * If grouping, decide whether to use sorted or hashed grouping. */ use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path, sorted_path, dNumGroups, &agg_costs); /* Also convert # groups to long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } else if (parse->distinctClause && sorted_path && !root->hasHavingQual && !parse->hasAggs && !activeWindows) { /* * We'll reach the DISTINCT stage without any intermediate * processing, so figure out whether we will want to hash or not * so we can choose whether to use cheapest or sorted path. */ use_hashed_distinct = choose_hashed_distinct(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path->startup_cost, cheapest_path->total_cost, sorted_path->startup_cost, sorted_path->total_cost, sorted_path->pathkeys, dNumGroups); tested_hashed_distinct = true; } /* * Select the best path. If we are doing hashed grouping, we will * always read all the input tuples, so use the cheapest-total path. * Otherwise, trust query_planner's decision about which to use. */ if (use_hashed_grouping || use_hashed_distinct || !sorted_path) best_path = cheapest_path; else best_path = sorted_path; /* * Check to see if it's possible to optimize MIN/MAX aggregates. If * so, we will forget all the work we did so far to choose a "regular" * path ... but we had to do it anyway to be able to tell which way is * cheaper. */ result_plan = optimize_minmax_aggregates(root, tlist, &agg_costs, best_path); if (result_plan != NULL) { /* * optimize_minmax_aggregates generated the full plan, with the * right tlist, and it has no sort order. */ current_pathkeys = NIL; } else { /* * Normal case --- create a plan according to query_planner's * results. */ bool need_sort_for_grouping = false; result_plan = create_plan(root, best_path); current_pathkeys = best_path->pathkeys; /* Detect if we'll need an explicit sort for grouping */ if (parse->groupClause && !use_hashed_grouping && !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { need_sort_for_grouping = true; /* * Always override create_plan's tlist, so that we don't sort * useless data from a "physical" tlist. */ need_tlist_eval = true; } /* * create_plan returns a plan with just a "flat" tlist of required * Vars. Usually we need to insert the sub_tlist as the tlist of * the top plan node. However, we can skip that if we determined * that whatever create_plan chose to return will be good enough. */ if (need_tlist_eval) { /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the * desired tlist. */ if (!is_projection_capable_plan(result_plan)) { result_plan = (Plan *) make_result(root, sub_tlist, NULL, result_plan); } else { /* * Otherwise, just replace the subplan's flat tlist with * the desired tlist. */ result_plan->targetlist = sub_tlist; } /* * Also, account for the cost of evaluation of the sub_tlist. * See comments for add_tlist_costs_to_plan() for more info. */ add_tlist_costs_to_plan(root, result_plan, sub_tlist); } else { /* * Since we're using create_plan's tlist and not the one * make_subplanTargetList calculated, we have to refigure any * grouping-column indexes make_subplanTargetList computed. */ locate_grouping_columns(root, tlist, result_plan->targetlist, groupColIdx); } /* * Insert AGG or GROUP node if needed, plus an explicit sort step * if necessary. * * HAVING clause, if any, becomes qual of the Agg or Group node. */ if (use_hashed_grouping) { /* Hashed aggregate plan --- no sort needed */ result_plan = (Plan *) make_agg(root, tlist, (List *) parse->havingQual, AGG_HASHED, &agg_costs, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), numGroups, result_plan); /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } else if (parse->hasAggs) { /* Plain aggregate plan --- sort if needed */ AggStrategy aggstrategy; if (parse->groupClause) { if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); current_pathkeys = root->group_pathkeys; } aggstrategy = AGG_SORTED; /* * The AGG node will not change the sort ordering of its * groups, so current_pathkeys describes the result too. */ } else { aggstrategy = AGG_PLAIN; /* Result will be only one row anyway; no sort order */ current_pathkeys = NIL; } result_plan = (Plan *) make_agg(root, tlist, (List *) parse->havingQual, aggstrategy, &agg_costs, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), numGroups, result_plan); } else if (parse->groupClause) { /* * GROUP BY without aggregation, so insert a group node (plus * the appropriate sort node, if necessary). * * Add an explicit sort if we couldn't make the path come out * the way the GROUP node needs it. */ if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); current_pathkeys = root->group_pathkeys; } result_plan = (Plan *) make_group(root, tlist, (List *) parse->havingQual, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), dNumGroups, result_plan); /* The Group node won't change sort ordering */ } else if (root->hasHavingQual) { /* * No aggregates, and no GROUP BY, but we have a HAVING qual. * This is a degenerate case in which we are supposed to emit * either 0 or 1 row depending on whether HAVING succeeds. * Furthermore, there cannot be any variables in either HAVING * or the targetlist, so we actually do not need the FROM * table at all! We can just throw away the plan-so-far and * generate a Result node. This is a sufficiently unusual * corner case that it's not worth contorting the structure of * this routine to avoid having to generate the plan in the * first place. */ result_plan = (Plan *) make_result(root, tlist, parse->havingQual, NULL); } } /* end of non-minmax-aggregate case */ /* * Since each window function could require a different sort order, we * stack up a WindowAgg node for each window, with sort steps between * them as needed. */ if (activeWindows) { List *window_tlist; ListCell *l; /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the desired * tlist. (In some cases this might not really be required, but * it's not worth trying to avoid it.) Note that on second and * subsequent passes through the following loop, the top-level * node will be a WindowAgg which we know can project; so we only * need to check once. */ if (!is_projection_capable_plan(result_plan)) { result_plan = (Plan *) make_result(root, NIL, NULL, result_plan); } /* * The "base" targetlist for all steps of the windowing process is * a flat tlist of all Vars and Aggs needed in the result. (In * some cases we wouldn't need to propagate all of these all the * way to the top, since they might only be needed as inputs to * WindowFuncs. It's probably not worth trying to optimize that * though.) We also add window partitioning and sorting * expressions to the base tlist, to ensure they're computed only * once at the bottom of the stack (that's critical for volatile * functions). As we climb up the stack, we'll add outputs for * the WindowFuncs computed at each level. */ window_tlist = make_windowInputTargetList(root, tlist, activeWindows); /* * The copyObject steps here are needed to ensure that each plan * node has a separately modifiable tlist. (XXX wouldn't a * shallow list copy do for that?) */ result_plan->targetlist = (List *) copyObject(window_tlist); foreach(l, activeWindows) { WindowClause *wc = (WindowClause *) lfirst(l); List *window_pathkeys; int partNumCols; AttrNumber *partColIdx; Oid *partOperators; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; window_pathkeys = make_pathkeys_for_window(root, wc, tlist, true); /* * This is a bit tricky: we build a sort node even if we don't * really have to sort. Even when no explicit sort is needed, * we need to have suitable resjunk items added to the input * plan's tlist for any partitioning or ordering columns that * aren't plain Vars. (In theory, make_windowInputTargetList * should have provided all such columns, but let's not assume * that here.) Furthermore, this way we can use existing * infrastructure to identify which input columns are the * interesting ones. */ if (window_pathkeys) { Sort *sort_plan; sort_plan = make_sort_from_pathkeys(root, result_plan, window_pathkeys, -1.0); if (!pathkeys_contained_in(window_pathkeys, current_pathkeys)) { /* we do indeed need to sort */ result_plan = (Plan *) sort_plan; current_pathkeys = window_pathkeys; } /* In either case, extract the per-column information */ get_column_info_for_window(root, wc, tlist, sort_plan->numCols, sort_plan->sortColIdx, &partNumCols, &partColIdx, &partOperators, &ordNumCols, &ordColIdx, &ordOperators); } else { /* empty window specification, nothing to sort */ partNumCols = 0; partColIdx = NULL; partOperators = NULL; ordNumCols = 0; ordColIdx = NULL; ordOperators = NULL; } if (lnext(l)) { /* Add the current WindowFuncs to the running tlist */ window_tlist = add_to_flat_tlist(window_tlist, wflists->windowFuncs[wc->winref]); } else { /* Install the original tlist in the topmost WindowAgg */ window_tlist = tlist; } /* ... and make the WindowAgg plan node */ result_plan = (Plan *) make_windowagg(root, (List *) copyObject(window_tlist), wflists->windowFuncs[wc->winref], wc->winref, partNumCols, partColIdx, partOperators, ordNumCols, ordColIdx, ordOperators, wc->frameOptions, wc->startOffset, wc->endOffset, result_plan); } } } /* end of if (setOperations) */ /* * If there is a DISTINCT clause, add the necessary node(s). */ if (parse->distinctClause) { double dNumDistinctRows; long numDistinctRows; /* * If there was grouping or aggregation, use the current number of * rows as the estimated number of DISTINCT rows (ie, assume the * result was already mostly unique). If not, use the number of * distinct-groups calculated by query_planner. */ if (parse->groupClause || root->hasHavingQual || parse->hasAggs) dNumDistinctRows = result_plan->plan_rows; else dNumDistinctRows = dNumGroups; /* Also convert to long int --- but 'ware overflow! */ numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); /* Choose implementation method if we didn't already */ if (!tested_hashed_distinct) { /* * At this point, either hashed or sorted grouping will have to * work from result_plan, so we pass that as both "cheapest" and * "sorted". */ use_hashed_distinct = choose_hashed_distinct(root, tuple_fraction, limit_tuples, result_plan->plan_rows, result_plan->plan_width, result_plan->startup_cost, result_plan->total_cost, result_plan->startup_cost, result_plan->total_cost, current_pathkeys, dNumDistinctRows); } if (use_hashed_distinct) { /* Hashed aggregate plan --- no sort needed */ result_plan = (Plan *) make_agg(root, result_plan->targetlist, NIL, AGG_HASHED, NULL, list_length(parse->distinctClause), extract_grouping_cols(parse->distinctClause, result_plan->targetlist), extract_grouping_ops(parse->distinctClause), numDistinctRows, result_plan); /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } else { /* * Use a Unique node to implement DISTINCT. Add an explicit sort * if we couldn't make the path come out the way the Unique node * needs it. If we do have to sort, always sort by the more * rigorous of DISTINCT and ORDER BY, to avoid a second sort * below. However, for regular DISTINCT, don't sort now if we * don't have to --- sorting afterwards will likely be cheaper, * and also has the possibility of optimizing via LIMIT. But for * DISTINCT ON, we *must* force the final sort now, else it won't * have the desired behavior. */ List *needed_pathkeys; if (parse->hasDistinctOn && list_length(root->distinct_pathkeys) < list_length(root->sort_pathkeys)) needed_pathkeys = root->sort_pathkeys; else needed_pathkeys = root->distinct_pathkeys; if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) { if (list_length(root->distinct_pathkeys) >= list_length(root->sort_pathkeys)) current_pathkeys = root->distinct_pathkeys; else { current_pathkeys = root->sort_pathkeys; /* Assert checks that parser didn't mess up... */ Assert(pathkeys_contained_in(root->distinct_pathkeys, current_pathkeys)); } result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, current_pathkeys, -1.0); } result_plan = (Plan *) make_unique(result_plan, parse->distinctClause); result_plan->plan_rows = dNumDistinctRows; /* The Unique node won't change sort ordering */ } } /* * If ORDER BY was given and we were not able to make the plan come out in * the right order, add an explicit sort step. */ if (parse->sortClause) { if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) { result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, root->sort_pathkeys, limit_tuples); current_pathkeys = root->sort_pathkeys; } } /* * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we * intentionally test parse->rowMarks not root->rowMarks here. If there * are only non-locking rowmarks, they should be handled by the * ModifyTable node instead.) */ if (parse->rowMarks) { result_plan = (Plan *) make_lockrows(result_plan, root->rowMarks, SS_assign_special_param(root)); /* * The result can no longer be assumed sorted, since locking might * cause the sort key columns to be replaced with new values. */ current_pathkeys = NIL; } /* * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. */ if (parse->limitCount || parse->limitOffset) { result_plan = (Plan *) make_limit(result_plan, parse->limitOffset, parse->limitCount, offset_est, count_est); } /* * Return the actual output ordering in query_pathkeys for possible use by * an outer query level. */ root->query_pathkeys = current_pathkeys; return result_plan; }
简化:
/*-------------------- * grouping_planner * Perform planning steps related to grouping, aggregation, etc. * This primarily means adding top-level processing to the basic * query plan produced by query_planner. * * tuple_fraction is the fraction of tuples we expect will be retrieved * * tuple_fraction is interpreted as follows: * 0: expect all tuples to be retrieved (normal case) * 0 < tuple_fraction < 1: expect the given fraction of tuples available * from the plan to be retrieved * tuple_fraction >= 1: tuple_fraction is the absolute number of tuples * expected to be retrieved (ie, a LIMIT specification) * * Returns a query plan. Also, root->query_pathkeys is returned as the * actual output ordering of the plan (in pathkey format). *-------------------- */ static Plan * grouping_planner(PlannerInfo *root, double tuple_fraction) { ... if (parse->setOperations) { ... } else { ... /* * Generate the best unsorted and presorted paths for this Query (but * note there may not be any presorted path). query_planner will also * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); ... } /* end of if (setOperations) */ ... return result_plan; }