PostgreSQL 在何处处理 SQL 查询之五十
接前文,下面是 grouping_planner 函数主体部分的代码:
Query *parse = root->parse; List *tlist = parse->targetList; int64 offset_est = 0; int64 count_est = 0; double limit_tuples = -1.0; Plan *result_plan; List *current_pathkeys; double dNumGroups = 0; bool use_hashed_distinct = false; bool tested_hashed_distinct = false; /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) { tuple_fraction = preprocess_limit(root, tuple_fraction, &offset_est, &count_est); /* * If we have a known LIMIT, and don't have an unknown OFFSET, we can * estimate the effects of using a bounded sort. */ if (count_est > 0 && offset_est >= 0) limit_tuples = (double) count_est + (double) offset_est; } if (parse->setOperations) { List *set_sortclauses; /* * If there's a top-level ORDER BY, assume we have to fetch all the * tuples. This might be too simplistic given all the hackery below * to possibly avoid the sort; but the odds of accurate estimates here * are pretty low anyway. */ if (parse->sortClause) tuple_fraction = 0.0; /* * Construct the plan for set operations. The result will not need * any work except perhaps a top-level sort and/or LIMIT. Note that * any special work for recursive unions is the responsibility of * plan_set_operations. */ result_plan = plan_set_operations(root, tuple_fraction, &set_sortclauses); /* * Calculate pathkeys representing the sort order (if any) of the set * operation's result. We have to do this before overwriting the sort * key information... */ current_pathkeys = make_pathkeys_for_sortclauses(root, set_sortclauses, result_plan->targetlist, true); /* * We should not need to call preprocess_targetlist, since we must be * in a SELECT query node. Instead, use the targetlist returned by * plan_set_operations (since this tells whether it returned any * resjunk columns!), and transfer any sort key information from the * original tlist. 
*/ Assert(parse->commandType == CMD_SELECT); tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist), tlist); /* * Can't handle FOR UPDATE/SHARE here (parser should have checked * already, but let's make sure). */ if (parse->rowMarks) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT"))); /* * Calculate pathkeys that represent result ordering requirements */ Assert(parse->distinctClause == NIL); root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, true); } else { /* No set operations, do regular planning */ List *sub_tlist; double sub_limit_tuples; AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; Path *cheapest_path; Path *sorted_path; Path *best_path; long numGroups = 0; AggClauseCosts agg_costs; int numGroupCols; double path_rows; int path_width; bool use_hashed_grouping = false; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); /* A recursive query should always have setOperations */ Assert(!root->hasRecursion); /* Preprocess GROUP BY clause, if any */ if (parse->groupClause) preprocess_groupclause(root); numGroupCols = list_length(parse->groupClause); /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); /* * Locate any window functions in the tlist. (We don't need to look * anywhere else, since expressions used in ORDER BY will be in there * too.) Note that they could all have been eliminated by constant * folding, in which case we don't need to do any more work. */ if (parse->hasWindowFuncs) { wflists = find_window_functions((Node *) tlist, list_length(parse->windowClause)); if (wflists->numWindowFuncs > 0) activeWindows = select_active_windows(root, wflists); else parse->hasWindowFuncs = false; } /* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. 
*/ sub_tlist = make_subplanTargetList(root, tlist, &groupColIdx, &need_tlist_eval); /* * Do aggregate preprocessing, if the query has any aggs. * * Note: think not that we can turn off hasAggs if we find no aggs. It * is possible for constant-expression simplification to remove all * explicit references to aggs, but we still have to follow the * aggregate semantics (eg, producing only one output row). */ if (parse->hasAggs) { /* * Collect statistics about aggregates for estimating costs. Note: * we do not attempt to detect duplicate aggregates here; a * somewhat-overestimated cost is okay for our present purposes. */ count_agg_clauses(root, (Node *) tlist, &agg_costs); count_agg_clauses(root, parse->havingQual, &agg_costs); /* * Preprocess MIN/MAX aggregates, if any. Note: be careful about * adding logic between here and the optimize_minmax_aggregates * call. Anything that is needed in MIN/MAX-optimizable cases * will have to be duplicated in planagg.c. */ preprocess_minmax_aggregates(root, tlist); } /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize * them after EquivalenceClasses have been formed. The sortClause is * certainly sort-able, but GROUP BY and DISTINCT might not be, in * which case we just leave their pathkeys empty. 
*/ if (parse->groupClause && grouping_is_sortable(parse->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, tlist, false); else root->group_pathkeys = NIL; /* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) { WindowClause *wc = (WindowClause *) linitial(activeWindows); root->window_pathkeys = make_pathkeys_for_window(root, wc, tlist, false); } else root->window_pathkeys = NIL; if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, false); else root->distinct_pathkeys = NIL; root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, false); /* * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted * properly for grouping. Otherwise, if we have window functions to * evaluate, we try to sort for the first window. Otherwise, if * there's a sortable DISTINCT clause that's more rigorous than the * ORDER BY clause, we try to produce output that's sufficiently well * sorted for the DISTINCT. Otherwise, if there is an ORDER BY * clause, we want to sort by the ORDER BY clause. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a * superset of GROUP BY, it would be tempting to request sort by ORDER * BY --- but that might just leave us failing to exploit an available * sort order at all. Needs more thought. The choice for DISTINCT * versus ORDER BY is much easier, since we know that the parser * ensured that one is a superset of the other. 
*/ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; else if (root->window_pathkeys) root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) root->query_pathkeys = root->distinct_pathkeys; else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL; /* * Figure out whether there's a hard limit on the number of rows that * query_planner's result subplan needs to return. Even if we know a * hard limit overall, it doesn't apply if the query has any * grouping/aggregation operations. */ if (parse->groupClause || parse->distinctClause || parse->hasAggs || parse->hasWindowFuncs || root->hasHavingQual) sub_limit_tuples = -1.0; else sub_limit_tuples = limit_tuples; /* * Generate the best unsorted and presorted paths for this Query (but * note there may not be any presorted path). query_planner will also * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); /* * Extract rowcount and width estimates for possible use in grouping * decisions. Beware here of the possibility that * cheapest_path->parent is NULL (ie, there is no FROM clause). */ if (cheapest_path->parent) { path_rows = cheapest_path->parent->rows; path_width = cheapest_path->parent->width; } else { path_rows = 1; /* assume non-set result */ path_width = 100; /* arbitrary */ } if (parse->groupClause) { /* * If grouping, decide whether to use sorted or hashed grouping. */ use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path, sorted_path, dNumGroups, &agg_costs); /* Also convert # groups to long int --- but 'ware overflow! 
*/ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } else if (parse->distinctClause && sorted_path && !root->hasHavingQual && !parse->hasAggs && !activeWindows) { /* * We'll reach the DISTINCT stage without any intermediate * processing, so figure out whether we will want to hash or not * so we can choose whether to use cheapest or sorted path. */ use_hashed_distinct = choose_hashed_distinct(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path->startup_cost, cheapest_path->total_cost, sorted_path->startup_cost, sorted_path->total_cost, sorted_path->pathkeys, dNumGroups); tested_hashed_distinct = true; } /* * Select the best path. If we are doing hashed grouping, we will * always read all the input tuples, so use the cheapest-total path. * Otherwise, trust query_planner's decision about which to use. */ if (use_hashed_grouping || use_hashed_distinct || !sorted_path) best_path = cheapest_path; else best_path = sorted_path; /* * Check to see if it's possible to optimize MIN/MAX aggregates. If * so, we will forget all the work we did so far to choose a "regular" * path ... but we had to do it anyway to be able to tell which way is * cheaper. */ result_plan = optimize_minmax_aggregates(root, tlist, &agg_costs, best_path); if (result_plan != NULL) { /* * optimize_minmax_aggregates generated the full plan, with the * right tlist, and it has no sort order. */ current_pathkeys = NIL; } else { /* * Normal case --- create a plan according to query_planner's * results. */ bool need_sort_for_grouping = false; result_plan = create_plan(root, best_path); current_pathkeys = best_path->pathkeys; /* Detect if we'll need an explicit sort for grouping */ if (parse->groupClause && !use_hashed_grouping && !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { need_sort_for_grouping = true; /* * Always override create_plan's tlist, so that we don't sort * useless data from a "physical" tlist. 
*/ need_tlist_eval = true; } /* * create_plan returns a plan with just a "flat" tlist of required * Vars. Usually we need to insert the sub_tlist as the tlist of * the top plan node. However, we can skip that if we determined * that whatever create_plan chose to return will be good enough. */ if (need_tlist_eval) { /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the * desired tlist. */ if (!is_projection_capable_plan(result_plan)) { result_plan = (Plan *) make_result(root, sub_tlist, NULL, result_plan); } else { /* * Otherwise, just replace the subplan's flat tlist with * the desired tlist. */ result_plan->targetlist = sub_tlist; } /* * Also, account for the cost of evaluation of the sub_tlist. * See comments for add_tlist_costs_to_plan() for more info. */ add_tlist_costs_to_plan(root, result_plan, sub_tlist); } else { /* * Since we're using create_plan's tlist and not the one * make_subplanTargetList calculated, we have to refigure any * grouping-column indexes make_subplanTargetList computed. */ locate_grouping_columns(root, tlist, result_plan->targetlist, groupColIdx); } /* * Insert AGG or GROUP node if needed, plus an explicit sort step * if necessary. * * HAVING clause, if any, becomes qual of the Agg or Group node. 
*/ if (use_hashed_grouping) { /* Hashed aggregate plan --- no sort needed */ result_plan = (Plan *) make_agg(root, tlist, (List *) parse->havingQual, AGG_HASHED, &agg_costs, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), numGroups, result_plan); /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } else if (parse->hasAggs) { /* Plain aggregate plan --- sort if needed */ AggStrategy aggstrategy; if (parse->groupClause) { if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); current_pathkeys = root->group_pathkeys; } aggstrategy = AGG_SORTED; /* * The AGG node will not change the sort ordering of its * groups, so current_pathkeys describes the result too. */ } else { aggstrategy = AGG_PLAIN; /* Result will be only one row anyway; no sort order */ current_pathkeys = NIL; } result_plan = (Plan *) make_agg(root, tlist, (List *) parse->havingQual, aggstrategy, &agg_costs, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), numGroups, result_plan); } else if (parse->groupClause) { /* * GROUP BY without aggregation, so insert a group node (plus * the appropriate sort node, if necessary). * * Add an explicit sort if we couldn't make the path come out * the way the GROUP node needs it. */ if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); current_pathkeys = root->group_pathkeys; } result_plan = (Plan *) make_group(root, tlist, (List *) parse->havingQual, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), dNumGroups, result_plan); /* The Group node won't change sort ordering */ } else if (root->hasHavingQual) { /* * No aggregates, and no GROUP BY, but we have a HAVING qual. * This is a degenerate case in which we are supposed to emit * either 0 or 1 row depending on whether HAVING succeeds. 
* Furthermore, there cannot be any variables in either HAVING * or the targetlist, so we actually do not need the FROM * table at all! We can just throw away the plan-so-far and * generate a Result node. This is a sufficiently unusual * corner case that it's not worth contorting the structure of * this routine to avoid having to generate the plan in the * first place. */ result_plan = (Plan *) make_result(root, tlist, parse->havingQual, NULL); } } /* end of non-minmax-aggregate case */ /* * Since each window function could require a different sort order, we * stack up a WindowAgg node for each window, with sort steps between * them as needed. */ if (activeWindows) { List *window_tlist; ListCell *l; /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the desired * tlist. (In some cases this might not really be required, but * it's not worth trying to avoid it.) Note that on second and * subsequent passes through the following loop, the top-level * node will be a WindowAgg which we know can project; so we only * need to check once. */ if (!is_projection_capable_plan(result_plan)) { result_plan = (Plan *) make_result(root, NIL, NULL, result_plan); } /* * The "base" targetlist for all steps of the windowing process is * a flat tlist of all Vars and Aggs needed in the result. (In * some cases we wouldn't need to propagate all of these all the * way to the top, since they might only be needed as inputs to * WindowFuncs. It's probably not worth trying to optimize that * though.) We also add window partitioning and sorting * expressions to the base tlist, to ensure they're computed only * once at the bottom of the stack (that's critical for volatile * functions). As we climb up the stack, we'll add outputs for * the WindowFuncs computed at each level. 
*/ window_tlist = make_windowInputTargetList(root, tlist, activeWindows); /* * The copyObject steps here are needed to ensure that each plan * node has a separately modifiable tlist. (XXX wouldn't a * shallow list copy do for that?) */ result_plan->targetlist = (List *) copyObject(window_tlist); foreach(l, activeWindows) { WindowClause *wc = (WindowClause *) lfirst(l); List *window_pathkeys; int partNumCols; AttrNumber *partColIdx; Oid *partOperators; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; window_pathkeys = make_pathkeys_for_window(root, wc, tlist, true); /* * This is a bit tricky: we build a sort node even if we don't * really have to sort. Even when no explicit sort is needed, * we need to have suitable resjunk items added to the input * plan's tlist for any partitioning or ordering columns that * aren't plain Vars. (In theory, make_windowInputTargetList * should have provided all such columns, but let's not assume * that here.) Furthermore, this way we can use existing * infrastructure to identify which input columns are the * interesting ones. 
*/ if (window_pathkeys) { Sort *sort_plan; sort_plan = make_sort_from_pathkeys(root, result_plan, window_pathkeys, -1.0); if (!pathkeys_contained_in(window_pathkeys, current_pathkeys)) { /* we do indeed need to sort */ result_plan = (Plan *) sort_plan; current_pathkeys = window_pathkeys; } /* In either case, extract the per-column information */ get_column_info_for_window(root, wc, tlist, sort_plan->numCols, sort_plan->sortColIdx, &partNumCols, &partColIdx, &partOperators, &ordNumCols, &ordColIdx, &ordOperators); } else { /* empty window specification, nothing to sort */ partNumCols = 0; partColIdx = NULL; partOperators = NULL; ordNumCols = 0; ordColIdx = NULL; ordOperators = NULL; } if (lnext(l)) { /* Add the current WindowFuncs to the running tlist */ window_tlist = add_to_flat_tlist(window_tlist, wflists->windowFuncs[wc->winref]); } else { /* Install the original tlist in the topmost WindowAgg */ window_tlist = tlist; } /* ... and make the WindowAgg plan node */ result_plan = (Plan *) make_windowagg(root, (List *) copyObject(window_tlist), wflists->windowFuncs[wc->winref], wc->winref, partNumCols, partColIdx, partOperators, ordNumCols, ordColIdx, ordOperators, wc->frameOptions, wc->startOffset, wc->endOffset, result_plan); } } } /* end of if (setOperations) */ /* * If there is a DISTINCT clause, add the necessary node(s). */ if (parse->distinctClause) { double dNumDistinctRows; long numDistinctRows; /* * If there was grouping or aggregation, use the current number of * rows as the estimated number of DISTINCT rows (ie, assume the * result was already mostly unique). If not, use the number of * distinct-groups calculated by query_planner. */ if (parse->groupClause || root->hasHavingQual || parse->hasAggs) dNumDistinctRows = result_plan->plan_rows; else dNumDistinctRows = dNumGroups; /* Also convert to long int --- but 'ware overflow! 
*/ numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX); /* Choose implementation method if we didn't already */ if (!tested_hashed_distinct) { /* * At this point, either hashed or sorted grouping will have to * work from result_plan, so we pass that as both "cheapest" and * "sorted". */ use_hashed_distinct = choose_hashed_distinct(root, tuple_fraction, limit_tuples, result_plan->plan_rows, result_plan->plan_width, result_plan->startup_cost, result_plan->total_cost, result_plan->startup_cost, result_plan->total_cost, current_pathkeys, dNumDistinctRows); } if (use_hashed_distinct) { /* Hashed aggregate plan --- no sort needed */ result_plan = (Plan *) make_agg(root, result_plan->targetlist, NIL, AGG_HASHED, NULL, list_length(parse->distinctClause), extract_grouping_cols(parse->distinctClause, result_plan->targetlist), extract_grouping_ops(parse->distinctClause), numDistinctRows, result_plan); /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } else { /* * Use a Unique node to implement DISTINCT. Add an explicit sort * if we couldn't make the path come out the way the Unique node * needs it. If we do have to sort, always sort by the more * rigorous of DISTINCT and ORDER BY, to avoid a second sort * below. However, for regular DISTINCT, don't sort now if we * don't have to --- sorting afterwards will likely be cheaper, * and also has the possibility of optimizing via LIMIT. But for * DISTINCT ON, we *must* force the final sort now, else it won't * have the desired behavior. 
*/ List *needed_pathkeys; if (parse->hasDistinctOn && list_length(root->distinct_pathkeys) < list_length(root->sort_pathkeys)) needed_pathkeys = root->sort_pathkeys; else needed_pathkeys = root->distinct_pathkeys; if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys)) { if (list_length(root->distinct_pathkeys) >= list_length(root->sort_pathkeys)) current_pathkeys = root->distinct_pathkeys; else { current_pathkeys = root->sort_pathkeys; /* Assert checks that parser didn't mess up... */ Assert(pathkeys_contained_in(root->distinct_pathkeys, current_pathkeys)); } result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, current_pathkeys, -1.0); } result_plan = (Plan *) make_unique(result_plan, parse->distinctClause); result_plan->plan_rows = dNumDistinctRows; /* The Unique node won't change sort ordering */ } } /* * If ORDER BY was given and we were not able to make the plan come out in * the right order, add an explicit sort step. */ if (parse->sortClause) { if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) { result_plan = (Plan *) make_sort_from_pathkeys(root, result_plan, root->sort_pathkeys, limit_tuples); current_pathkeys = root->sort_pathkeys; } } /* * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we * intentionally test parse->rowMarks not root->rowMarks here. If there * are only non-locking rowmarks, they should be handled by the * ModifyTable node instead.) */ if (parse->rowMarks) { result_plan = (Plan *) make_lockrows(result_plan, root->rowMarks, SS_assign_special_param(root)); /* * The result can no longer be assumed sorted, since locking might * cause the sort key columns to be replaced with new values. */ current_pathkeys = NIL; } /* * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node. 
*/ if (parse->limitCount || parse->limitOffset) { result_plan = (Plan *) make_limit(result_plan, parse->limitOffset, parse->limitCount, offset_est, count_est); } /* * Return the actual output ordering in query_pathkeys for possible use by * an outer query level. */ root->query_pathkeys = current_pathkeys; return result_plan;
下面对这段代码逐步进行分析:
对我的简单查询而言,由于其中没有 LIMIT/OFFSET 子句,if (parse->limitCount || parse->limitOffset) 这个条件不成立,因此下面这段代码会被跳过:
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */ if (parse->limitCount || parse->limitOffset) { tuple_fraction = preprocess_limit(root, tuple_fraction, &offset_est, &count_est); /* * If we have a known LIMIT, and don't have an unknown OFFSET, we can * estimate the effects of using a bounded sort. */ if (count_est > 0 && offset_est >= 0) limit_tuples = (double) count_est + (double) offset_est; }
接下来,由于我的简单查询不包含集合运算(UNION/INTERSECT/EXCEPT),parse->setOperations 为空,程序只会进入 else 分支,所以代码可以简化为:
if (parse->setOperations) { ... } else { /* No set operations, do regular planning */ List *sub_tlist; double sub_limit_tuples; AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; Path *cheapest_path; Path *sorted_path; Path *best_path; long numGroups = 0; AggClauseCosts agg_costs; int numGroupCols; double path_rows; int path_width; bool use_hashed_grouping = false; WindowFuncLists *wflists = NULL; List *activeWindows = NIL; MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); /* A recursive query should always have setOperations */ Assert(!root->hasRecursion); /* Preprocess GROUP BY clause, if any */ if (parse->groupClause) preprocess_groupclause(root); numGroupCols = list_length(parse->groupClause); /* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist); /* * Locate any window functions in the tlist. (We don't need to look * anywhere else, since expressions used in ORDER BY will be in there * too.) Note that they could all have been eliminated by constant * folding, in which case we don't need to do any more work. */ if (parse->hasWindowFuncs) { wflists = find_window_functions((Node *) tlist, list_length(parse->windowClause)); if (wflists->numWindowFuncs > 0) activeWindows = select_active_windows(root, wflists); else parse->hasWindowFuncs = false; } /* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. */ sub_tlist = make_subplanTargetList(root, tlist, &groupColIdx, &need_tlist_eval); /* * Do aggregate preprocessing, if the query has any aggs. * * Note: think not that we can turn off hasAggs if we find no aggs. It * is possible for constant-expression simplification to remove all * explicit references to aggs, but we still have to follow the * aggregate semantics (eg, producing only one output row). */ if (parse->hasAggs) { /* * Collect statistics about aggregates for estimating costs. 
Note: * we do not attempt to detect duplicate aggregates here; a * somewhat-overestimated cost is okay for our present purposes. */ count_agg_clauses(root, (Node *) tlist, &agg_costs); count_agg_clauses(root, parse->havingQual, &agg_costs); /* * Preprocess MIN/MAX aggregates, if any. Note: be careful about * adding logic between here and the optimize_minmax_aggregates * call. Anything that is needed in MIN/MAX-optimizable cases * will have to be duplicated in planagg.c. */ preprocess_minmax_aggregates(root, tlist); } /* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize * them after EquivalenceClasses have been formed. The sortClause is * certainly sort-able, but GROUP BY and DISTINCT might not be, in * which case we just leave their pathkeys empty. */ if (parse->groupClause && grouping_is_sortable(parse->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, tlist, false); else root->group_pathkeys = NIL; /* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) { WindowClause *wc = (WindowClause *) linitial(activeWindows); root->window_pathkeys = make_pathkeys_for_window(root, wc, tlist, false); } else root->window_pathkeys = NIL; if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, false); else root->distinct_pathkeys = NIL; root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, false); /* * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted * properly for grouping. Otherwise, if we have window functions to * evaluate, we try to sort for the first window. 
Otherwise, if * there's a sortable DISTINCT clause that's more rigorous than the * ORDER BY clause, we try to produce output that's sufficiently well * sorted for the DISTINCT. Otherwise, if there is an ORDER BY * clause, we want to sort by the ORDER BY clause. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a * superset of GROUP BY, it would be tempting to request sort by ORDER * BY --- but that might just leave us failing to exploit an available * sort order at all. Needs more thought. The choice for DISTINCT * versus ORDER BY is much easier, since we know that the parser * ensured that one is a superset of the other. */ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; else if (root->window_pathkeys) root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) root->query_pathkeys = root->distinct_pathkeys; else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL; /* * Figure out whether there's a hard limit on the number of rows that * query_planner's result subplan needs to return. Even if we know a * hard limit overall, it doesn't apply if the query has any * grouping/aggregation operations. */ if (parse->groupClause || parse->distinctClause || parse->hasAggs || parse->hasWindowFuncs || root->hasHavingQual) sub_limit_tuples = -1.0; else sub_limit_tuples = limit_tuples; /* * Generate the best unsorted and presorted paths for this Query (but * note there may not be any presorted path). query_planner will also * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples, &cheapest_path, &sorted_path, &dNumGroups); /* * Extract rowcount and width estimates for possible use in grouping * decisions. Beware here of the possibility that * cheapest_path->parent is NULL (ie, there is no FROM clause). 
*/ if (cheapest_path->parent) { path_rows = cheapest_path->parent->rows; path_width = cheapest_path->parent->width; } else { path_rows = 1; /* assume non-set result */ path_width = 100; /* arbitrary */ } if (parse->groupClause) { /* * If grouping, decide whether to use sorted or hashed grouping. */ use_hashed_grouping = choose_hashed_grouping(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path, sorted_path, dNumGroups, &agg_costs); /* Also convert # groups to long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); } else if (parse->distinctClause && sorted_path && !root->hasHavingQual && !parse->hasAggs && !activeWindows) { /* * We'll reach the DISTINCT stage without any intermediate * processing, so figure out whether we will want to hash or not * so we can choose whether to use cheapest or sorted path. */ use_hashed_distinct = choose_hashed_distinct(root, tuple_fraction, limit_tuples, path_rows, path_width, cheapest_path->startup_cost, cheapest_path->total_cost, sorted_path->startup_cost, sorted_path->total_cost, sorted_path->pathkeys, dNumGroups); tested_hashed_distinct = true; } /* * Select the best path. If we are doing hashed grouping, we will * always read all the input tuples, so use the cheapest-total path. * Otherwise, trust query_planner's decision about which to use. */ if (use_hashed_grouping || use_hashed_distinct || !sorted_path) best_path = cheapest_path; else best_path = sorted_path; /* * Check to see if it's possible to optimize MIN/MAX aggregates. If * so, we will forget all the work we did so far to choose a "regular" * path ... but we had to do it anyway to be able to tell which way is * cheaper. */ result_plan = optimize_minmax_aggregates(root, tlist, &agg_costs, best_path); if (result_plan != NULL) { /* * optimize_minmax_aggregates generated the full plan, with the * right tlist, and it has no sort order. 
*/ current_pathkeys = NIL; } else { /* * Normal case --- create a plan according to query_planner's * results. */ bool need_sort_for_grouping = false; result_plan = create_plan(root, best_path); current_pathkeys = best_path->pathkeys; /* Detect if we'll need an explicit sort for grouping */ if (parse->groupClause && !use_hashed_grouping && !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { need_sort_for_grouping = true; /* * Always override create_plan's tlist, so that we don't sort * useless data from a "physical" tlist. */ need_tlist_eval = true; } /* * create_plan returns a plan with just a "flat" tlist of required * Vars. Usually we need to insert the sub_tlist as the tlist of * the top plan node. However, we can skip that if we determined * that whatever create_plan chose to return will be good enough. */ if (need_tlist_eval) { /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the * desired tlist. */ if (!is_projection_capable_plan(result_plan)) { result_plan = (Plan *) make_result(root, sub_tlist, NULL, result_plan); } else { /* * Otherwise, just replace the subplan's flat tlist with * the desired tlist. */ result_plan->targetlist = sub_tlist; } /* * Also, account for the cost of evaluation of the sub_tlist. * See comments for add_tlist_costs_to_plan() for more info. */ add_tlist_costs_to_plan(root, result_plan, sub_tlist); } else { /* * Since we're using create_plan's tlist and not the one * make_subplanTargetList calculated, we have to refigure any * grouping-column indexes make_subplanTargetList computed. */ locate_grouping_columns(root, tlist, result_plan->targetlist, groupColIdx); } /* * Insert AGG or GROUP node if needed, plus an explicit sort step * if necessary. * * HAVING clause, if any, becomes qual of the Agg or Group node. 
*/ if (use_hashed_grouping) { /* Hashed aggregate plan --- no sort needed */ result_plan = (Plan *) make_agg(root, tlist, (List *) parse->havingQual, AGG_HASHED, &agg_costs, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), numGroups, result_plan); /* Hashed aggregation produces randomly-ordered results */ current_pathkeys = NIL; } else if (parse->hasAggs) { /* Plain aggregate plan --- sort if needed */ AggStrategy aggstrategy; if (parse->groupClause) { if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); current_pathkeys = root->group_pathkeys; } aggstrategy = AGG_SORTED; /* * The AGG node will not change the sort ordering of its * groups, so current_pathkeys describes the result too. */ } else { aggstrategy = AGG_PLAIN; /* Result will be only one row anyway; no sort order */ current_pathkeys = NIL; } result_plan = (Plan *) make_agg(root, tlist, (List *) parse->havingQual, aggstrategy, &agg_costs, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), numGroups, result_plan); } else if (parse->groupClause) { /* * GROUP BY without aggregation, so insert a group node (plus * the appropriate sort node, if necessary). * * Add an explicit sort if we couldn't make the path come out * the way the GROUP node needs it. */ if (need_sort_for_grouping) { result_plan = (Plan *) make_sort_from_groupcols(root, parse->groupClause, groupColIdx, result_plan); current_pathkeys = root->group_pathkeys; } result_plan = (Plan *) make_group(root, tlist, (List *) parse->havingQual, numGroupCols, groupColIdx, extract_grouping_ops(parse->groupClause), dNumGroups, result_plan); /* The Group node won't change sort ordering */ } else if (root->hasHavingQual) { /* * No aggregates, and no GROUP BY, but we have a HAVING qual. * This is a degenerate case in which we are supposed to emit * either 0 or 1 row depending on whether HAVING succeeds. 
* Furthermore, there cannot be any variables in either HAVING * or the targetlist, so we actually do not need the FROM * table at all! We can just throw away the plan-so-far and * generate a Result node. This is a sufficiently unusual * corner case that it's not worth contorting the structure of * this routine to avoid having to generate the plan in the * first place. */ result_plan = (Plan *) make_result(root, tlist, parse->havingQual, NULL); } } /* end of non-minmax-aggregate case */ /* * Since each window function could require a different sort order, we * stack up a WindowAgg node for each window, with sort steps between * them as needed. */ if (activeWindows) { List *window_tlist; ListCell *l; /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the desired * tlist. (In some cases this might not really be required, but * it's not worth trying to avoid it.) Note that on second and * subsequent passes through the following loop, the top-level * node will be a WindowAgg which we know can project; so we only * need to check once. */ if (!is_projection_capable_plan(result_plan)) { result_plan = (Plan *) make_result(root, NIL, NULL, result_plan); } /* * The "base" targetlist for all steps of the windowing process is * a flat tlist of all Vars and Aggs needed in the result. (In * some cases we wouldn't need to propagate all of these all the * way to the top, since they might only be needed as inputs to * WindowFuncs. It's probably not worth trying to optimize that * though.) We also add window partitioning and sorting * expressions to the base tlist, to ensure they're computed only * once at the bottom of the stack (that's critical for volatile * functions). As we climb up the stack, we'll add outputs for * the WindowFuncs computed at each level. 
*/ window_tlist = make_windowInputTargetList(root, tlist, activeWindows); /* * The copyObject steps here are needed to ensure that each plan * node has a separately modifiable tlist. (XXX wouldn't a * shallow list copy do for that?) */ result_plan->targetlist = (List *) copyObject(window_tlist); foreach(l, activeWindows) { WindowClause *wc = (WindowClause *) lfirst(l); List *window_pathkeys; int partNumCols; AttrNumber *partColIdx; Oid *partOperators; int ordNumCols; AttrNumber *ordColIdx; Oid *ordOperators; window_pathkeys = make_pathkeys_for_window(root, wc, tlist, true); /* * This is a bit tricky: we build a sort node even if we don't * really have to sort. Even when no explicit sort is needed, * we need to have suitable resjunk items added to the input * plan's tlist for any partitioning or ordering columns that * aren't plain Vars. (In theory, make_windowInputTargetList * should have provided all such columns, but let's not assume * that here.) Furthermore, this way we can use existing * infrastructure to identify which input columns are the * interesting ones. 
*/ if (window_pathkeys) { Sort *sort_plan; sort_plan = make_sort_from_pathkeys(root, result_plan, window_pathkeys, -1.0); if (!pathkeys_contained_in(window_pathkeys, current_pathkeys)) { /* we do indeed need to sort */ result_plan = (Plan *) sort_plan; current_pathkeys = window_pathkeys; } /* In either case, extract the per-column information */ get_column_info_for_window(root, wc, tlist, sort_plan->numCols, sort_plan->sortColIdx, &partNumCols, &partColIdx, &partOperators, &ordNumCols, &ordColIdx, &ordOperators); } else { /* empty window specification, nothing to sort */ partNumCols = 0; partColIdx = NULL; partOperators = NULL; ordNumCols = 0; ordColIdx = NULL; ordOperators = NULL; } if (lnext(l)) { /* Add the current WindowFuncs to the running tlist */ window_tlist = add_to_flat_tlist(window_tlist, wflists->windowFuncs[wc->winref]); } else { /* Install the original tlist in the topmost WindowAgg */ window_tlist = tlist; } /* ... and make the WindowAgg plan node */ result_plan = (Plan *) make_windowagg(root, (List *) copyObject(window_tlist), wflists->windowFuncs[wc->winref], wc->winref, partNumCols, partColIdx, partOperators, ordNumCols, ordColIdx, ordOperators, wc->frameOptions, wc->startOffset, wc->endOffset, result_plan); } } } /* end of if (setOperations) */
还是太复杂,进一步分析 else分支:
因为我的查询没有 group by,所以这一小段可以无视:
/* Preprocess GROUP BY clause, if any */ if (parse->groupClause) preprocess_groupclause(root); numGroupCols = list_length(parse->groupClause);
下面的:
/* Preprocess targetlist */ tlist = preprocess_targetlist(root, tlist);
我如果用 select id, val from tst04 where id >1 ,则tlist 的长度是 2。
我如果用 select id from tst04 where id> 1,则 tlist 的长度是1。
我如果用 select * from tst04 where id>1 ,则 tlist 长度也是2(tst04共有两个字段)。
也就是说,preprocess_targetlist 取得的是 SQL 查询应当返回的输出列的 list(每个输出列对应 list 中的一项)。
接着,因为没有使用窗口函数,如下这一段也可以无视:
/* * Locate any window functions in the tlist. (We don't need to look * anywhere else, since expressions used in ORDER BY will be in there * too.) Note that they could all have been eliminated by constant * folding, in which case we don't need to do any more work. */ if (parse->hasWindowFuncs) { wflists = find_window_functions((Node *) tlist, list_length(parse->windowClause)); if (wflists->numWindowFuncs > 0) activeWindows = select_active_windows(root, wflists); else parse->hasWindowFuncs = false; }
再接着:
/* * Generate appropriate target list for subplan; may be different from * tlist if grouping or aggregation is needed. */ sub_tlist = make_subplanTargetList(root, tlist, &groupColIdx, &need_tlist_eval);
因为我的查询既没有分组也没有聚集,这里得到的 sub_tlist 和前面 preprocess_targetlist 得到的 tlist 非常类似。
接下来,因为没使用聚集函数,所以也可无视:
/* * Do aggregate preprocessing, if the query has any aggs. * * Note: think not that we can turn off hasAggs if we find no aggs. It * is possible for constant-expression simplification to remove all * explicit references to aggs, but we still have to follow the * aggregate semantics (eg, producing only one output row). */ if (parse->hasAggs) { /* * Collect statistics about aggregates for estimating costs. Note: * we do not attempt to detect duplicate aggregates here; a * somewhat-overestimated cost is okay for our present purposes. */ count_agg_clauses(root, (Node *) tlist, &agg_costs); count_agg_clauses(root, parse->havingQual, &agg_costs); /* * Preprocess MIN/MAX aggregates, if any. Note: be careful about * adding logic between here and the optimize_minmax_aggregates * call. Anything that is needed in MIN/MAX-optimizable cases * will have to be duplicated in planagg.c. */ preprocess_minmax_aggregates(root, tlist); }
接下来,
/* * Calculate pathkeys that represent grouping/ordering requirements. * Stash them in PlannerInfo so that query_planner can canonicalize * them after EquivalenceClasses have been formed. The sortClause is * certainly sort-able, but GROUP BY and DISTINCT might not be, in * which case we just leave their pathkeys empty. */ if (parse->groupClause && grouping_is_sortable(parse->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, parse->groupClause, tlist, false); else root->group_pathkeys = NIL;
得到 root->group_pathkeys 为 NIL。
再看下面的一大段:
/* We consider only the first (bottom) window in pathkeys logic */ if (activeWindows != NIL) { WindowClause *wc = (WindowClause *) linitial(activeWindows); root->window_pathkeys = make_pathkeys_for_window(root, wc, tlist, false); } else root->window_pathkeys = NIL; if (parse->distinctClause && grouping_is_sortable(parse->distinctClause)) root->distinct_pathkeys = make_pathkeys_for_sortclauses(root, parse->distinctClause, tlist, false); else root->distinct_pathkeys = NIL; root->sort_pathkeys = make_pathkeys_for_sortclauses(root, parse->sortClause, tlist, false);
以上,root->window_pathkeys =NIL,root->distinct_pathkeys=NIL。
同样,因为没有 order by,root->sort_pathkeys 的长度为 0,也就是 NIL。
接下来,下面这段,导致 root->query_pathkeys = NIL。
/* * Figure out whether we want a sorted result from query_planner. * * If we have a sortable GROUP BY clause, then we want a result sorted * properly for grouping. Otherwise, if we have window functions to * evaluate, we try to sort for the first window. Otherwise, if * there's a sortable DISTINCT clause that's more rigorous than the * ORDER BY clause, we try to produce output that's sufficiently well * sorted for the DISTINCT. Otherwise, if there is an ORDER BY * clause, we want to sort by the ORDER BY clause. * * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a * superset of GROUP BY, it would be tempting to request sort by ORDER * BY --- but that might just leave us failing to exploit an available * sort order at all. Needs more thought. The choice for DISTINCT * versus ORDER BY is much easier, since we know that the parser * ensured that one is a superset of the other. */ if (root->group_pathkeys) root->query_pathkeys = root->group_pathkeys; else if (root->window_pathkeys) root->query_pathkeys = root->window_pathkeys; else if (list_length(root->distinct_pathkeys) > list_length(root->sort_pathkeys)) root->query_pathkeys = root->distinct_pathkeys; else if (root->sort_pathkeys) root->query_pathkeys = root->sort_pathkeys; else root->query_pathkeys = NIL;
接下来,因为我的简单查询没有 limit,所以经过下面这段之后:sub_limit_tuples = limit_tuples = -1.0
/* * Figure out whether there's a hard limit on the number of rows that * query_planner's result subplan needs to return. Even if we know a * hard limit overall, it doesn't apply if the query has any * grouping/aggregation operations. */ if (parse->groupClause || parse->distinctClause || parse->hasAggs || parse->hasWindowFuncs || root->hasHavingQual) sub_limit_tuples = -1.0; else sub_limit_tuples = limit_tuples;
下面,又到达了一个比较重要的部分:
/* * Generate the best unsorted and presorted paths for this Query (but * note there may not be any presorted path). query_planner will also * estimate the number of groups in the query, and canonicalize all * the pathkeys. */ query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples, &cheapest_path, &sorted_path, &dNumGroups);