PostgreSQL在何处处理 sql查询之四十五
现在,将要进入最为重要的部分:
分析如何制作执行计划的。
先把它简化,看看NodeTag 在 PortalStart 之前,是如何得到的。
先找到第一个相关程序:
每一种执行计划(结点单位),都有一个对应的path。
/***************************************************************************** * PATH NODE CREATION ROUTINES *****************************************************************************/ /* * create_seqscan_path * Creates a path corresponding to a sequential scan, returning the * pathnode. */ Path * create_seqscan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer) { Path *pathnode = makeNode(Path); pathnode->pathtype = T_SeqScan; pathnode->parent = rel; pathnode->param_info = get_baserel_parampathinfo(root, rel, required_outer); pathnode->pathkeys = NIL; /* seqscan has unordered result */ cost_seqscan(pathnode, root, rel, pathnode->param_info); return pathnode; } /* * create_index_path * Creates a path node for an index scan. * * 'index' is a usable index. * 'indexclauses' is a list of RestrictInfo nodes representing clauses * to be used as index qual conditions in the scan. * 'indexclausecols' is an integer list of index column numbers (zero based) * the indexclauses can be used with. * 'indexorderbys' is a list of bare expressions (no RestrictInfos) * to be used as index ordering operators in the scan. * 'indexorderbycols' is an integer list of index column numbers (zero based) * the ordering operators can be used with. * 'pathkeys' describes the ordering of the path. * 'indexscandir' is ForwardScanDirection or BackwardScanDirection * for an ordered index, or NoMovementScanDirection for * an unordered index. * 'indexonly' is true if an index-only scan is wanted. * 'required_outer' is the set of outer relids for a parameterized path. * 'loop_count' is the number of repetitions of the indexscan to factor into * estimates of caching behavior. * * Returns the new path node. */ IndexPath * create_index_path(PlannerInfo *root, IndexOptInfo *index, List *indexclauses, List *indexclausecols, List *indexorderbys, List *indexorderbycols, List *pathkeys, ScanDirection indexscandir, bool indexonly, Relids required_outer, double loop_count) { IndexPath *pathnode = makeNode(IndexPath); RelOptInfo *rel = index->rel; List *indexquals, *indexqualcols; pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan; pathnode->path.parent = rel; pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); pathnode->path.pathkeys = pathkeys; /* Convert clauses to indexquals the executor can handle */ expand_indexqual_conditions(index, indexclauses, indexclausecols, &indexquals, &indexqualcols); /* Fill in the pathnode */ pathnode->indexinfo = index; pathnode->indexclauses = indexclauses; pathnode->indexquals = indexquals; pathnode->indexqualcols = indexqualcols; pathnode->indexorderbys = indexorderbys; pathnode->indexorderbycols = indexorderbycols; pathnode->indexscandir = indexscandir; cost_index(pathnode, root, loop_count); return pathnode; }
对seq scan而言,其上层是:
/* * set_plain_rel_pathlist * Build access paths for a plain relation (no subquery, no inheritance) */ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) { fprintf(stderr, "In %s\n",__FUNCTION__); /* Consider sequential scan */ add_path(rel, create_seqscan_path(root, rel, NULL)); /* Consider index scans */ create_index_paths(root, rel); /* Consider TID scans */ create_tidscan_paths(root, rel); /* Now find the cheapest of the paths for this rel */ set_cheapest(rel); }
再上溯:
/* * set_rel_pathlist * Build access paths for a base relation */ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { if (IS_DUMMY_REL(rel)) { /* We already proved the relation empty, so nothing more to do */ } else if (rte->inh) { /* It's an "append relation", process accordingly */ set_append_rel_pathlist(root, rel, rti, rte); } else { switch (rel->rtekind) { case RTE_RELATION: if (rte->relkind == RELKIND_FOREIGN_TABLE) { /* Foreign table */ set_foreign_pathlist(root, rel, rte); } else { /* Plain relation */ set_plain_rel_pathlist(root, rel, rte); } break; case RTE_SUBQUERY: /* Subquery --- fully handled during set_rel_size */ break; case RTE_FUNCTION: /* RangeFunction */ set_function_pathlist(root, rel, rte); break; case RTE_VALUES: /* Values list */ set_values_pathlist(root, rel, rte); break; case RTE_CTE: /* CTE reference --- fully handled during set_rel_size */ break; default: elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); break; } } #ifdef OPTIMIZER_DEBUG debug_print_rel(root, rel); #endif }
接着,就是要分析 rel->rtekind 是什么东西了。
typedef struct RelOptInfo { NodeTag type; RelOptKind reloptkind; /* all relations included in this RelOptInfo */ Relids relids; /* set of base relids (rangetable indexes) */ /* size estimates generated by planner */ double rows; /* estimated number of result tuples */ int width; /* estimated avg width of result tuples */ /* materialization information */ List *reltargetlist; /* Vars to be output by scan of relation */ List *pathlist; /* Path structures */ List *ppilist; /* ParamPathInfos used in pathlist */ struct Path *cheapest_startup_path; struct Path *cheapest_total_path; struct Path *cheapest_unique_path; List *cheapest_parameterized_paths; /* information about a base rel (not set for join rels!) */ Index relid; Oid reltablespace; /* containing tablespace */ RTEKind rtekind; /* RELATION, SUBQUERY, or FUNCTION */ AttrNumber min_attr; /* smallest attrno of rel (often <0) */ AttrNumber max_attr; /* largest attrno of rel */ Relids *attr_needed; /* array indexed [min_attr .. max_attr] */ int32 *attr_widths; /* array indexed [min_attr .. max_attr] */ List *indexlist; /* list of IndexOptInfo */ BlockNumber pages; /* size estimates derived from pg_class */ double tuples; double allvisfrac; /* use "struct Plan" to avoid including plannodes.h here */ struct Plan *subplan; /* if subquery */ PlannerInfo *subroot; /* if subquery */ /* use "struct FdwRoutine" to avoid including fdwapi.h here */ struct FdwRoutine *fdwroutine; /* if foreign table */ void *fdw_private; /* if foreign table */ /* used by various scans and joins: */ List *baserestrictinfo; /* RestrictInfo structures (if base * rel) */ QualCost baserestrictcost; /* cost of evaluating the above */ List *joininfo; /* RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* T means joininfo is incomplete */ } RelOptInfo;
RTEKind:
typedef enum RTEKind { RTE_RELATION, /* ordinary relation reference */ RTE_SUBQUERY, /* subquery in FROM */ RTE_JOIN, /* join */ RTE_FUNCTION, /* function in FROM */ RTE_VALUES, /* VALUES (<exprlist>), (<exprlist>), ... */ RTE_CTE /* common table expr (WITH list element) */ } RTEKind;
/* * Add an entry for a relation to the pstate's range table (p_rtable). * * If pstate is NULL, we just build an RTE and return it without adding it * to an rtable list. * * Note: formerly this checked for refname conflicts, but that's wrong. * Caller is responsible for checking for conflicts in the appropriate scope. */ RangeTblEntry * addRangeTableEntry(ParseState *pstate, RangeVar *relation, Alias *alias, bool inh, bool inFromCl) { RangeTblEntry *rte = makeNode(RangeTblEntry); char *refname = alias ? alias->aliasname : relation->relname; LOCKMODE lockmode; Relation rel; //fprintf(stderr,"In addRangeTableEntry: rtekind=RTE_RELATION\n"); rte->rtekind = RTE_RELATION; rte->alias = alias; /* * Get the rel's OID. This access also ensures that we have an up-to-date * relcache entry for the rel. Since this is typically the first access * to a rel in a statement, be careful to get the right access level * depending on whether we're doing SELECT FOR UPDATE/SHARE. */ lockmode = isLockedRefname(pstate, refname) ? RowShareLock : AccessShareLock; rel = parserOpenTable(pstate, relation, lockmode); rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; /* * Build the list of effective column names using user-supplied aliases * and/or actual column names. */ rte->eref = makeAlias(refname, NIL); buildRelationAliases(rel->rd_att, alias, rte->eref); /* * Drop the rel refcount, but keep the access lock till end of transaction * so that the table can't be deleted or have its schema modified * underneath us. */ heap_close(rel, NoLock); /*---------- * Flags: * - this RTE should be expanded to include descendant tables, * - this RTE is in the FROM clause, * - this RTE should be checked for appropriate access rights. * * The initial default on access checks is always check-for-READ-access, * which is the right thing for all except target tables. *---------- */ rte->inh = inh; rte->inFromCl = inFromCl; rte->requiredPerms = ACL_SELECT; rte->checkAsUser = InvalidOid; /* not set-uid by default, either */ rte->selectedCols = NULL; rte->modifiedCols = NULL; /* * Add completed RTE to pstate's range table list, but not to join list * nor namespace --- caller must do that if appropriate. */ if (pstate != NULL) pstate->p_rtable = lappend(pstate->p_rtable, rte); return rte; }