PostgreSQL在何处处理 sql查询之四十三
再次上溯可以知道:在进入 ExecutePlan 时,其入口参数 planstate 所带的扫描描述符中,rs_startblock 就已经是 0 了。
/* ---------------------------------------------------------------- * ExecutePlan * * Processes the query plan until we have processed 'numberTuples' tuples, * moving in the specified direction. * * Runs to completion if numberTuples is 0 * * Note: the ctid attribute is a 'junk' attribute that is removed before the * user can see it * ---------------------------------------------------------------- */ static void ExecutePlan(EState *estate, PlanState *planstate, CmdType operation, bool sendTuples, long numberTuples, ScanDirection direction, DestReceiver *dest) { TupleTableSlot *slot; long current_tuple_count; /* * initialize local variables */ current_tuple_count = 0; /* * Set the direction. */ estate->es_direction = direction; /* * Loop until we've processed the proper number of tuples from the plan. */ for (;;) { /* Reset the per-output-tuple exprcontext */ ResetPerTupleExprContext(estate); //ExecProcNode /** fprintf(stderr,"ExecutePlan:node->ss_currentScanDesc->rs_startblock is: %d by process %d\n", ((SeqScanState *) planstate)->ss_currentScanDesc->rs_startblock,getpid()); */ //////added by gaojian --start SeqScanState *seq_state = ( SeqScanState *)planstate; HeapScanDesc heapdesc = seq_state->ss_currentScanDesc; BlockNumber bnum; if (heapdesc != NULL) { //fprintf(stderr,"heapdesc is not null\n"); /** if (heapdesc->rs_startblock == NULL ) fprintf(stderr,"rs_startblock is NULL\n"); else fprintf(stderr,"rs_startblock is not NULL\n"); */ bnum = heapdesc ->rs_startblock; //fprintf(stderr,"bnum is %d\n",bnum); }else{ fprintf(stderr,"heapdesc is null\n"); } //fprintf(stderr,"startblock is:%zu\n",seq_state->ss_currentScanDesc->rs_startblock); //fprintf(stderr, "%d \n",getpid()); /////added by gaojian end /* * Execute the plan and obtain a tuple */ slot = ExecProcNode(planstate); /* * if the tuple is null, then we assume there is nothing more to * process so we just end the loop... 
*/ if (TupIsNull(slot)) break; /* * If we have a junk filter, then project a new tuple with the junk * removed. * * Store this new "clean" tuple in the junkfilter's resultSlot. * (Formerly, we stored it back over the "dirty" tuple, which is WRONG * because that tuple slot has the wrong descriptor.) */ if (estate->es_junkFilter != NULL) slot = ExecFilterJunk(estate->es_junkFilter, slot); /* * If we are supposed to send the tuple somewhere, do so. (In * practice, this is probably always the case at this point.) */ if (sendTuples) (*dest->receiveSlot) (slot, dest); /* * Count tuples processed, if this is a SELECT. (For other operation * types, the ModifyTable plan node must count the appropriate * events.) */ if (operation == CMD_SELECT) (estate->es_processed)++; /* * check our tuple count.. if we've processed the proper number then * quit, else loop again and process more tuples. Zero numberTuples * means no limit. */ current_tuple_count++; if (numberTuples && numberTuples == current_tuple_count) break; } }
再上溯:
其 planstate 来自于 queryDesc->planstate。
可以这样认为:在调用 ExecutePlan 之前,queryDesc->planstate 中的 rs_startblock 应该早已初始化好了。
/*
 * ExecutorRun
 *
 * Dispatches to the installed ExecutorRun_hook when one is set; otherwise
 * falls through to standard_ExecutorRun with the same arguments.
 */
void
ExecutorRun(QueryDesc *queryDesc,
			ScanDirection direction, long count)
{
	if (ExecutorRun_hook == NULL)
	{
		standard_ExecutorRun(queryDesc, direction, count);
		return;
	}

	(*ExecutorRun_hook) (queryDesc, direction, count);
}

/*
 * standard_ExecutorRun
 *
 * Runs the plan held in queryDesc->planstate via ExecutePlan, unless the
 * requested direction is "no movement".  The work is done inside the
 * per-query memory context (estate->es_query_cxt); the caller's context is
 * restored before returning.
 *
 *	queryDesc - query descriptor; its estate must already be set
 *	direction - scan direction forwarded to ExecutePlan
 *	count	  - tuple limit forwarded to ExecutePlan as numberTuples
 */
void
standard_ExecutorRun(QueryDesc *queryDesc,
					 ScanDirection direction, long count)
{
	EState	   *exec_state;
	CmdType		cmd;
	DestReceiver *receiver;
	bool		emit_tuples;
	MemoryContext caller_cxt;

	/* the descriptor and its executor state must exist; EXPLAIN-only is not runnable */
	Assert(queryDesc != NULL);

	exec_state = queryDesc->estate;

	Assert(exec_state != NULL);
	Assert(!(exec_state->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

	/* everything below runs in the per-query memory context */
	caller_cxt = MemoryContextSwitchTo(exec_state->es_query_cxt);

	/* start the overall-runtime instrumentation if it was requested */
	if (queryDesc->totaltime)
	{
		InstrStartNode(queryDesc->totaltime);
	}

	/* pull what we need out of the query descriptor */
	cmd = queryDesc->operation;
	receiver = queryDesc->dest;

	/* reset the per-run counters */
	exec_state->es_processed = 0;
	exec_state->es_lastoid = InvalidOid;

	/* tuples are emitted for SELECT and for statements with RETURNING */
	emit_tuples = (cmd == CMD_SELECT ||
				   queryDesc->plannedstmt->hasReturning);

	/* start up the tuple receiver if we will be emitting tuples */
	if (emit_tuples)
	{
		(*receiver->rStartup) (receiver, cmd, queryDesc->tupDesc);
	}

	/* run the plan, unless asked not to move at all */
	if (!ScanDirectionIsNoMovement(direction))
	{
		ExecutePlan(exec_state,
					queryDesc->planstate,
					cmd,
					emit_tuples,
					count,
					direction,
					receiver);
	}

	/* shut the receiver down again if we started it */
	if (emit_tuples)
	{
		(*receiver->rShutdown) (receiver);
	}

	/* stop the instrumentation, reporting the processed-tuple count */
	if (queryDesc->totaltime)
	{
		InstrStopNode(queryDesc->totaltime, exec_state->es_processed);
	}

	MemoryContextSwitchTo(caller_cxt);
}
再次上溯:
/*
 * PortalRunSelect (excerpt -- each "..." marks code omitted by the article)
 *
 * The visible forward-fetch path picks a scan direction from the portal's
 * position and the requested count, then obtains rows either from the
 * portal's hold store or by running the executor on the portal's QueryDesc.
 *
 *	portal	- portal to fetch from; atEnd and holdStore are read here
 *	forward - true selects the forward-fetch path shown below
 *	count	- number of rows wanted; FETCH_ALL is translated to 0
 *	dest	- receiver passed to RunFromStore (the ExecutorRun call shown
 *			  here does not take it as an argument)
 */
static long
PortalRunSelect(Portal portal,
				bool forward,
				long count,
				DestReceiver *dest)
{
	QueryDesc  *queryDesc;
	ScanDirection direction;
	uint32		nprocessed;

	/*
	 * NB: queryDesc will be NULL if we are fetching from a held cursor or a
	 * completed utility query; can't use it in that path.
	 */
	queryDesc = PortalGetQueryDesc(portal);

	...

	if (forward)
	{
		/* already at the end, or no rows requested: do not move at all */
		if (portal->atEnd || count <= 0)
			direction = NoMovementScanDirection;
		else
			direction = ForwardScanDirection;

		/* In the executor, zero count processes all rows */
		if (count == FETCH_ALL)
			count = 0;

		/* a hold store, when present, is the source of rows */
		if (portal->holdStore)
			nprocessed = RunFromStore(portal, direction, count, dest);
		else
		{
			/* run the executor with the query's snapshot pushed as active */
			PushActiveSnapshot(queryDesc->snapshot);
			ExecutorRun(queryDesc, direction, count);
			/* the row count is read back from the executor state */
			nprocessed = queryDesc->estate->es_processed;
			PopActiveSnapshot();
		}

		...
	}

	...
}
那么,portal 与 QueryDesc 又是什么关系呢?
看下面的定义:Portal 是指向 PortalData 的指针,而 PortalData 结构中保存着一个指向 QueryDesc 的指针 queryDesc。
/*
 * Portal / PortalData
 *
 * Portal is a pointer to PortalData.  The struct groups a portal's
 * bookkeeping data, the statements it will execute, its options and status,
 * the link to the active executor (queryDesc), the result tuple descriptor,
 * the optional hold store, and the current cursor position.
 */
typedef struct PortalData *Portal;

typedef struct PortalData
{
	/* Bookkeeping data */
	const char *name;			/* portal's name */
	const char *prepStmtName;	/* source prepared statement (NULL if none) */
	MemoryContext heap;			/* subsidiary memory for portal */
	ResourceOwner resowner;		/* resources owned by portal */
	void		(*cleanup) (Portal portal);		/* cleanup hook */
	SubTransactionId createSubid;		/* the ID of the creating subxact */

	/*
	 * if createSubid is InvalidSubTransactionId, the portal is held over from
	 * a previous transaction
	 */

	/* The query or queries the portal will execute */
	const char *sourceText;		/* text of query (as of 8.4, never NULL) */
	const char *commandTag;		/* command tag for original query */
	List	   *stmts;			/* PlannedStmts and/or utility statements */
	CachedPlan *cplan;			/* CachedPlan, if stmts are from one */

	ParamListInfo portalParams; /* params to pass to query */

	/* Features/options */
	PortalStrategy strategy;	/* see above */
	int			cursorOptions;	/* DECLARE CURSOR option bits */

	/* Status data */
	PortalStatus status;		/* see above */
	bool		portalPinned;	/* a pinned portal can't be dropped */

	/* If not NULL, Executor is active; call ExecutorEnd eventually: */
	QueryDesc  *queryDesc;		/* info needed for executor invocation */

	/* If portal returns tuples, this is their tupdesc: */
	TupleDesc	tupDesc;		/* descriptor for result tuples */
	/* and these are the format codes to use for the columns: */
	int16	   *formats;		/* a format code for each column */

	/*
	 * Where we store tuples for a held cursor or a PORTAL_ONE_RETURNING or
	 * PORTAL_UTIL_SELECT query.  (A cursor held past the end of its
	 * transaction no longer has any active executor state.)
	 */
	Tuplestorestate *holdStore; /* store for holdable cursors */
	MemoryContext holdContext;	/* memory containing holdStore */

	/*
	 * atStart, atEnd and portalPos indicate the current cursor position.
	 * portalPos is zero before the first row, N after fetching N'th row of
	 * query.  After we run off the end, portalPos = # of rows in query, and
	 * atEnd is true.  If portalPos overflows, set posOverflow (this causes us
	 * to stop relying on its value for navigation).  Note that atStart
	 * implies portalPos == 0, but not the reverse (portalPos could have
	 * overflowed).
	 */
	bool		atStart;
	bool		atEnd;
	bool		posOverflow;
	long		portalPos;

	/* Presentation data, primarily used by the pg_cursors system view */
	TimestampTz creation_time;	/* time at which this portal was defined */
	bool		visible;		/* include this portal in pg_cursors? */
} PortalData;
再看 QueryDesc:
/*
 * QueryDesc
 *
 * Carries the information used to run one query through the executor.  The
 * first group of fields is provided by CreateQueryDesc; the second group
 * (tupDesc, estate, planstate) is set by ExecutorStart.  planstate is the
 * root of the per-plan-node state tree.
 */
typedef struct QueryDesc
{
	/* These fields are provided by CreateQueryDesc */
	CmdType		operation;		/* CMD_SELECT, CMD_UPDATE, etc. */
	PlannedStmt *plannedstmt;	/* planner's output, or null if utility */
	Node	   *utilitystmt;	/* utility statement, or null */
	const char *sourceText;		/* source text of the query */
	Snapshot	snapshot;		/* snapshot to use for query */
	Snapshot	crosscheck_snapshot;	/* crosscheck for RI update/delete */
	DestReceiver *dest;			/* the destination for tuple output */
	ParamListInfo params;		/* param values being passed in */
	int			instrument_options;		/* OR of InstrumentOption flags */

	/* These fields are set by ExecutorStart */
	TupleDesc	tupDesc;		/* descriptor for result tuples */
	EState	   *estate;			/* executor's query-wide state */
	PlanState  *planstate;		/* tree of per-plan-node state */

	/* This is always set NULL by the core system, but plugins can change it */
	struct Instrumentation *totaltime;	/* total time spent in ExecutorRun */
} QueryDesc;
QueryDesc 中,有指向 PlanState 的指针 planstate。
再看 planstate:
/*
 * PlanState
 *
 * Common state carried by a plan state node.  The first member is the
 * NodeTag; the struct also links to the associated Plan node, to the
 * query-wide EState, and to the child state trees (lefttree / righttree).
 */
typedef struct PlanState
{
	NodeTag		type;

	Plan	   *plan;			/* associated Plan node */

	EState	   *state;			/* at execution time, states of individual
								 * nodes point to one EState for the whole
								 * top-level plan */

	Instrumentation *instrument;	/* Optional runtime stats for this node */

	/*
	 * Common structural data for all Plan types.  These links to subsidiary
	 * state trees parallel links in the associated plan tree (except for the
	 * subPlan list, which does not exist in the plan tree).
	 */
	List	   *targetlist;		/* target list to be computed at this node */
	List	   *qual;			/* implicitly-ANDed qual conditions */
	struct PlanState *lefttree; /* input plan tree(s) */
	struct PlanState *righttree;
	List	   *initPlan;		/* Init SubPlanState nodes (un-correlated expr
								 * subselects) */
	List	   *subPlan;		/* SubPlanState nodes in my expressions */

	/*
	 * State for management of parameter-change-driven rescanning
	 */
	Bitmapset  *chgParam;		/* set of IDs of changed Params */

	/*
	 * Other run-time state needed by most if not all node types.
	 */
	TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
	ExprContext *ps_ExprContext;	/* node's expression-evaluation context */
	ProjectionInfo *ps_ProjInfo;	/* info for doing tuple projection */
	bool		ps_TupFromTlist;/* state flag for processing set-valued
								 * functions in targetlist */
} PlanState;
这里, PlanState 相当于基类了。
/*
 * ScanState
 *
 * State node for scans.  PlanState is embedded as the first member, so a
 * pointer to a ScanState also points at its PlanState (and therefore at the
 * NodeTag that begins it).
 */
typedef struct ScanState
{
	PlanState	ps;				/* its first field is NodeTag */
	Relation	ss_currentRelation;	/* presumably the relation being scanned
									 * -- TODO confirm */
	HeapScanDesc ss_currentScanDesc;	/* heap scan descriptor; see
										 * HeapScanDescData */
	TupleTableSlot *ss_ScanTupleSlot;	/* presumably the slot holding the
										 * current scan tuple -- TODO confirm */
} ScanState;

/*
 * SeqScan uses a bare ScanState as its state node, since it needs
 * no additional fields.
 */
typedef ScanState SeqScanState;
...
再看下一层的结构: HeapScanDesc :
/*
 * HeapScanDesc / HeapScanDescData
 *
 * HeapScanDesc is a pointer to HeapScanDescData.  The struct holds three
 * groups of fields: the scan parameters, state set up at initscan time
 * (including rs_startblock, the block number to start at), and the scan's
 * current state.
 */
typedef struct HeapScanDescData *HeapScanDesc;

typedef struct HeapScanDescData
{
	/* scan parameters */
	Relation	rs_rd;			/* heap relation descriptor */
	Snapshot	rs_snapshot;	/* snapshot to see */
	int			rs_nkeys;		/* number of scan keys */
	ScanKey		rs_key;			/* array of scan key descriptors */
	bool		rs_bitmapscan;	/* true if this is really a bitmap scan */
	bool		rs_pageatatime; /* verify visibility page-at-a-time? */
	bool		rs_allow_strat; /* allow or disallow use of access strategy */
	bool		rs_allow_sync;	/* allow or disallow use of syncscan */

	/* state set up at initscan time */
	BlockNumber rs_nblocks;		/* number of blocks to scan */
	BlockNumber rs_startblock;	/* block # to start at */
	BufferAccessStrategy rs_strategy;	/* access strategy for reads */
	bool		rs_syncscan;	/* report location to syncscan logic? */

	/* scan current state */
	bool		rs_inited;		/* false = scan not init'd yet */
	HeapTupleData rs_ctup;		/* current tuple in scan, if any */
	BlockNumber rs_cblock;		/* current block # in scan, if any */
	Buffer		rs_cbuf;		/* current buffer in scan, if any */
	/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
	ItemPointerData rs_mctid;	/* marked scan position, if any */

	/* these fields only used in page-at-a-time mode and for bitmap scans */
	int			rs_cindex;		/* current tuple's index in vistuples */
	int			rs_mindex;		/* marked tuple's saved index */
	int			rs_ntuples;		/* number of visible tuples on page */
	OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];	/* their offsets */
} HeapScanDescData;
至少在进入 PortalRunSelect 函数之前,“装箱”(把 SeqScanState 当作 PlanState 指针保存到 QueryDesc 中)就已经完成了;之后在 ExecutorRun、ExecutePlan 等函数内部,就相当于对 PlanState 进行“拆箱”(再按具体的节点类型来使用它)。