PostgreSQL 在何处处理 SQL 查询（之四十三）

再次上溯：可以知道，在进入 ExecutePlan 时，入口参数 planstate 中的 start_block（即 HeapScanDesc 的 rs_startblock 字段）就已经是 0 了。

/* ----------------------------------------------------------------
 *        ExecutePlan
 *
 *        Processes the query plan until we have processed 'numberTuples' tuples,
 *        moving in the specified direction.
 *
 *        Runs to completion if numberTuples is 0
 *
 *        estate       - executor state; es_direction and es_processed are
 *                       updated here
 *        planstate    - top node of the plan state tree tuples are pulled from
 *        operation    - command type; only CMD_SELECT tuples are counted here
 *        sendTuples   - if true, every tuple is handed to dest->receiveSlot
 *        numberTuples - maximum number of tuples to process, 0 = no limit
 *        direction    - scan direction, stored into estate->es_direction
 *        dest         - receiver of the emitted tuples
 *
 * Note: the ctid attribute is a 'junk' attribute that is removed before the
 * user can see it
 * ----------------------------------------------------------------
 */
static void
ExecutePlan(EState *estate,
            PlanState *planstate,
            CmdType operation,
            bool sendTuples,
            long numberTuples,
            ScanDirection direction,
            DestReceiver *dest)
{
    TupleTableSlot *slot;
    long        current_tuple_count;

    /*
     * initialize local variables
     */
    current_tuple_count = 0;

    /*
     * Set the direction.
     */
    estate->es_direction = direction;

    /*
     * Loop until we've processed the proper number of tuples from the plan.
     */
    for (;;)
    {
        /* Reset the per-output-tuple exprcontext */
        ResetPerTupleExprContext(estate);

        /*
         * Debug instrumentation (added by gaojian): look at the start block
         * of the heap scan before fetching the next tuple.
         *
         * The top plan node is not necessarily a sequential scan (for
         * instance it may be a ModifyTable node), so the node tag must be
         * checked before treating planstate as a SeqScanState; otherwise
         * ss_currentScanDesc would be read from an unrelated struct.
         */
        if (IsA(planstate, SeqScanState))
        {
            HeapScanDesc heapdesc = ((SeqScanState *) planstate)->ss_currentScanDesc;

            if (heapdesc != NULL)
            {
                BlockNumber bnum = heapdesc->rs_startblock;

                /*
                 * bnum is only a hook for tracing, e.g.
                 * fprintf(stderr, "bnum is %u\n", bnum);
                 */
                (void) bnum;
            }
            else
                fprintf(stderr,"heapdesc is null\n");
        }

        /*
         * Execute the plan and obtain a tuple
         */
        slot = ExecProcNode(planstate);

        /*
         * if the tuple is null, then we assume there is nothing more to
         * process so we just end the loop...
         */
        if (TupIsNull(slot))
            break;

        /*
         * If we have a junk filter, then project a new tuple with the junk
         * removed.
         *
         * Store this new "clean" tuple in the junkfilter's resultSlot.
         * (Formerly, we stored it back over the "dirty" tuple, which is WRONG
         * because that tuple slot has the wrong descriptor.)
         */
        if (estate->es_junkFilter != NULL)
            slot = ExecFilterJunk(estate->es_junkFilter, slot);

        /*
         * If we are supposed to send the tuple somewhere, do so. (In
         * practice, this is probably always the case at this point.)
         */
        if (sendTuples)
            (*dest->receiveSlot) (slot, dest);

        /*
         * Count tuples processed, if this is a SELECT.  (For other operation
         * types, the ModifyTable plan node must count the appropriate
         * events.)
         */
        if (operation == CMD_SELECT)
            (estate->es_processed)++;

        /*
         * check our tuple count.. if we've processed the proper number then
         * quit, else loop again and process more tuples.  Zero numberTuples
         * means no limit.
         */
        current_tuple_count++;
        if (numberTuples && numberTuples == current_tuple_count)
            break;
    }
}

再上溯：

其 planstate 参数来自 queryDesc->planstate。

可以这样认为：在调用 ExecutorRun 之前，queryDesc->planstate 中的 rs_startblock（上文所说的 start_block）应该早已初始化好了。

/*
 * ExecutorRun
 *
 * Entry point for running a query: forwards the call to ExecutorRun_hook
 * when one is installed, and to standard_ExecutorRun otherwise.  All three
 * arguments are passed through unchanged.
 */
void
ExecutorRun(QueryDesc *queryDesc,
            ScanDirection direction, long count)
{
    /* No hook installed: take the standard path and we are done. */
    if (!ExecutorRun_hook)
    {
        standard_ExecutorRun(queryDesc, direction, count);
        return;
    }

    ExecutorRun_hook(queryDesc, direction, count);
}

/*
 * standard_ExecutorRun
 *
 * Runs the plan held by queryDesc inside the per-query memory context:
 * resets the processed-tuple counter, starts the tuple receiver when tuples
 * will be emitted, calls ExecutePlan unless the direction is "no movement",
 * shuts the receiver down again and restores the caller's memory context.
 * Overall runtime instrumentation is started/stopped around the run when
 * queryDesc->totaltime is set.
 */
void
standard_ExecutorRun(QueryDesc *queryDesc,
                     ScanDirection direction, long count)
{
    EState       *estate;
    DestReceiver *receiver;
    CmdType        cmd;
    bool        emitTuples;
    MemoryContext callerContext;

    /* sanity checks */
    Assert(queryDesc != NULL);
    estate = queryDesc->estate;
    Assert(estate != NULL);
    Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

    /* Everything below runs in the per-query memory context. */
    callerContext = MemoryContextSwitchTo(estate->es_query_cxt);

    /* Allow instrumentation of Executor overall runtime */
    if (queryDesc->totaltime)
        InstrStartNode(queryDesc->totaltime);

    /* Pull what we need out of the query descriptor. */
    cmd = queryDesc->operation;
    receiver = queryDesc->dest;

    /* Tuples are emitted for SELECT and for statements with RETURNING. */
    emitTuples = (cmd == CMD_SELECT ||
                  queryDesc->plannedstmt->hasReturning);

    /* Reset the per-run counters. */
    estate->es_processed = 0;
    estate->es_lastoid = InvalidOid;

    /* Start the tuple receiver, if we will be emitting tuples. */
    if (emitTuples)
        receiver->rStartup(receiver, cmd, queryDesc->tupDesc);

    /* Run the plan, unless the caller asked for no movement at all. */
    if (!ScanDirectionIsNoMovement(direction))
        ExecutePlan(estate, queryDesc->planstate, cmd, emitTuples,
                    count, direction, receiver);

    /* Shut the tuple receiver down, if we started it. */
    if (emitTuples)
        receiver->rShutdown(receiver);

    if (queryDesc->totaltime)
        InstrStopNode(queryDesc->totaltime, estate->es_processed);

    MemoryContextSwitchTo(callerContext);
}

再次上溯:

/*
 * PortalRunSelect (abridged excerpt; "..." marks code omitted by the article)
 *
 * Fetches rows for a portal.  In the forward case shown here it chooses the
 * scan direction, translates FETCH_ALL into the executor's "0 = all rows"
 * convention, and then either reads from the portal's hold store or runs the
 * executor via ExecutorRun on the portal's QueryDesc.
 *
 * portal  - portal to fetch from
 * forward - true to fetch in the forward direction
 * count   - number of rows requested (FETCH_ALL for all)
 * dest    - receiver passed to RunFromStore in the hold-store path
 */
static long
PortalRunSelect(Portal portal,
                bool forward,
                long count,
                DestReceiver *dest)
{
    QueryDesc  *queryDesc;
    ScanDirection direction;
    uint32        nprocessed;

    /*
     * NB: queryDesc will be NULL if we are fetching from a held cursor or a
     * completed utility query; can't use it in that path.
     */
    queryDesc = PortalGetQueryDesc(portal);

       ...

    if (forward)
    {
        /* Nothing to fetch if already at the end or no rows were requested */
        if (portal->atEnd || count <= 0)
            direction = NoMovementScanDirection;
        else
            direction = ForwardScanDirection;

        /* In the executor, zero count processes all rows */
        if (count == FETCH_ALL)
            count = 0;

        /* A hold store, when present, is read instead of running the plan */
        if (portal->holdStore)
            nprocessed = RunFromStore(portal, direction, count, dest);
        else
        {
            /* Run the executor under the query's snapshot */
            PushActiveSnapshot(queryDesc->snapshot);
            ExecutorRun(queryDesc, direction, count);
            /* ExecutorRun leaves the row count in the executor state */
            nprocessed = queryDesc->estate->es_processed;
            PopActiveSnapshot();
        }
                ...
        }
        ...
}

那么，Portal 与 QueryDesc 又是什么关系呢？

看下面：Portal 是指向 PortalData 的指针，而 PortalData 结构里保存着一个指向 QueryDesc 的指针 queryDesc。

/*
 * Portal is a pointer to a PortalData struct.
 *
 * PortalData holds the bookkeeping for one portal: the query or queries it
 * will execute, its status, the executor invocation info (queryDesc) while
 * the executor is active, the result tuple descriptor, an optional tuple
 * store for held cursors, and the current cursor position.
 */
typedef struct PortalData *Portal;

typedef struct PortalData
{
    /* Bookkeeping data */
    const char *name;            /* portal's name */
    const char *prepStmtName;    /* source prepared statement (NULL if none) */
    MemoryContext heap;            /* subsidiary memory for portal */
    ResourceOwner resowner;        /* resources owned by portal */
    void        (*cleanup) (Portal portal);        /* cleanup hook */
    SubTransactionId createSubid;        /* the ID of the creating subxact */

    /*
     * if createSubid is InvalidSubTransactionId, the portal is held over from
     * a previous transaction
     */

    /* The query or queries the portal will execute */
    const char *sourceText;        /* text of query (as of 8.4, never NULL) */
    const char *commandTag;        /* command tag for original query */
    List       *stmts;            /* PlannedStmts and/or utility statements */
    CachedPlan *cplan;            /* CachedPlan, if stmts are from one */

    ParamListInfo portalParams; /* params to pass to query */

    /* Features/options */
    PortalStrategy strategy;    /* see above */
    int            cursorOptions;    /* DECLARE CURSOR option bits */

    /* Status data */
    PortalStatus status;        /* see above */
    bool        portalPinned;    /* a pinned portal can't be dropped */

    /* If not NULL, Executor is active; call ExecutorEnd eventually: */
    QueryDesc  *queryDesc;        /* info needed for executor invocation */

    /* If portal returns tuples, this is their tupdesc: */
    TupleDesc    tupDesc;        /* descriptor for result tuples */
    /* and these are the format codes to use for the columns: */
    int16       *formats;        /* a format code for each column */

    /*
     * Where we store tuples for a held cursor or a PORTAL_ONE_RETURNING or
     * PORTAL_UTIL_SELECT query.  (A cursor held past the end of its
     * transaction no longer has any active executor state.)
     */
    Tuplestorestate *holdStore; /* store for holdable cursors */
    MemoryContext holdContext;    /* memory containing holdStore */

    /*
     * atStart, atEnd and portalPos indicate the current cursor position.
     * portalPos is zero before the first row, N after fetching N'th row of
     * query.  After we run off the end, portalPos = # of rows in query, and
     * atEnd is true.  If portalPos overflows, set posOverflow (this causes us
     * to stop relying on its value for navigation).  Note that atStart
     * implies portalPos == 0, but not the reverse (portalPos could have
     * overflowed).
     */
    bool        atStart;        /* cursor is before the first row */
    bool        atEnd;            /* cursor has run off the end */
    bool        posOverflow;    /* portalPos overflowed; don't rely on it */
    long        portalPos;        /* current position, see comment above */

    /* Presentation data, primarily used by the pg_cursors system view */
    TimestampTz creation_time;    /* time at which this portal was defined */
    bool        visible;        /* include this portal in pg_cursors? */
}    PortalData;

再看 QueryDesc:

/*
 * QueryDesc
 *
 * Bundles what is needed to run one query through the executor.  The first
 * group of fields is filled in by CreateQueryDesc; the second group
 * (tupDesc, estate, planstate) is filled in by ExecutorStart, so planstate
 * is already set by the time ExecutorRun hands it to ExecutePlan.
 */
typedef struct QueryDesc
{
    /* These fields are provided by CreateQueryDesc */
    CmdType        operation;        /* CMD_SELECT, CMD_UPDATE, etc. */
    PlannedStmt *plannedstmt;    /* planner's output, or null if utility */
    Node       *utilitystmt;    /* utility statement, or null */
    const char *sourceText;        /* source text of the query */
    Snapshot    snapshot;        /* snapshot to use for query */
    Snapshot    crosscheck_snapshot;    /* crosscheck for RI update/delete */
    DestReceiver *dest;            /* the destination for tuple output */
    ParamListInfo params;        /* param values being passed in */
    int            instrument_options;        /* OR of InstrumentOption flags */

    /* These fields are set by ExecutorStart */
    TupleDesc    tupDesc;        /* descriptor for result tuples */
    EState       *estate;            /* executor's query-wide state */
    PlanState  *planstate;        /* tree of per-plan-node state */

    /* This is always set NULL by the core system, but plugins can change it */
    struct Instrumentation *totaltime;    /* total time spent in ExecutorRun */
} QueryDesc;

QueryDesc 中，有一个指向 PlanState 的指针 planstate。

再看 PlanState 的定义：

/*
 * PlanState
 *
 * Common per-node execution state.  Node-specific state structs embed a
 * PlanState as their first member (see ScanState), so a pointer to such a
 * struct can be passed around as a PlanState pointer.
 */
typedef struct PlanState
{
    NodeTag        type;            /* node tag identifying the concrete state
                                 * node type */

    Plan       *plan;            /* associated Plan node */

    EState       *state;            /* at execution time, states of individual
                                 * nodes point to one EState for the whole
                                 * top-level plan */

    Instrumentation *instrument;    /* Optional runtime stats for this node */

    /*
     * Common structural data for all Plan types.  These links to subsidiary
     * state trees parallel links in the associated plan tree (except for the
     * subPlan list, which does not exist in the plan tree).
     */
    List       *targetlist;        /* target list to be computed at this node */
    List       *qual;            /* implicitly-ANDed qual conditions */
    struct PlanState *lefttree; /* input plan tree(s) */
    struct PlanState *righttree;    /* second input plan tree, if any */
    List       *initPlan;        /* Init SubPlanState nodes (un-correlated expr
                                 * subselects) */
    List       *subPlan;        /* SubPlanState nodes in my expressions */

    /*
     * State for management of parameter-change-driven rescanning
     */
    Bitmapset  *chgParam;        /* set of IDs of changed Params */

    /*
     * Other run-time state needed by most if not all node types.
     */
    TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */
    ExprContext *ps_ExprContext;    /* node's expression-evaluation context */
    ProjectionInfo *ps_ProjInfo;    /* info for doing tuple projection */
    bool        ps_TupFromTlist;/* state flag for processing set-valued
                                 * functions in targetlist */
} PlanState;

这里，PlanState 相当于基类：ScanState 的第一个成员就是 PlanState，因此 ScanState（以及 SeqScanState）的指针可以当作 PlanState 指针来传递，需要时再强制转换回具体类型。

/*
 * ScanState
 *
 * State for a scan node.  It starts with an embedded PlanState, so a
 * ScanState pointer can be used where a PlanState pointer is expected.
 */
typedef struct ScanState
{
    PlanState    ps;                /* its first field is NodeTag */
    Relation    ss_currentRelation; /* presumably the relation being scanned */
    HeapScanDesc ss_currentScanDesc;    /* heap scan descriptor; may be NULL
                                         * (ExecutePlan's debug code checks) */
    TupleTableSlot *ss_ScanTupleSlot;   /* presumably the slot holding the
                                         * tuple fetched by the scan */
} ScanState;

/*
 * SeqScan uses a bare ScanState as its state node, since it needs
 * no additional fields.
 */
typedef ScanState SeqScanState;

...

再看下一层的结构: HeapScanDesc :

/*
 * HeapScanDesc is a pointer to a HeapScanDescData struct.
 *
 * HeapScanDescData describes one heap scan: the scan parameters, the state
 * set up at initscan time (including rs_startblock, the block number the
 * scan starts at), and the current position of the scan.
 */
typedef struct HeapScanDescData *HeapScanDesc;

typedef struct HeapScanDescData
{
    /* scan parameters */
    Relation    rs_rd;            /* heap relation descriptor */
    Snapshot    rs_snapshot;    /* snapshot to see */
    int            rs_nkeys;        /* number of scan keys */
    ScanKey        rs_key;            /* array of scan key descriptors */
    bool        rs_bitmapscan;    /* true if this is really a bitmap scan */
    bool        rs_pageatatime; /* verify visibility page-at-a-time? */
    bool        rs_allow_strat; /* allow or disallow use of access strategy */
    bool        rs_allow_sync;    /* allow or disallow use of syncscan */

    /* state set up at initscan time */
    BlockNumber rs_nblocks;        /* number of blocks to scan */
    BlockNumber rs_startblock;    /* block # to start at */
    BufferAccessStrategy rs_strategy;    /* access strategy for reads */
    bool        rs_syncscan;    /* report location to syncscan logic? */

    /* scan current state */
    bool        rs_inited;        /* false = scan not init'd yet */
    HeapTupleData rs_ctup;        /* current tuple in scan, if any */
    BlockNumber rs_cblock;        /* current block # in scan, if any */
    Buffer        rs_cbuf;        /* current buffer in scan, if any */
    /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
    ItemPointerData rs_mctid;    /* marked scan position, if any */

    /* these fields only used in page-at-a-time mode and for bitmap scans */
    int            rs_cindex;        /* current tuple's index in vistuples */
    int            rs_mindex;        /* marked tuple's saved index */
    int            rs_ntuples;        /* number of visible tuples on page */
    OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];    /* their offsets */
}    HeapScanDescData;

至少在进入 PortalRunSelect 函数之前，“装箱”（把 SeqScanState 这样的具体节点状态当作 PlanState 指针保存起来）就已经完成；之后在 ExecutorRun 等函数内部，相当于对 PlanState 进行“拆箱”（转换回具体的节点状态类型）。

 

posted @ 2013-06-03 13:45  健哥的数据花园  阅读(567)  评论(0)  编辑  收藏  举报