PostgreSQL在何处处理SQL查询之十七
继续:
/*
 * estimate_rel_size - estimate # pages and # tuples in a table or index
 *
 * We also estimate the fraction of the pages that are marked all-visible in
 * the visibility map, for use in estimation of index-only scans.
 *
 * If attr_widths isn't NULL, it points to the zero-index entry of the
 * relation's attr_widths[] cache; we fill this in if we have need to compute
 * the attribute widths for estimation purposes.
 *
 * NOTE(review): this listing is abridged; the "..." markers appear to stand
 * for code that was omitted from the excerpt, so it does not compile as shown.
 */
void
estimate_rel_size(Relation rel, int32 *attr_widths,
				  BlockNumber *pages, double *tuples, double *allvisfrac)
{
	...

	/* Dispatch on the relation kind recorded for this relation. */
	switch (rel->rd_rel->relkind)
	{
		case RELKIND_RELATION:
		case RELKIND_INDEX:
		case RELKIND_TOASTVALUE:
			/* it has storage, ok to call the smgr */
			curpages = RelationGetNumberOfBlocks(rel);
			...
			break;
		case RELKIND_SEQUENCE:
			...
			break;
		case RELKIND_FOREIGN_TABLE:
			...
			break;
		default:
			...
			break;
	}
}
首先要确定此表有多少个块,这由 RelationGetNumberOfBlocks 完成。它会用到 ForkNumber 中的 MAIN_FORKNUM,所以先看 ForkNumber 的定义:
/*
 * A relation's physical storage is made up of one or more forks.  The main
 * fork always exists; further forks may be present to hold various kinds of
 * metadata.  A ForkNumber identifies one particular fork of a relation.
 *
 * The numeric values are spelled out explicitly; they are the same values
 * the enumerators would receive implicitly by counting up from
 * MAIN_FORKNUM = 0.
 */
typedef enum ForkNumber
{
	InvalidForkNumber = -1,
	MAIN_FORKNUM = 0,
	FSM_FORKNUM = 1,
	VISIBILITYMAP_FORKNUM = 2,
	INIT_FORKNUM = 3

	/*
	 * NOTE: if you add a new fork, change MAX_FORKNUM below and update the
	 * forkNames array in catalog.c
	 */
} ForkNumber;
再看:
/*
 * Shorthand that expands to RelationGetNumberOfBlocksInFork() applied to
 * the relation's main fork (MAIN_FORKNUM).
 */
#define RelationGetNumberOfBlocks(reln) \
	RelationGetNumberOfBlocksInFork(reln, MAIN_FORKNUM)
再看:
/*
 * RelationGetNumberOfBlocksInFork
 *		Determines the current number of pages in the given fork of the
 *		relation.
 *
 * relation: the relation to measure.
 * forkNum:  which fork of the relation to measure.
 *
 * Returns whatever smgrnblocks() reports for that fork.
 */
BlockNumber
RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
{
	BlockNumber nblocks;

	/* Make sure the relation is open at the smgr level before asking. */
	RelationOpenSmgr(relation);

	nblocks = smgrnblocks(relation->rd_smgr, forkNum);

	return nblocks;
}
再看:
数据库表对应的文件发生问题时,smgrnblocks 函数会发生错误:
/*
 *	smgrnblocks() -- Calculate the number of blocks in the
 *					 supplied relation.
 *
 * The work is delegated to the smgr_nblocks callback of the smgrsw[] entry
 * selected by reln->smgr_which; its result is returned unchanged.
 */
BlockNumber
smgrnblocks(SMgrRelation reln, ForkNumber forknum)
{
	/* Calling through the function pointer directly; no explicit '*' needed. */
	return smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum);
}
此处,使用了函数指针,经过一番跟踪,发现当我第一次执行如 select * from tab01 命令时,会执行到:
/*
 *	mdnblocks() -- Get the number of blocks stored in a relation.
 *
 * Important side effect: every active segment of the relation gets opened
 * and linked into the mdfd_chain list.  Until this routine has been called,
 * the chain only holds segments up to the last one actually touched.
 *
 * Returns the total block count: the number of full segments preceding the
 * final one times RELSEG_SIZE, plus the length of the final segment.
 */
BlockNumber
mdnblocks(SMgrRelation reln, ForkNumber forknum)
{
	const BlockNumber segblocks = (BlockNumber) RELSEG_SIZE;
	MdfdVec    *seg = mdopen(reln, forknum, EXTENSION_FAIL);
	BlockNumber segindex = 0;

	/*
	 * Walk straight to the last segment already in the chain, so we avoid
	 * redundant seeks on the earlier ones.  Those were previously verified
	 * to be exactly RELSEG_SIZE blocks long, and rechecking that every time
	 * would be pointless.
	 *
	 * NOTE: that assumption can only be wrong if another backend truncated
	 * the relation.  Higher code levels are relied upon to deal with that by
	 * closing and re-opening the md fd, which happens via relcache flush.
	 * (The checkpointer does not take part in relcache flush, so it might
	 * hold chain entries for inactive segments; that is harmless because the
	 * checkpointer never needs to compute a relation's size.)
	 */
	for (; seg->mdfd_chain != NULL; seg = seg->mdfd_chain)
		segindex++;

	for (;;)
	{
		BlockNumber seglen = _mdnblocks(reln, forknum, seg);

		if (seglen > segblocks)
			elog(FATAL, "segment too big");

		/* A short segment is the last one: we have the answer. */
		if (seglen < segblocks)
			return (segindex * segblocks) + seglen;

		/* Segment is exactly full, so move on to the following one. */
		segindex++;

		if (seg->mdfd_chain == NULL)
		{
			/*
			 * O_CREAT is passed, so when the last segment is exactly
			 * RELSEG_SIZE long the next (zero-length) segment is created
			 * right away.  Perhaps not strictly required, but it keeps the
			 * logic simple.
			 */
			seg->mdfd_chain = _mdfd_openseg(reln, forknum, segindex, O_CREAT);
			if (seg->mdfd_chain == NULL)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open file \"%s\": %m",
								_mdfd_segpath(reln, forknum, segindex))));
		}

		seg = seg->mdfd_chain;
	}
}
下一步看 mdopen函数
/*
 *	mdopen() -- Open the specified relation.
 *
 * Note we only open the first segment, when there are multiple segments.
 *
 * If first segment is not present, either ereport or return NULL according
 * to "behavior".  We treat EXTENSION_CREATE the same as EXTENSION_FAIL;
 * EXTENSION_CREATE means it's OK to extend an existing relation, not to
 * invent one out of whole cloth.
 *
 * NOTE(review): this listing is abridged; the "..." markers appear to stand
 * for code omitted from the excerpt (declarations, and the setup of the
 * returned mdfd), so it does not compile as shown.
 */
static MdfdVec *
mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
{
	...

	path = relpath(reln->smgr_rnode, forknum);

	/* First attempt: open the existing file read/write, without O_CREAT. */
	fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);

	if (fd < 0)
	{
		/*
		 * NOTE(review): this fprintf (and the one further down) writes the
		 * function name and line number to stderr; it looks like ad-hoc
		 * tracing, presumably added for this walkthrough -- confirm before
		 * keeping it.
		 */
		fprintf(stderr,"In %s----%d\n",__FUNCTION__, __LINE__);

		/*
		 * During bootstrap, there are cases where a system relation will be
		 * accessed (by internal backend processes) before the bootstrap
		 * script nominally creates it.  Therefore, accept mdopen() as a
		 * substitute for mdcreate() in bootstrap mode only. (See mdcreate)
		 */
		if (IsBootstrapProcessingMode())
			fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);

		/*
		 * The "if" above has no braces, so this trace statement is not part
		 * of it: it runs whenever the first open failed, bootstrap or not.
		 */
		fprintf(stderr,"In %s----%d\n",__FUNCTION__, __LINE__);

		if (fd < 0)
		{
			/*
			 * Still no file.  Return NULL only when the caller asked for
			 * that behavior and FILE_POSSIBLY_DELETED() accepts the errno;
			 * otherwise report the failure as an ERROR.
			 */
			if (behavior == EXTENSION_RETURN_NULL &&
				FILE_POSSIBLY_DELETED(errno))
			{
				pfree(path);
				return NULL;
			}
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not open file \"%s\": %m", path)));
		}
	}

	...
	return mdfd;
}
再看 PathNameOpenFile,如果打开文件失败,就会返回-1。
/*
 * open a file in an arbitrary directory
 *
 * NB: if the passed pathname is relative (which it usually is),
 * it will be interpreted relative to the process' working directory
 * (which should always be $PGDATA when this code is running).
 *
 * fileName:  path of the file to open.
 * fileFlags: open flags, passed to BasicOpenFile().
 * fileMode:  permission bits, passed to BasicOpenFile().
 *
 * Returns the virtual file descriptor (an index into VfdCache) on success.
 * Returns -1 if the file could not be opened; in that case errno is left as
 * set by the failed BasicOpenFile() call, so that callers can inspect it or
 * report it with %m.  Running out of memory for the name copy raises an
 * ERROR instead of returning.
 */
File
PathNameOpenFile(FileName fileName, int fileFlags, int fileMode)
{
	char	   *fnamecopy;
	File		file;
	Vfd		   *vfdP;

	DO_DB(elog(LOG, "PathNameOpenFile: %s %x %o",
			   fileName, fileFlags, fileMode));

	/*
	 * We need a malloc'd copy of the file name; fail cleanly if no room.
	 */
	fnamecopy = strdup(fileName);
	if (fnamecopy == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory")));

	file = AllocateVfd();
	vfdP = &VfdCache[file];

	/* Release least-recently-used files until we are under the fd limit. */
	while (nfile + numAllocatedDescs >= max_safe_fds)
	{
		if (!ReleaseLruFile())
			break;
	}

	vfdP->fd = BasicOpenFile(fileName, fileFlags, fileMode);

	if (vfdP->fd < 0)
	{
		/*
		 * The cleanup calls below may overwrite errno, but callers such as
		 * mdopen() examine errno after a -1 return.  Save it now and put it
		 * back once cleanup is done.
		 */
		int			save_errno = errno;

		FreeVfd(file);
		free(fnamecopy);
		errno = save_errno;
		return -1;
	}
	++nfile;
	DO_DB(elog(LOG, "PathNameOpenFile: success %d",
			   vfdP->fd));

	Insert(file);

	/* The Vfd entry keeps the malloc'd name copy from here on. */
	vfdP->fileName = fnamecopy;
	/* Saved flags are adjusted to be OK for re-opening file */
	vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL);
	vfdP->fileMode = fileMode;
	vfdP->seekPos = 0;
	vfdP->fileSize = 0;
	vfdP->fdstate = 0x0;
	vfdP->resowner = NULL;

	return file;
}