PostgreSQL 的pg_buffercache 代码研究
磨砺技术珠矶,践行数据之道,追求卓越价值
回到上一级页面:PostgreSQL内部结构与源代码研究索引页 回到顶级页面:PostgreSQL索引页
[作者:技术者高健@博客园 mail: luckyjackgao@gmail.com]
pg_buffercache 代码位于 contrib 目录,总体上代码量200多行。
刚接触,感觉直接访问PostgreSQL 中的内存结构很神奇,特意学习了一下。
/*-------------------------------------------------------------------------
*
* pg_buffercache_pages.c
* display some contents of the buffer cache
*
* contrib/pg_buffercache/pg_buffercache_pages.c
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#define NUM_BUFFERCACHE_PAGES_ELEM 8
PG_MODULE_MAGIC;
Datum pg_buffercache_pages(PG_FUNCTION_ARGS);
/*
* Record structure holding the to be exposed cache data.
*/
typedef struct
{
uint32 bufferid;
Oid relfilenode;
Oid reltablespace;
Oid reldatabase;
ForkNumber forknum;
BlockNumber blocknum;
bool isvalid;
bool isdirty;
uint16 usagecount;
} BufferCachePagesRec;
/*
* Function context for data persisting over repeated calls.
*/
typedef struct
{
TupleDesc tupdesc;
BufferCachePagesRec *record;
} BufferCachePagesContext;
/*
* Function returning data from the shared buffer cache - buffer number,
* relation node/tablespace/database/blocknum and dirty indicator.
*/
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
MemoryContext oldcontext;
BufferCachePagesContext *fctx; /* User function context. */
TupleDesc tupledesc;
HeapTuple tuple;
if (SRF_IS_FIRSTCALL())
{
int i;
volatile BufferDesc *bufHdr;
funcctx = SRF_FIRSTCALL_INIT();
/* Switch context when allocating stuff to be used in later calls */
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
/* Create a user function context for cross-call persistence */
fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));
/* Construct a tuple descriptor for the result rows. */
tupledesc = CreateTemplateTupleDesc(NUM_BUFFERCACHE_PAGES_ELEM, false);
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
INT4OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
OIDOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
INT2OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
INT8OID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
BOOLOID, -1, 0);
TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
INT2OID, -1, 0);
fctx->tupdesc = BlessTupleDesc(tupledesc);
/* Allocate NBuffers worth of BufferCachePagesRec records. */
fctx->record = (BufferCachePagesRec *) palloc(sizeof(BufferCachePagesRec) * NBuffers);
/* Set max calls and remember the user function context. */
funcctx->max_calls = NBuffers;
funcctx->user_fctx = fctx;
/* Return to original context when allocating transient memory */
MemoryContextSwitchTo(oldcontext);
/*
* To get a consistent picture of the buffer state, we must lock all
* partitions of the buffer map. Needless to say, this is horrible
* for concurrency. Must grab locks in increasing order to avoid
* possible deadlocks.
*/
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);
/*
* Scan though all the buffers, saving the relevant fields in the
* fctx->record structure.
*/
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
{
/* Lock each buffer header before inspecting. */
LockBufHdr(bufHdr);
fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
fctx->record[i].relfilenode = bufHdr->tag.rnode.relNode;
fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode;
fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode;
fctx->record[i].forknum = bufHdr->tag.forkNum;
fctx->record[i].blocknum = bufHdr->tag.blockNum;
fctx->record[i].usagecount = bufHdr->usage_count;
if (bufHdr->flags & BM_DIRTY)
fctx->record[i].isdirty = true;
else
fctx->record[i].isdirty = false;
/* Note if the buffer is valid, and has storage created */
if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_TAG_VALID))
fctx->record[i].isvalid = true;
else
fctx->record[i].isvalid = false;
UnlockBufHdr(bufHdr);
}
/*
* And release locks. We do this in reverse order for two reasons:
* (1) Anyone else who needs more than one of the locks will be trying
* to lock them in increasing order; we don't want to release the
* other process until it can get all the locks it needs. (2) This
* avoids O(N^2) behavior inside LWLockRelease.
*/
for (i = NUM_BUFFER_PARTITIONS; --i >= 0;)
LWLockRelease(FirstBufMappingLock + i);
}
funcctx = SRF_PERCALL_SETUP();
/* Get the saved state */
fctx = funcctx->user_fctx;
if (funcctx->call_cntr < funcctx->max_calls)
{
uint32 i = funcctx->call_cntr;
Datum values[NUM_BUFFERCACHE_PAGES_ELEM];
bool nulls[NUM_BUFFERCACHE_PAGES_ELEM];
values[0] = Int32GetDatum(fctx->record[i].bufferid);
nulls[0] = false;
/*
* Set all fields except the bufferid to null if the buffer is unused
* or not valid.
*/
if (fctx->record[i].blocknum == InvalidBlockNumber ||
fctx->record[i].isvalid == false)
{
nulls[1] = true;
nulls[2] = true;
nulls[3] = true;
nulls[4] = true;
nulls[5] = true;
nulls[6] = true;
nulls[7] = true;
}
else
{
values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode);
nulls[1] = false;
values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
nulls[2] = false;
values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
nulls[3] = false;
values[4] = ObjectIdGetDatum(fctx->record[i].forknum);
nulls[4] = false;
values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
nulls[5] = false;
values[6] = BoolGetDatum(fctx->record[i].isdirty);
nulls[6] = false;
values[7] = Int16GetDatum(fctx->record[i].usagecount);
nulls[7] = false;
}
/* Build and return the tuple. */
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
result = HeapTupleGetDatum(tuple);
SRF_RETURN_NEXT(funcctx, result);
}
SRF_RETURN_DONE(funcctx);
}
我的看法是这样的:
官方给的例子是这样说的:
Datum
my_set_returning_function(PG_FUNCTION_ARGS)
{
FuncCallContext *funcctx;
Datum result;
MemoryContext oldcontext;
further declarations as needed if (SRF_IS_FIRSTCALL()) {
funcctx = SRF_FIRSTCALL_INIT();
oldcontext =
MemoryContextSwitchTo(funcctx->
multi_call_memory_ctx);
/* One-time setup code appears here: */
<<user code>>
<<if returning composite>>
<<build TupleDesc, and perhaps AttInMetadata>>
<<endif returning composite>>
<<user code>>
MemoryContextSwitchTo(oldcontext);
}
/* Each-time setup code appears here: */
<<user code>>
funcctx = SRF_PERCALL_SETUP();
<<user code>>
/* this is just one way we might test whether we are
done: */
if (funcctx->call_cntr < funcctx->max_calls) {
/* Here we want to return another item: */
<<user code>>
<<obtain result Datum>>
SRF_RETURN_NEXT(funcctx, result);
} else {
/* Here we are done returning items and just need to
clean up: */
<<user code>>
SRF_RETURN_DONE(funcctx);
}
}
对于其中 的 call_cntr 和 max_calls ,不是太理解。原来想,是否一次就该结束了。但是验证的结果是:其实还是会被调用多次。
[作者:技术者高健@博客园 mail: luckyjackgao@gmail.com]
修改代码后部为这个样子:
……
if (funcctx->call_cntr < funcctx->max_calls)
{
fprintf(stderr,"!!!!!call_cntr is smaller than max_calls");
……
SRF_RETURN_NEXT(funcctx, result);
}
else{
fprintf(stderr,"call_cntr is not smaller than max_calls");
SRF_RETURN_DONE(funcctx);
}
运行结果:出现多次 !!!!!call_cntr is smaller than max_calls , 最后出现一次 call_cntr smaller than max_calls
哪怕是 我用 select bufferid from pg_buffercache limit 1; 也是一样的效果。
再次测试,可以 发现, max_calls 居然为4096,也可以说, 我们在 psql 中对 pg_buffercache 的 一次普通查询,在后台进行了4096次运转。好古怪!
如果psql 中运行 select count(*) from pg_buffercache; 得出结果正好是 4096。
然后我们再看 NBuffers 的值,在程序中 下列循环之前,打印 NBuffers的值,发现只执行一次:值也是 4096
/*
* Scan though all the buffers, saving the relevant fields in the
* fctx->record structure.
*/
for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
{
...
}
那么可以说:
/* Each-time setup code appears here: */
<<user code>>
funcctx = SRF_PERCALL_SETUP();
<<user code>>
之前的代码,只执行一次。后面的代码,虽然我们没有写循环,但是会循环(因为 SRF_RETURN_NEXT)直到我们的代码执行了 SRF_RETURN_DONE为止。
[作者:技术者高健@博客园 mail: luckyjackgao@gmail.com]
回到上一级页面:PostgreSQL内部结构与源代码研究索引页 回到顶级页面:PostgreSQL索引页
磨砺技术珠矶,践行数据之道,追求卓越价值