千家信息网

PostgreSQL 源码解读(132)- MVCC#16(vacuum过程-lazy_vacuum_index函数#1)

发表于:2025-01-21 作者:千家信息网编辑
千家信息网最后更新 2025年01月21日,本节简单介绍了PostgreSQL手工执行vacuum的处理流程,主要分析了ExecVacuum->vacuum->vacuum_rel->heap_vacuum_rel->lazy_scan_hea
千家信息网最后更新 2025年01月21日PostgreSQL 源码解读(132)- MVCC#16(vacuum过程-lazy_vacuum_index函数#1)

本节简单介绍了PostgreSQL手工执行vacuum的处理流程,主要分析了ExecVacuum->vacuum->vacuum_rel->heap_vacuum_rel->lazy_scan_heap->lazy_vacuum_index函数的实现逻辑,该函数清理index relation。

一、数据结构

宏定义
Vacuum和Analyze命令选项

/* ----------------------
 *      Vacuum and Analyze Statements
 *
 * VACUUM and ANALYZE are nominally two statements, but it is convenient
 * to represent both with a single node type.  At least one of
 * VACOPT_VACUUM and VACOPT_ANALYZE must be set in options.
 *
 * Each enumerator is a distinct bit, so values can be OR'ed together.
 * ----------------------
 */
typedef enum VacuumOption
{
    VACOPT_VACUUM = 1 << 0,                 /* do VACUUM */
    VACOPT_ANALYZE = 1 << 1,                /* do ANALYZE */
    VACOPT_VERBOSE = 1 << 2,                /* print progress info */
    VACOPT_FREEZE = 1 << 3,                 /* FREEZE option */
    VACOPT_FULL = 1 << 4,                   /* FULL (non-concurrent) vacuum */
    VACOPT_SKIP_LOCKED = 1 << 5,            /* skip if cannot get lock */
    VACOPT_SKIPTOAST = 1 << 6,              /* don't process the TOAST table, if any */
    VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7   /* don't skip any pages */
} VacuumOption;

IndexVacuumInfo
传递给ambulkdelete/amvacuumcleanup的输入参数结构体

/* * Struct for input arguments passed to ambulkdelete and amvacuumcleanup * 传递给ambulkdelete/amvacuumcleanup的输入参数结构体 * * num_heap_tuples is accurate only when estimated_count is false; * otherwise it's just an estimate (currently, the estimate is the * prior value of the relation's pg_class.reltuples field).  It will * always just be an estimate during ambulkdelete. * 在estimated_count为F的情况下,num_heap_tuples才是精确的. * 否则,该值只是一个故事(当前的实现是,该值是relation's pg_class.reltuples字段的上一个值). * 在ambulkdelete期间该值会一直都是估算值. */typedef struct IndexVacuumInfo{    //index relation    Relation    index;          /* the index being vacuumed */    //是否只是ANALYZE(没有实际的vacuum)    bool        analyze_only;   /* ANALYZE (without any actual vacuum) */    //如为T,则num_heap_tuples是一个估算值    bool        estimated_count;    /* num_heap_tuples is an estimate */    //进度信息的日志等级    int         message_level;  /* ereport level for progress messages */    //在堆中仍存在的元组数    double      num_heap_tuples;    /* tuples remaining in heap */    //访问策略    BufferAccessStrategy strategy;  /* access strategy for reads */} IndexVacuumInfo;

IndexBulkDeleteResult
ambulkdelete/amvacuumcleanup返回的统计信息结构体

/* * Struct for statistics returned by ambulkdelete and amvacuumcleanup * ambulkdelete/amvacuumcleanup返回的统计信息结构体 *  * This struct is normally allocated by the first ambulkdelete call and then * passed along through subsequent ones until amvacuumcleanup; however, * amvacuumcleanup must be prepared to allocate it in the case where no * ambulkdelete calls were made (because no tuples needed deletion). * Note that an index AM could choose to return a larger struct * of which this is just the first field; this provides a way for ambulkdelete * to communicate additional private data to amvacuumcleanup. * 该结构体通常由第一个ambulkdelete调用分配内存,传递到下一个处理过程,直至amvacuumcleanup; * 但是,在ambulkdelete没有调用时,amvacuumcleanup必须预分配(因为没有元组需要删除). * 注意索引访问方法(AM)可以选择返回一个更大的结构体,而该结构体是这个更大的结构体的第一个域; * 这为ambulkdelete提供了一个方法用于与需要额外私有数据的amvacuumcleanup函数通讯. * * Note: pages_removed is the amount by which the index physically shrank, * if any (ie the change in its total size on disk).  pages_deleted and * pages_free refer to free space within the index file.  Some index AMs * may compute num_index_tuples by reference to num_heap_tuples, in which * case they should copy the estimated_count field from IndexVacuumInfo. * 注意:pages_remove是索引物理收缩(shrank)的数量,如果有的话(即它在磁盘上的总大小的变化)。 * pages_deleted和pages_free指的是索引文件中的空闲空间. * 某些索引访问方法(AMs)可能通过参考num_heap_tuples计算num_index_tuples, *   在这种情况下会拷贝从IndexVacuumInfo中拷贝estimated_count域. */typedef struct IndexBulkDeleteResult{    //index中剩下的pages    BlockNumber num_pages;      /* pages remaining in index */    //在vacuum期间清除的元组数    BlockNumber pages_removed;  /* # removed during vacuum operation */    //num_index_tuples是一个估算值?    
bool        estimated_count;    /* num_index_tuples is an estimate */    //剩余的元组数    double      num_index_tuples;   /* tuples remaining */    //在vacuum期间清除的元组数    double      tuples_removed; /* # removed during vacuum operation */    //索引中未使用的pages    BlockNumber pages_deleted;  /* # unused pages in index */    //可重用的pages    BlockNumber pages_free;     /* # pages available for reuse */} IndexBulkDeleteResult;

二、源码解读

lazy_vacuum_index
lazy_vacuum_index清理一个index relation:删除指向vacrelstats->dead_tuples中所列元组的索引条目,并更新运行时统计信息.
主要逻辑如下:
1.初始化IndexVacuumInfo结构体变量
2.调用index_bulk_delete函数
3.报告进展

/*
 *  lazy_vacuum_index() -- vacuum one index relation.
 *
 *      Delete all the index entries pointing to tuples listed in
 *      vacrelstats->dead_tuples, and update running statistics.
 *
 *      indrel:      the index relation to vacuum.
 *      stats:       in/out; *stats is handed to index_bulk_delete() and then
 *                   overwritten with whatever that call returns.
 *      vacrelstats: supplies old_live_tuples and num_dead_tuples, and is
 *                   passed through as the state for the lazy_tid_reaped
 *                   callback.
 */
static void
lazy_vacuum_index(Relation indrel,
                  IndexBulkDeleteResult **stats,
                  LVRelStats *vacrelstats)
{
    IndexVacuumInfo ivinfo;
    PGRUsage    ru0;

    /* Start resource-usage accounting; reported in the message below */
    pg_rusage_init(&ru0);

    ivinfo.index = indrel;
    ivinfo.analyze_only = false;
    ivinfo.estimated_count = true;
    ivinfo.message_level = elevel;
    /* We can only provide an approximate value of num_heap_tuples here */
    ivinfo.num_heap_tuples = vacrelstats->old_live_tuples;
    ivinfo.strategy = vac_strategy;

    /* Do bulk deletion */
    *stats = index_bulk_delete(&ivinfo, *stats,
                               lazy_tid_reaped, (void *) vacrelstats);

    ereport(elevel,
            (errmsg("scanned index \"%s\" to remove %d row versions",
                    RelationGetRelationName(indrel),
                    vacrelstats->num_dead_tuples),
             errdetail_internal("%s", pg_rusage_show(&ru0))));
}

lazy_vacuum_index->index_bulk_delete
index_bulk_delete批量删除索引项,回调函数用于判断给定的main-heap元组是否将被删除,返回值是一个可选的、由palloc分配内存的统计信息结构体.

/* ----------------
 *      index_bulk_delete - do mass deletion of index entries
 *
 *      callback routine tells whether a given main-heap tuple is
 *      to be deleted
 *
 *      return value is an optional palloc'd struct of statistics
 *
 *      info, stats, callback and callback_state are forwarded unchanged
 *      to the index access method's ambulkdelete routine.
 * ----------------
 */
IndexBulkDeleteResult *
index_bulk_delete(IndexVacuumInfo *info,
                  IndexBulkDeleteResult *stats,
                  IndexBulkDeleteCallback callback,
                  void *callback_state)
{
    /* The index relation being vacuumed */
    Relation    indexRelation = info->index;

    /*
     * Sanity-check macros defined elsewhere; presumably they validate the
     * relation and confirm the AM supplies ambulkdelete -- TODO confirm.
     */
    RELATION_CHECKS;
    CHECK_REL_PROCEDURE(ambulkdelete);

    /*
     * Dispatch to the access method.  For the btree index traced in this
     * article the routine reached is btbulkdelete.
     *
     * NOTE(review): the gdb session in this article shows the same field
     * under the name rd_amroutine; the field name depends on the PostgreSQL
     * version being built -- confirm against the target source tree.
     */
    return indexRelation->rd_indam->ambulkdelete(info, stats,
                                                 callback, callback_state);
}

lazy_vacuum_index->index_bulk_delete->…btbulkdelete
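对于B-Tree索引,Index Relation的rd_amroutine->ambulkdelete(即上文index_bulk_delete源码中的rd_indam->ambulkdelete,该字段在不同PostgreSQL版本中名称不同,跟踪所用版本为rd_amroutine)实际指向btbulkdelete函数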

/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 * If stats is NULL on entry a zeroed struct is allocated here; otherwise the
 * caller's struct is re-used and returned.
 */
IndexBulkDeleteResult *
btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
             IndexBulkDeleteCallback callback, void *callback_state)
{
    Relation    rel = info->index;
    BTCycleId   cycleid;

    /* allocate stats if first time through, else re-use existing struct */
    if (stats == NULL)
        stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

    /* Establish the vacuum cycle ID to use for this scan */
    /* The ENSURE stuff ensures we clean up shared memory on failure */
    PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
    {
        /* Passed by address to btvacuumscan, then forwarded to the metapage update */
        TransactionId oldestBtpoXact;

        /* Begin the vacuum cycle and remember its ID */
        cycleid = _bt_start_vacuum(rel);

        /* Run the btree vacuum scan */
        btvacuumscan(info, stats, callback, callback_state, cycleid,
                     &oldestBtpoXact);

        /*
         * Update cleanup-related information in metapage. This information is
         * used only for cleanup but keeping them up to date can avoid
         * unnecessary cleanup even after bulkdelete.
         */
        _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
                                     info->num_heap_tuples);
    }
    PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
    _bt_end_vacuum(rel);

    /* Hand the statistics back to the caller */
    return stats;
}

三、跟踪分析

测试脚本 : 删除数据,执行vacuum

10:08:46 (xdb@[local]:5432)testdb=# delete from t1 where id < 1200;
DELETE 100
11:26:03 (xdb@[local]:5432)testdb=# checkpoint;
CHECKPOINT
11:26:04 (xdb@[local]:5432)testdb=# 
11:25:55 (xdb@[local]:5432)testdb=# vacuum t1;

启动gdb,设置断点

(gdb) b lazy_vacuum_index
Breakpoint 1 at 0x6bea40: file vacuumlazy.c, line 1689.
...
Breakpoint 1, lazy_vacuum_index (indrel=0x7f7334825050, stats=0x2aaffb8, vacrelstats=0x2aaf958) at vacuumlazy.c:1689
1689        pg_rusage_init(&ru0);
(gdb)

输入参数

(gdb) p *indrel$6 = {rd_node = {spcNode = 1663, dbNode = 16402, relNode = 50823}, rd_smgr = 0x0, rd_refcnt = 1, rd_backend = -1,   rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = 0 '\000', rd_statvalid = false,   rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7f733491ad20, rd_att = 0x7f733491a9b8, rd_id = 50823,   rd_lockInfo = {lockRelId = {relId = 50823, dbId = 16402}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0,   rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0,   rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x0, rd_oidindex = 0, rd_pkindex = 0, rd_replidindex = 0,   rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0,   rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x7f733491a8d8, rd_indextuple = 0x7f733491a8a0,   rd_amhandler = 330, rd_indexcxt = 0x2a05340, rd_amroutine = 0x2a05480, rd_opfamily = 0x2a05598, rd_opcintype = 0x2a055b8,   rd_support = 0x2a055d8, rd_supportinfo = 0x2a05600, rd_indoption = 0x2a05738, rd_indexprs = 0x0, rd_indpred = 0x0,   rd_exclops = 0x0, rd_exclprocs = 0x0, rd_exclstrats = 0x0, rd_amcache = 0x0, rd_indcollation = 0x2a05718,   rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x2a5e198}(gdb) p *indrel->rd_rel$9 = {relname = {data = "idx_t1_id", '\000' }, relnamespace = 2200, reltype = 0, reloftype = 0,   relowner = 10, relam = 403, relfilenode = 50823, reltablespace = 0, relpages = 60, reltuples = 8901, relallvisible = 0,   reltoastrelid = 0, relhasindex = false, relisshared = false, relpersistence = 112 'p', relkind = 105 'i', relnatts = 1,   relchecks = 0, relhasoids = false, relhasrules = false, relhastriggers = false, relhassubclass = false,   relrowsecurity = false, relforcerowsecurity = false, relispopulated = true, relreplident = 110 'n',   relispartition = false, relrewrite = 0, relfrozenxid = 0, relminmxid = 0}(gdb) 
p *stats$7 = (IndexBulkDeleteResult *) 0x0(gdb) p *vacrelstats$8 = {hasindex = true, old_rel_pages = 124, rel_pages = 124, scanned_pages = 59, pinskipped_pages = 0,   frozenskipped_pages = 1, tupcount_pages = 59, old_live_tuples = 12686, new_rel_tuples = 14444, new_live_tuples = 14444,   new_dead_tuples = 0, pages_removed = 0, tuples_deleted = 100, nonempty_pages = 124, num_dead_tuples = 100,   max_dead_tuples = 36084, dead_tuples = 0x2ab8820, num_index_scans = 0, latestRemovedXid = 397076,   lock_waiter_detected = false}(gdb)

初始化IndexVacuumInfo结构体

(gdb) n
1691        ivinfo.index = indrel;
(gdb) 
1692        ivinfo.analyze_only = false;
(gdb) 
1693        ivinfo.estimated_count = true;
(gdb) 
1694        ivinfo.message_level = elevel;
(gdb) 
1696        ivinfo.num_heap_tuples = vacrelstats->old_live_tuples;
(gdb) 
1697        ivinfo.strategy = vac_strategy;
(gdb)

调用index_bulk_delete,进入该函数

1700        *stats = index_bulk_delete(&ivinfo, *stats,
(gdb) step
index_bulk_delete (info=0x7fff39c5d620, stats=0x0, callback=0x6bf507 <lazy_tid_reaped>, callback_state=0x2aaf958)
    at indexam.c:748
748     Relation    indexRelation = info->index;
(gdb)

输入参数
info -> IndexVacuumInfo结构体
stats为NULL
回调函数为lazy_tid_reaped
回调函数状态结构体为callback_state

(gdb) p *info
$10 = {index = 0x7f7334825050, analyze_only = false, estimated_count = true, message_level = 13, num_heap_tuples = 12686, 
  strategy = 0x2a9d478}
(gdb) 
(gdb) p *callback_state
Attempt to dereference a generic pointer.
(gdb) 
(gdb) p *info->strategy
$11 = {btype = BAS_VACUUM, ring_size = 32, current = 4, current_was_in_ring = false, buffers = 0x2a9d488}
(gdb)

调用indexRelation->rd_amroutine->ambulkdelete,该函数实际指向的是btbulkdelete

(gdb) n750     RELATION_CHECKS;(gdb) 751     CHECK_REL_PROCEDURE(ambulkdelete);(gdb) 753     return indexRelation->rd_amroutine->ambulkdelete(info, stats,(gdb) p indexRelation->rd_amroutine$12 = (struct IndexAmRoutine *) 0x2a05480(gdb) p *indexRelation->rd_amroutine$13 = {type = T_IndexAmRoutine, amstrategies = 5, amsupport = 3, amcanorder = true, amcanorderbyop = false,   amcanbackward = true, amcanunique = true, amcanmulticol = true, amoptionalkey = true, amsearcharray = true,   amsearchnulls = true, amstorage = false, amclusterable = true, ampredlocks = true, amcanparallel = true,   amcaninclude = true, amkeytype = 0, ambuild = 0x5123f0 , ambuildempty = 0x507e6b ,   aminsert = 0x507f11 , ambulkdelete = 0x5096b6 , amvacuumcleanup = 0x509845 ,   amcanreturn = 0x50a21f , amcostestimate = 0x9c5356 , amoptions = 0x511cd4 ,   amproperty = 0x511cfe , amvalidate = 0x51522b , ambeginscan = 0x5082f7 ,   amrescan = 0x508492 , amgettuple = 0x507f90 , amgetbitmap = 0x50819e ,   amendscan = 0x508838 , ammarkpos = 0x508b28 , amrestrpos = 0x508d20 ,   amestimateparallelscan = 0x5090e6 , aminitparallelscan = 0x5090f1 ,   amparallelrescan = 0x50913f }

进入btbulkdelete

(gdb) step
btbulkdelete (info=0x7fff39c5d620, stats=0x0, callback=0x6bf507 <lazy_tid_reaped>, callback_state=0x2aaf958) at nbtree.c:857
857     Relation    rel = info->index;
(gdb)

输入参数参见上述函数输入参数,类似
获取relation,初始化统计信息

857     Relation    rel = info->index;
(gdb) n
861     if (stats == NULL)
(gdb) 
862         stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
(gdb) 
866     PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
(gdb)

获取cycleid

(gdb) n
870         cycleid = _bt_start_vacuum(rel);
(gdb) 
872         btvacuumscan(info, stats, callback, callback_state, cycleid,
(gdb) p cycleid
$14 = 1702
(gdb)

调用btvacuumscan,返回统计信息

(gdb) n
880         _bt_update_meta_cleanup_info(info->index, oldestBtpoXact,
(gdb) 
883     PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
(gdb) 
884     _bt_end_vacuum(rel);
(gdb) 
886     return stats;
(gdb) p *stats
$15 = {num_pages = 60, pages_removed = 0, estimated_count = false, num_index_tuples = 8801, tuples_removed = 100, 
  pages_deleted = 6, pages_free = 6}
(gdb)

DONE!

btvacuumscan下节再行介绍

四、参考资料

PG Source Code

0