千家信息网

PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么

发表于:2024-10-04 作者:千家信息网编辑
千家信息网最后更新 2024年10月04日,本篇内容介绍了"PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么"的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!
千家信息网最后更新 2024年10月04日:PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么

本篇内容介绍了"PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么"的有关知识,在实际案例的操作过程中,不少人都会遇到这样的困境,接下来就让小编带领大家学习一下如何处理这些情况吧!希望大家仔细阅读,能够学有所成!

一、数据结构

宏定义

/*
 * GetProcessingMode
 *      Expands to the variable Mode, i.e. the current processing mode.
 */
#define GetProcessingMode() Mode

/*
 * SetProcessingMode
 *      Assign a new processing mode to the variable Mode.
 *
 * The argument is first checked with AssertArg to be one of
 * BootstrapProcessing, InitProcessing or NormalProcessing.  Note that
 * (mode) appears several times in the expansion, so callers should not
 * pass an expression with side effects.  The do { ... } while(0) wrapper
 * lets the macro be used like a single statement.
 */
#define SetProcessingMode(mode) \
    do { \
        AssertArg((mode) == BootstrapProcessing || \
                  (mode) == InitProcessing || \
                  (mode) == NormalProcessing); \
        Mode = (mode); \
    } while(0)

二、源码解读

AutoVacLauncherMain函数是autovacuum launcher进程的主循环。

/*
 * Main loop for the autovacuum launcher process.
 *
 * Overview of what the body does, in order:
 *   1. Marks this process as the autovacuum launcher and sets its ps title.
 *   2. Installs signal handlers and performs process initialization
 *      (BaseInit, InitProcess unless EXEC_BACKEND, InitPostgres).
 *   3. Creates the "Autovacuum Launcher" memory context and switches to it.
 *   4. Establishes a sigsetjmp() recovery point used when an error is
 *      raised; the recovery path cleans up, sleeps one second and then falls
 *      through into the normal path again (or jumps to shutdown if SIGTERM
 *      was received).
 *   5. Forces a handful of configuration options to safe values.
 *   6. If autovacuum is not active ("emergency mode"), starts a single
 *      worker and exits.
 *   7. Otherwise publishes its PID in shared memory, builds the database
 *      list, and loops until got_SIGTERM is set: sleep on the latch, react
 *      to SIGHUP / SIGUSR2 flags, and launch a worker when allowed.
 *
 * argc/argv are not referenced anywhere in this function body.
 * The function ends by calling proc_exit(0) on every exit path.
 */
NON_EXEC_STATIC void
AutoVacLauncherMain(int argc, char *argv[])
{
    sigjmp_buf  local_sigjmp_buf;

    am_autovacuum_launcher = true;

    /* Identify myself via ps */
    init_ps_display(pgstat_get_backend_desc(B_AUTOVAC_LAUNCHER), "", "", "");

    ereport(DEBUG1,
            (errmsg("autovacuum launcher started")));

    /* Optional startup delay; PostAuthDelay is scaled to microseconds here */
    if (PostAuthDelay)
        pg_usleep(PostAuthDelay * 1000000L);

    /* Initialization phase: switched to NormalProcessing after InitPostgres */
    SetProcessingMode(InitProcessing);

    /*
     * Set up signal handlers.  We operate on databases much like a regular
     * backend, so we use the same signal handling.  See equivalent code in
     * tcop/postgres.c.
     */
    pqsignal(SIGHUP, av_sighup_handler);
    pqsignal(SIGINT, StatementCancelHandler);
    pqsignal(SIGTERM, avl_sigterm_handler);

    pqsignal(SIGQUIT, quickdie);
    InitializeTimeouts();       /* establishes SIGALRM handler */

    pqsignal(SIGPIPE, SIG_IGN); /* SIGPIPE is ignored */
    pqsignal(SIGUSR1, procsignal_sigusr1_handler);
    pqsignal(SIGUSR2, avl_sigusr2_handler);
    pqsignal(SIGFPE, FloatExceptionHandler);
    pqsignal(SIGCHLD, SIG_DFL);

    /* Early initialization */
    BaseInit();

    /*
     * Create a per-backend PGPROC struct in shared memory, except in the
     * EXEC_BACKEND case where this was done in SubPostmasterMain. We must do
     * this before we can use LWLocks (and in the EXEC_BACKEND case we already
     * had to do some stuff with LWLocks).
     */
#ifndef EXEC_BACKEND
    InitProcess();
#endif

    /* All database/user arguments are NULL or InvalidOid in this call */
    InitPostgres(NULL, InvalidOid, NULL, InvalidOid, NULL, false);

    SetProcessingMode(NormalProcessing);

    /*
     * Create a memory context that we will do all our work in.  We do this so
     * that we can reset the context during error recovery and thereby avoid
     * possible memory leaks.
     */
    AutovacMemCxt = AllocSetContextCreate(TopMemoryContext,
                                          "Autovacuum Launcher",
                                          ALLOCSET_DEFAULT_SIZES);
    MemoryContextSwitchTo(AutovacMemCxt);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * This code is a stripped down version of PostgresMain error recovery.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevents interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Forget any pending QueryCancel or timeout request */
        disable_all_timeouts(false);
        QueryCancelPending = false; /* second to avoid race condition */

        /* Report the error to the server log */
        EmitErrorReport();

        /* Abort the current transaction in order to recover */
        AbortCurrentTransaction();

        /*
         * Release any other resources, for the case where we were not in a
         * transaction.
         */
        LWLockReleaseAll();
        pgstat_report_wait_end();
        AbortBufferIO();
        UnlockBuffers();
        /* this is probably dead code, but let's be safe: */
        if (AuxProcessResourceOwner)
            ReleaseAuxProcessResources(false);
        AtEOXact_Buffers(false);
        AtEOXact_SMgr();
        AtEOXact_Files(false);
        AtEOXact_HashTables(false);

        /*
         * Now return to normal top-level context and clear ErrorContext for
         * next time.
         */
        MemoryContextSwitchTo(AutovacMemCxt);
        FlushErrorState();

        /* Flush any leaked data in the top-level context */
        MemoryContextResetAndDeleteChildren(AutovacMemCxt);

        /* don't leave dangling pointers to freed memory */
        DatabaseListCxt = NULL;
        dlist_init(&DatabaseList);

        /*
         * Make sure pgstat also considers our stat data as gone.  Note: we
         * mustn't use autovac_refresh_stats here.
         */
        pgstat_clear_snapshot();

        /* Now we can allow interrupts again */
        RESUME_INTERRUPTS();

        /* if in shutdown mode, no need for anything further; just go away */
        if (got_SIGTERM)
            goto shutdown;

        /*
         * Sleep at least 1 second after any error.  We don't want to be
         * filling the error logs as fast as we can.
         */
        pg_usleep(1000000L);
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /* must unblock signals before calling rebuild_database_list */
    PG_SETMASK(&UnBlockSig);

    /*
     * Set always-secure search path.  Launcher doesn't connect to a database,
     * so this has no effect.
     */
    SetConfigOption("search_path", "", PGC_SUSET, PGC_S_OVERRIDE);

    /*
     * Force zero_damaged_pages OFF in the autovac process, even if it is set
     * in postgresql.conf.  We don't really want such a dangerous option being
     * applied non-interactively.
     */
    SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);

    /*
     * Force settable timeouts off to avoid letting these settings prevent
     * regular maintenance from being executed.
     */
    SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
    SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
    SetConfigOption("idle_in_transaction_session_timeout", "0",
                    PGC_SUSET, PGC_S_OVERRIDE);

    /*
     * Force default_transaction_isolation to READ COMMITTED.  We don't want
     * to pay the overhead of serializable mode, nor add any risk of causing
     * deadlocks or delaying other transactions.
     */
    SetConfigOption("default_transaction_isolation", "read committed",
                    PGC_SUSET, PGC_S_OVERRIDE);

    /*
     * In emergency mode, just start a worker (unless shutdown was requested)
     * and go away.
     */
    if (!AutoVacuumingActive())
    {
        if (!got_SIGTERM)
            do_start_worker();
        proc_exit(0);           /* done */
    }

    /* Publish our PID in shared memory; it is reset to 0 at shutdown below */
    AutoVacuumShmem->av_launcherpid = MyProcPid;

    /*
     * Create the initial database list.  The invariant we want this list to
     * keep is that it's ordered by decreasing next_time.  As soon as an entry
     * is updated to a higher time, it will be moved to the front (which is
     * correct because the only operation is to add autovacuum_naptime to the
     * entry, and time always increases).
     */
    rebuild_database_list(InvalidOid);

    /* loop until shutdown request */
    while (!got_SIGTERM)
    {
        struct timeval nap;
        TimestampTz current_time = 0;
        bool        can_launch;

        /*
         * This loop is a bit different from the normal use of WaitLatch,
         * because we'd like to sleep before the first launch of a child
         * process.  So it's WaitLatch, then ResetLatch, then check for
         * wakening conditions.
         */

        /* Compute the sleep interval into nap; first argument is "a free worker slot exists" */
        launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
                                 false, &nap);

        /*
         * Wait until naptime expires or we get some type of signal (all the
         * signal handlers will wake us by calling SetLatch).
         * The timeout argument is nap converted to milliseconds.
         */
        (void) WaitLatch(MyLatch,
                         WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                         (nap.tv_sec * 1000L) + (nap.tv_usec / 1000L),
                         WAIT_EVENT_AUTOVACUUM_MAIN);

        ResetLatch(MyLatch);

        /* Process sinval catchup interrupts that happened while sleeping */
        ProcessCatchupInterrupt();

        /* the normal shutdown case */
        if (got_SIGTERM)
            break;

        if (got_SIGHUP)
        {
            /* Clear the flag before acting on it, then reload configuration */
            got_SIGHUP = false;
            ProcessConfigFile(PGC_SIGHUP);

            /* shutdown requested in config file? */
            if (!AutoVacuumingActive())
                break;

            /* rebalance in case the default cost parameters changed */
            LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
            autovac_balance_cost();
            LWLockRelease(AutovacuumLock);

            /* rebuild the list in case the naptime changed */
            rebuild_database_list(InvalidOid);
        }

        /*
         * a worker finished, or postmaster signalled failure to start a
         * worker
         */
        if (got_SIGUSR2)
        {
            got_SIGUSR2 = false;

            /* rebalance cost limits, if needed */
            if (AutoVacuumShmem->av_signal[AutoVacRebalance])
            {
                LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);
                AutoVacuumShmem->av_signal[AutoVacRebalance] = false;
                autovac_balance_cost();
                LWLockRelease(AutovacuumLock);
            }

            if (AutoVacuumShmem->av_signal[AutoVacForkFailed])
            {
                /*
                 * If the postmaster failed to start a new worker, we sleep
                 * for a little while and resend the signal.  The new worker's
                 * state is still in memory, so this is sufficient.  After
                 * that, we restart the main loop.
                 *
                 * XXX should we put a limit to the number of times we retry?
                 * I don't think it makes much sense, because a future start
                 * of a worker will continue to fail in the same way.
                 */
                AutoVacuumShmem->av_signal[AutoVacForkFailed] = false;
                pg_usleep(1000000L);    /* 1s */
                SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER);
                continue;
            }
        }

        /*
         * There are some conditions that we need to check before trying to
         * start a worker.  First, we need to make sure that there is a worker
         * slot available.  Second, we need to make sure that no other worker
         * failed while starting up.
         */

        current_time = GetCurrentTimestamp();
        LWLockAcquire(AutovacuumLock, LW_SHARED);

        can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);

        if (AutoVacuumShmem->av_startingWorker != NULL)
        {
            int         waittime;
            WorkerInfo  worker = AutoVacuumShmem->av_startingWorker;

            /*
             * We can't launch another worker when another one is still
             * starting up (or failed while doing so), so just sleep for a bit
             * more; that worker will wake us up again as soon as it's ready.
             * We will only wait autovacuum_naptime seconds (up to a maximum
             * of 60 seconds) for this to happen however.  Note that failure
             * to connect to a particular database is not a problem here,
             * because the worker removes itself from the startingWorker
             * pointer before trying to connect.  Problems detected by the
             * postmaster (like fork() failure) are also reported and handled
             * differently.  The only problems that may cause this code to
             * fire are errors in the earlier sections of AutoVacWorkerMain,
             * before the worker removes the WorkerInfo from the
             * startingWorker pointer.
             */
            waittime = Min(autovacuum_naptime, 60) * 1000;  /* milliseconds */
            if (TimestampDifferenceExceeds(worker->wi_launchtime, current_time,
                                           waittime))
            {
                /* Swap the shared lock for an exclusive one before modifying shared state */
                LWLockRelease(AutovacuumLock);
                LWLockAcquire(AutovacuumLock, LW_EXCLUSIVE);

                /*
                 * No other process can put a worker in starting mode, so if
                 * startingWorker is still set (non-NULL) after exchanging our
                 * lock, we assume it's the same one we saw above (so we don't
                 * recheck the launch time).
                 */
                if (AutoVacuumShmem->av_startingWorker != NULL)
                {
                    /* Reset the stuck worker's slot and return it to the free list */
                    worker = AutoVacuumShmem->av_startingWorker;
                    worker->wi_dboid = InvalidOid;
                    worker->wi_tableoid = InvalidOid;
                    worker->wi_sharedrel = false;
                    worker->wi_proc = NULL;
                    worker->wi_launchtime = 0;
                    dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
                                    &worker->wi_links);
                    AutoVacuumShmem->av_startingWorker = NULL;
                    elog(WARNING, "worker took too long to start; canceled");
                }
            }
            else
                can_launch = false;
        }
        LWLockRelease(AutovacuumLock);  /* either shared or exclusive */

        /* if we can't do anything, just go back to sleep */
        if (!can_launch)
            continue;

        /* We're OK to start a new worker */

        if (dlist_is_empty(&DatabaseList))
        {
            /*
             * Special case when the list is empty: start a worker right away.
             * This covers the initial case, when no database is in pgstats
             * (thus the list is empty).  Note that the constraints in
             * launcher_determine_sleep keep us from starting workers too
             * quickly (at most once every autovacuum_naptime when the list is
             * empty).
             */
            launch_worker(current_time);
        }
        else
        {
            /*
             * because rebuild_database_list constructs a list with most
             * distant adl_next_worker first, we obtain our database from the
             * tail of the list.
             */
            avl_dbase  *avdb;

            avdb = dlist_tail_element(avl_dbase, adl_node, &DatabaseList);

            /*
             * launch a worker if next_worker is right now or it is in the
             * past
             */
            if (TimestampDifferenceExceeds(avdb->adl_next_worker,
                                           current_time, 0))
                launch_worker(current_time);
        }
    }

    /*
     * Normal exit from the autovac launcher is here.  The error-recovery
     * path above also jumps to this label when got_SIGTERM is set.
     */
shutdown:
    ereport(DEBUG1,
            (errmsg("autovacuum launcher shutting down")));
    AutoVacuumShmem->av_launcherpid = 0;

    proc_exit(0);               /* done */
}

"PostgreSQL中AutoVacLauncherMain函数的实现逻辑是什么"的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注网站,小编将为大家输出更多高质量的实用文章!

0