PostgreSQL 源码解读(10)- 插入数据#9(ProcessQuery)
发表于:2025-01-24 作者:千家信息网编辑
千家信息网最后更新:2025年01月24日。本文简单介绍了PG插入数据部分的源码,主要内容包括ProcessQuery函数的实现逻辑,该函数位于文件pquery.c中。一、基础信息:ProcessQuery函数使用的数据结构、宏定义以及依赖的函数等。
千家信息网最后更新:2025年01月24日 | PostgreSQL 源码解读(10)- 插入数据#9(ProcessQuery)
本文简单介绍了PG插入数据部分的源码,主要内容包括ProcessQuery函数的实现逻辑,该函数位于文件pquery.c中。
一、基础信息
ProcessQuery函数使用的数据结构、宏定义以及依赖的函数等。
数据结构/宏定义
1、NodeTag
/*
 * Node tags (an enum type).
 *
 * The first field of every node is NodeTag. Each node created (with makeNode)
 * will have one of the following tags as the value of its first field.
 *
 * Note that inserting or deleting node types changes the numbers of other
 * node types later in the list.  This is no problem during development, since
 * the node numbers are never stored on disk.  But don't do it in a released
 * branch, because that would represent an ABI break for extensions.
 */
typedef enum NodeTag
{
	T_Invalid = 0,

	/*
	 * TAGS FOR EXECUTOR NODES (execnodes.h)
	 */
	T_IndexInfo,
	T_ExprContext,
	T_ProjectionInfo,
	T_JunkFilter,
	T_OnConflictSetState,
	T_ResultRelInfo,
	T_EState,
	T_TupleTableSlot,

	/*
	 * TAGS FOR PLAN NODES (plannodes.h)
	 */
	T_Plan,
	T_Result,
	T_ProjectSet,
	T_ModifyTable,
	T_Append,
	T_MergeAppend,
	T_RecursiveUnion,
	T_BitmapAnd,
	T_BitmapOr,
	T_Scan,
	T_SeqScan,
	T_SampleScan,
	T_IndexScan,
	T_IndexOnlyScan,
	T_BitmapIndexScan,
	T_BitmapHeapScan,
	T_TidScan,
	T_SubqueryScan,
	T_FunctionScan,
	T_ValuesScan,
	T_TableFuncScan,
	T_CteScan,
	T_NamedTuplestoreScan,
	T_WorkTableScan,
	T_ForeignScan,
	T_CustomScan,
	T_Join,
	T_NestLoop,
	T_MergeJoin,
	T_HashJoin,
	T_Material,
	T_Sort,
	T_Group,
	T_Agg,
	T_WindowAgg,
	T_Unique,
	T_Gather,
	T_GatherMerge,
	T_Hash,
	T_SetOp,
	T_LockRows,
	T_Limit,
	/* these aren't subclasses of Plan: */
	T_NestLoopParam,
	T_PlanRowMark,
	T_PartitionPruneInfo,
	T_PartitionedRelPruneInfo,
	T_PartitionPruneStepOp,
	T_PartitionPruneStepCombine,
	T_PlanInvalItem,

	/*
	 * TAGS FOR PLAN STATE NODES (execnodes.h)
	 *
	 * These should correspond one-to-one with Plan node types.
	 */
	T_PlanState,
	T_ResultState,
	T_ProjectSetState,
	T_ModifyTableState,
	T_AppendState,
	T_MergeAppendState,
	T_RecursiveUnionState,
	T_BitmapAndState,
	T_BitmapOrState,
	T_ScanState,
	T_SeqScanState,
	T_SampleScanState,
	T_IndexScanState,
	T_IndexOnlyScanState,
	T_BitmapIndexScanState,
	T_BitmapHeapScanState,
	T_TidScanState,
	T_SubqueryScanState,
	T_FunctionScanState,
	T_TableFuncScanState,
	T_ValuesScanState,
	T_CteScanState,
	T_NamedTuplestoreScanState,
	T_WorkTableScanState,
	T_ForeignScanState,
	T_CustomScanState,
	T_JoinState,
	T_NestLoopState,
	T_MergeJoinState,
	T_HashJoinState,
	T_MaterialState,
	T_SortState,
	T_GroupState,
	T_AggState,
	T_WindowAggState,
	T_UniqueState,
	T_GatherState,
	T_GatherMergeState,
	T_HashState,
	T_SetOpState,
	T_LockRowsState,
	T_LimitState,

	/*
	 * TAGS FOR PRIMITIVE NODES (primnodes.h)
	 */
	T_Alias,
	T_RangeVar,
	T_TableFunc,
	T_Expr,
	T_Var,
	T_Const,
	T_Param,
	T_Aggref,
	T_GroupingFunc,
	T_WindowFunc,
	T_ArrayRef,
	T_FuncExpr,
	T_NamedArgExpr,
	T_OpExpr,
	T_DistinctExpr,
	T_NullIfExpr,
	T_ScalarArrayOpExpr,
	T_BoolExpr,
	T_SubLink,
	T_SubPlan,
	T_AlternativeSubPlan,
	T_FieldSelect,
	T_FieldStore,
	T_RelabelType,
	T_CoerceViaIO,
	T_ArrayCoerceExpr,
	T_ConvertRowtypeExpr,
	T_CollateExpr,
	T_CaseExpr,
	T_CaseWhen,
	T_CaseTestExpr,
	T_ArrayExpr,
	T_RowExpr,
	T_RowCompareExpr,
	T_CoalesceExpr,
	T_MinMaxExpr,
	T_SQLValueFunction,
	T_XmlExpr,
	T_NullTest,
	T_BooleanTest,
	T_CoerceToDomain,
	T_CoerceToDomainValue,
	T_SetToDefault,
	T_CurrentOfExpr,
	T_NextValueExpr,
	T_InferenceElem,
	T_TargetEntry,
	T_RangeTblRef,
	T_JoinExpr,
	T_FromExpr,
	T_OnConflictExpr,
	T_IntoClause,

	/*
	 * TAGS FOR EXPRESSION STATE NODES (execnodes.h)
	 *
	 * ExprState represents the evaluation state for a whole expression tree.
	 * Most Expr-based plan nodes do not have a corresponding expression state
	 * node, they're fully handled within execExpr* - but sometimes the state
	 * needs to be shared with other parts of the executor, as for example
	 * with AggrefExprState, which nodeAgg.c has to modify.
	 */
	T_ExprState,
	T_AggrefExprState,
	T_WindowFuncExprState,
	T_SetExprState,
	T_SubPlanState,
	T_AlternativeSubPlanState,
	T_DomainConstraintState,

	/*
	 * TAGS FOR PLANNER NODES (relation.h)
	 */
	T_PlannerInfo,
	T_PlannerGlobal,
	T_RelOptInfo,
	T_IndexOptInfo,
	T_ForeignKeyOptInfo,
	T_ParamPathInfo,
	T_Path,
	T_IndexPath,
	T_BitmapHeapPath,
	T_BitmapAndPath,
	T_BitmapOrPath,
	T_TidPath,
	T_SubqueryScanPath,
	T_ForeignPath,
	T_CustomPath,
	T_NestPath,
	T_MergePath,
	T_HashPath,
	T_AppendPath,
	T_MergeAppendPath,
	T_ResultPath,
	T_MaterialPath,
	T_UniquePath,
	T_GatherPath,
	T_GatherMergePath,
	T_ProjectionPath,
	T_ProjectSetPath,
	T_SortPath,
	T_GroupPath,
	T_UpperUniquePath,
	T_AggPath,
	T_GroupingSetsPath,
	T_MinMaxAggPath,
	T_WindowAggPath,
	T_SetOpPath,
	T_RecursiveUnionPath,
	T_LockRowsPath,
	T_ModifyTablePath,
	T_LimitPath,
	/* these aren't subclasses of Path: */
	T_EquivalenceClass,
	T_EquivalenceMember,
	T_PathKey,
	T_PathTarget,
	T_RestrictInfo,
	T_PlaceHolderVar,
	T_SpecialJoinInfo,
	T_AppendRelInfo,
	T_PlaceHolderInfo,
	T_MinMaxAggInfo,
	T_PlannerParamItem,
	T_RollupData,
	T_GroupingSetData,
	T_StatisticExtInfo,

	/*
	 * TAGS FOR MEMORY NODES (memnodes.h)
	 */
	T_MemoryContext,
	T_AllocSetContext,
	T_SlabContext,
	T_GenerationContext,

	/*
	 * TAGS FOR VALUE NODES (value.h)
	 */
	T_Value,
	T_Integer,
	T_Float,
	T_String,
	T_BitString,
	T_Null,

	/*
	 * TAGS FOR LIST NODES (pg_list.h)
	 */
	T_List,
	T_IntList,
	T_OidList,

	/*
	 * TAGS FOR EXTENSIBLE NODES (extensible.h)
	 */
	T_ExtensibleNode,

	/*
	 * TAGS FOR STATEMENT NODES (mostly in parsenodes.h)
	 */
	T_RawStmt,
	T_Query,
	T_PlannedStmt,
	T_InsertStmt,
	T_DeleteStmt,
	T_UpdateStmt,
	T_SelectStmt,
	T_AlterTableStmt,
	T_AlterTableCmd,
	T_AlterDomainStmt,
	T_SetOperationStmt,
	T_GrantStmt,
	T_GrantRoleStmt,
	T_AlterDefaultPrivilegesStmt,
	T_ClosePortalStmt,
	T_ClusterStmt,
	T_CopyStmt,
	T_CreateStmt,
	T_DefineStmt,
	T_DropStmt,
	T_TruncateStmt,
	T_CommentStmt,
	T_FetchStmt,
	T_IndexStmt,
	T_CreateFunctionStmt,
	T_AlterFunctionStmt,
	T_DoStmt,
	T_RenameStmt,
	T_RuleStmt,
	T_NotifyStmt,
	T_ListenStmt,
	T_UnlistenStmt,
	T_TransactionStmt,
	T_ViewStmt,
	T_LoadStmt,
	T_CreateDomainStmt,
	T_CreatedbStmt,
	T_DropdbStmt,
	T_VacuumStmt,
	T_ExplainStmt,
	T_CreateTableAsStmt,
	T_CreateSeqStmt,
	T_AlterSeqStmt,
	T_VariableSetStmt,
	T_VariableShowStmt,
	T_DiscardStmt,
	T_CreateTrigStmt,
	T_CreatePLangStmt,
	T_CreateRoleStmt,
	T_AlterRoleStmt,
	T_DropRoleStmt,
	T_LockStmt,
	T_ConstraintsSetStmt,
	T_ReindexStmt,
	T_CheckPointStmt,
	T_CreateSchemaStmt,
	T_AlterDatabaseStmt,
	T_AlterDatabaseSetStmt,
	T_AlterRoleSetStmt,
	T_CreateConversionStmt,
	T_CreateCastStmt,
	T_CreateOpClassStmt,
	T_CreateOpFamilyStmt,
	T_AlterOpFamilyStmt,
	T_PrepareStmt,
	T_ExecuteStmt,
	T_DeallocateStmt,
	T_DeclareCursorStmt,
	T_CreateTableSpaceStmt,
	T_DropTableSpaceStmt,
	T_AlterObjectDependsStmt,
	T_AlterObjectSchemaStmt,
	T_AlterOwnerStmt,
	T_AlterOperatorStmt,
	T_DropOwnedStmt,
	T_ReassignOwnedStmt,
	T_CompositeTypeStmt,
	T_CreateEnumStmt,
	T_CreateRangeStmt,
	T_AlterEnumStmt,
	T_AlterTSDictionaryStmt,
	T_AlterTSConfigurationStmt,
	T_CreateFdwStmt,
	T_AlterFdwStmt,
	T_CreateForeignServerStmt,
	T_AlterForeignServerStmt,
	T_CreateUserMappingStmt,
	T_AlterUserMappingStmt,
	T_DropUserMappingStmt,
	T_AlterTableSpaceOptionsStmt,
	T_AlterTableMoveAllStmt,
	T_SecLabelStmt,
	T_CreateForeignTableStmt,
	T_ImportForeignSchemaStmt,
	T_CreateExtensionStmt,
	T_AlterExtensionStmt,
	T_AlterExtensionContentsStmt,
	T_CreateEventTrigStmt,
	T_AlterEventTrigStmt,
	T_RefreshMatViewStmt,
	T_ReplicaIdentityStmt,
	T_AlterSystemStmt,
	T_CreatePolicyStmt,
	T_AlterPolicyStmt,
	T_CreateTransformStmt,
	T_CreateAmStmt,
	T_CreatePublicationStmt,
	T_AlterPublicationStmt,
	T_CreateSubscriptionStmt,
	T_AlterSubscriptionStmt,
	T_DropSubscriptionStmt,
	T_CreateStatsStmt,
	T_AlterCollationStmt,
	T_CallStmt,

	/*
	 * TAGS FOR PARSE TREE NODES (parsenodes.h)
	 */
	T_A_Expr,
	T_ColumnRef,
	T_ParamRef,
	T_A_Const,
	T_FuncCall,
	T_A_Star,
	T_A_Indices,
	T_A_Indirection,
	T_A_ArrayExpr,
	T_ResTarget,
	T_MultiAssignRef,
	T_TypeCast,
	T_CollateClause,
	T_SortBy,
	T_WindowDef,
	T_RangeSubselect,
	T_RangeFunction,
	T_RangeTableSample,
	T_RangeTableFunc,
	T_RangeTableFuncCol,
	T_TypeName,
	T_ColumnDef,
	T_IndexElem,
	T_Constraint,
	T_DefElem,
	T_RangeTblEntry,
	T_RangeTblFunction,
	T_TableSampleClause,
	T_WithCheckOption,
	T_SortGroupClause,
	T_GroupingSet,
	T_WindowClause,
	T_ObjectWithArgs,
	T_AccessPriv,
	T_CreateOpClassItem,
	T_TableLikeClause,
	T_FunctionParameter,
	T_LockingClause,
	T_RowMarkClause,
	T_XmlSerialize,
	T_WithClause,
	T_InferClause,
	T_OnConflictClause,
	T_CommonTableExpr,
	T_RoleSpec,
	T_TriggerTransition,
	T_PartitionElem,
	T_PartitionSpec,
	T_PartitionBoundSpec,
	T_PartitionRangeDatum,
	T_PartitionCmd,
	T_VacuumRelation,

	/*
	 * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h)
	 */
	T_IdentifySystemCmd,
	T_BaseBackupCmd,
	T_CreateReplicationSlotCmd,
	T_DropReplicationSlotCmd,
	T_StartReplicationCmd,
	T_TimeLineHistoryCmd,
	T_SQLCmd,

	/*
	 * TAGS FOR RANDOM OTHER STUFF
	 *
	 * These are objects that aren't part of parse/plan/execute node tree
	 * structures, but we give them NodeTags anyway for identification
	 * purposes (usually because they are involved in APIs where we want to
	 * pass multiple object types through the same pointer).
	 */
	T_TriggerData,				/* in commands/trigger.h */
	T_EventTriggerData,			/* in commands/event_trigger.h */
	T_ReturnSetInfo,			/* in nodes/execnodes.h */
	T_WindowObjectData,			/* private in nodeWindowAgg.c */
	T_TIDBitmap,				/* in nodes/tidbitmap.h */
	T_InlineCodeBlock,			/* in nodes/parsenodes.h */
	T_FdwRoutine,				/* in foreign/fdwapi.h */
	T_IndexAmRoutine,			/* in access/amapi.h */
	T_TsmRoutine,				/* in access/tsmapi.h */
	T_ForeignKeyCacheInfo,		/* in utils/rel.h */
	T_CallContext				/* in nodes/parsenodes.h */
} NodeTag;

/*
 * The first field of a node of any type is guaranteed to be the NodeTag.
 * Hence the type of any node can be gotten by casting it to Node. Declaring
 * a variable to be of Node * (instead of void *) can also facilitate
 * debugging.
 */
typedef struct Node
{
	NodeTag		type;
} Node;

/* Read the tag of any node through a pointer to it. */
#define nodeTag(nodeptr)		(((const Node*)(nodeptr))->type)
2、MemoryContext
//内存上下文//AllocSetContext结构体的MemoryContextData与其共享// typedef struct MemoryContextData { NodeTag type; /* identifies exact kind of context */ /* these two fields are placed here to minimize alignment wastage: */ bool isReset; /* T = no space alloced since last reset */ bool allowInCritSection; /* allow palloc in critical section */ const MemoryContextMethods *methods; /* virtual function table */ MemoryContext parent; /* NULL if no parent (toplevel context) */ MemoryContext firstchild; /* head of linked list of children */ MemoryContext prevchild; /* previous child of same parent */ MemoryContext nextchild; /* next child of same parent */ const char *name; /* context name (just for debugging) */ const char *ident; /* context ID if any (just for debugging) */ MemoryContextCallback *reset_cbs; /* list of reset/delete callbacks */ } MemoryContextData; /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */ /* * Type MemoryContextData is declared in nodes/memnodes.h. Most users * of memory allocation should just treat it as an abstract type, so we * do not provide the struct contents here. */ typedef struct MemoryContextData *MemoryContext;
3、AllocSet
/*
 * AllocSetContext
 *		The standard MemoryContext implementation.
 *
 * header.isReset set means AllocSetReset has nothing to do.  That is not the
 * same as the set being physically empty (an empty blocks list), since a
 * keeper block is always retained; nor is it the same as being logically
 * empty, since pfree'ing the last active chunk is not detected.
 */
typedef struct AllocSetContext
{
	/* Standard memory-context fields */
	MemoryContextData header;

	/* Storage currently allocated in this context */
	AllocBlock	blocks;			/* head of list of blocks in this set */
	AllocChunk	freelist[ALLOCSET_NUM_FREELISTS];	/* free chunk lists */

	/* Allocation parameters for this context */
	Size		initBlockSize;	/* initial block size */
	Size		maxBlockSize;	/* maximum block size */
	Size		nextBlockSize;	/* next block size to allocate */
	Size		allocChunkLimit;	/* effective chunk size limit */
	AllocBlock	keeper;			/* keep this block over resets */

	/* Which context freelist this context may be parked on, if any */
	int			freeListIndex;	/* index in context_freelists[], or -1 */
} AllocSetContext;

/* Pointer form used throughout the allocator code. */
typedef AllocSetContext *AllocSet;
4、AllocBlock
/*
 * AllocBlock
 *		An AllocBlock is the unit of memory that is obtained by aset.c
 *		from malloc().  It contains one or more AllocChunks, which are
 *		the units requested by palloc() and freed by pfree().  AllocChunks
 *		cannot be returned to malloc() individually, instead they are put
 *		on freelists by pfree() and re-used by the next palloc() that has
 *		a matching request size.
 *
 *		AllocBlockData is the header data for a block --- the usable space
 *		within the block begins at the next alignment boundary.
 */

/*
 * Forward reference: declared ahead of the struct because the prev/next
 * links inside AllocBlockData are themselves of type AllocBlock.
 */
typedef struct AllocBlockData *AllocBlock;

typedef struct AllocBlockData
{
	AllocSet	aset;			/* aset that owns this block */
	AllocBlock	prev;			/* prev block in aset's blocks list, if any */
	AllocBlock	next;			/* next block in aset's blocks list, if any */
	char	   *freeptr;		/* start of free space in this block */
	char	   *endptr;			/* end of space in this block */
} AllocBlockData;
5、AllocChunk
/* * AllocChunk * The prefix of each piece of memory in an AllocBlock * * Note: to meet the memory context APIs, the payload area of the chunk must * be maxaligned, and the "aset" link must be immediately adjacent to the * payload area (cf. GetMemoryChunkContext). We simplify matters for this * module by requiring sizeof(AllocChunkData) to be maxaligned, and then * we can ensure things work by adding any required alignment padding before * the "aset" field. There is a static assertion below that the alignment * is done correctly. */ typedef struct AllocChunkData { /* size is always the size of the usable space in the chunk */ Size size; #ifdef MEMORY_CONTEXT_CHECKING /* when debugging memory usage, also store actual requested size */ /* this is zero in a free chunk */ Size requested_size; #define ALLOCCHUNK_RAWSIZE (SIZEOF_SIZE_T * 2 + SIZEOF_VOID_P) #else #define ALLOCCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P) #endif /* MEMORY_CONTEXT_CHECKING */ /* ensure proper alignment by adding padding if needed */ #if (ALLOCCHUNK_RAWSIZE % MAXIMUM_ALIGNOF) != 0 char padding[MAXIMUM_ALIGNOF - ALLOCCHUNK_RAWSIZE % MAXIMUM_ALIGNOF]; #endif /* aset is the owning aset if allocated, or the freelist link if free */ void *aset; /* there must not be any padding to reach a MAXALIGN boundary here! */ } AllocChunkData; typedef struct AllocChunkData *AllocChunk;
6、AllocSetFreeList
/*
 * AllocSetFreeList
 *		Bookkeeping for one list of AllocSetContext objects: a length
 *		counter plus a pointer to the first entry.
 */
typedef struct AllocSetFreeList
{
	int			num_free;		/* current list length */
	AllocSetContext *first_free;	/* list header */
} AllocSetFreeList;
7、context_freelists
/*
 * Two context freelists, both starting out empty:
 * context_freelists[0] is for default params, [1] for small params
 */
static AllocSetFreeList context_freelists[2] =
{
	{
		0, NULL
	},
	{
		0, NULL
	}
};
8、MemoryContextMethods / AllocSetMethods
//内存管理方法 typedef struct MemoryContextMethods { void *(*alloc) (MemoryContext context, Size size); /* call this free_p in case someone #define's free() */ void (*free_p) (MemoryContext context, void *pointer); void *(*realloc) (MemoryContext context, void *pointer, Size size); void (*reset) (MemoryContext context); void (*delete_context) (MemoryContext context); Size (*get_chunk_space) (MemoryContext context, void *pointer); bool (*is_empty) (MemoryContext context); void (*stats) (MemoryContext context, MemoryStatsPrintFunc printfunc, void *passthru, MemoryContextCounters *totals); #ifdef MEMORY_CONTEXT_CHECKING void (*check) (MemoryContext context); #endif } MemoryContextMethods;//预定义 /* * This is the virtual function table for AllocSet contexts. */ static const MemoryContextMethods AllocSetMethods = { AllocSetAlloc, AllocSetFree, AllocSetRealloc, AllocSetReset, AllocSetDelete, AllocSetGetChunkSpace, AllocSetIsEmpty, AllocSetStats #ifdef MEMORY_CONTEXT_CHECKING ,AllocSetCheck #endif };
9、宏定义
/* Header sizes of a block and of a chunk. */
#define ALLOC_BLOCKHDRSZ	MAXALIGN(sizeof(AllocBlockData))
#define ALLOC_CHUNKHDRSZ	sizeof(struct AllocChunkData)

/*
 * Recommended default alloc parameters, suitable for "ordinary" contexts
 * that might hold quite a lot of data.
 */
#define ALLOCSET_DEFAULT_MINSIZE   0
#define ALLOCSET_DEFAULT_INITSIZE  (8 * 1024)
#define ALLOCSET_DEFAULT_MAXSIZE   (8 * 1024 * 1024)
/* Expands to three comma-separated arguments: min, init, max. */
#define ALLOCSET_DEFAULT_SIZES \
	ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE

/* Defined here as a no-op statement. */
#define VALGRIND_MAKE_MEM_NOACCESS(addr, size)	do {} while (0)

#define ALLOC_MINBITS		3	/* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS	11
/* 1 << (11 - 1 + 3) = 8192 */
#define ALLOC_CHUNK_LIMIT	(1 << (ALLOCSET_NUM_FREELISTS-1+ALLOC_MINBITS))
#define ALLOCSET_SEPARATE_THRESHOLD  8192

/* We allow chunks to be at most 1/4 of maxBlockSize (less overhead) */
#define ALLOC_CHUNK_FRACTION	4
依赖的函数
1、CreateQueryDesc
//根据输入的参数构造QueryDesc /* * CreateQueryDesc */ QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, DestReceiver *dest, ParamListInfo params, QueryEnvironment *queryEnv, int instrument_options) { QueryDesc *qd = (QueryDesc *) palloc(sizeof(QueryDesc)); qd->operation = plannedstmt->commandType; /* operation */ qd->plannedstmt = plannedstmt; /* plan */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ /* RI check snapshot */ qd->crosscheck_snapshot = RegisterSnapshot(crosscheck_snapshot); qd->dest = dest; /* output dest */ qd->params = params; /* parameter values passed into query */ qd->queryEnv = queryEnv; qd->instrument_options = instrument_options; /* instrumentation wanted? */ /* null these fields until set by ExecutorStart */ qd->tupDesc = NULL; qd->estate = NULL; qd->planstate = NULL; qd->totaltime = NULL; /* not yet executed */ qd->already_executed = false; return qd; }
2、CreateExecutorState
/* ---------------- * CreateExecutorState * * Create and initialize an EState node, which is the root of * working storage for an entire Executor invocation. * * Principally, this creates the per-query memory context that will be * used to hold all working data that lives till the end of the query. * Note that the per-query context will become a child of the caller's * CurrentMemoryContext. * ---------------- */ EState * CreateExecutorState(void)//构建EState { EState *estate;//EState指针 MemoryContext qcontext; MemoryContext oldcontext; /* * Create the per-query context for this Executor run. */ qcontext = AllocSetContextCreate(CurrentMemoryContext, "ExecutorState", ALLOCSET_DEFAULT_SIZES);//创建MemoryContext /* * Make the EState node within the per-query context. This way, we don't * need a separate pfree() operation for it at shutdown. */ oldcontext = MemoryContextSwitchTo(qcontext);//切换至新创建的MemoryContext estate = makeNode(EState);//创建Node /* * Initialize all fields of the Executor State structure */ //初始化Executor State structure estate->es_direction = ForwardScanDirection; estate->es_snapshot = InvalidSnapshot; /* caller must initialize this */ estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */ estate->es_range_table = NIL; estate->es_plannedstmt = NULL; estate->es_junkFilter = NULL; estate->es_output_cid = (CommandId) 0; estate->es_result_relations = NULL; estate->es_num_result_relations = 0; estate->es_result_relation_info = NULL; estate->es_root_result_relations = NULL; estate->es_num_root_result_relations = 0; estate->es_tuple_routing_result_relations = NIL; estate->es_trig_target_relations = NIL; estate->es_trig_tuple_slot = NULL; estate->es_trig_oldtup_slot = NULL; estate->es_trig_newtup_slot = NULL; estate->es_param_list_info = NULL; estate->es_param_exec_vals = NULL; estate->es_queryEnv = NULL; estate->es_query_cxt = qcontext; estate->es_tupleTable = NIL; estate->es_rowMarks = NIL; estate->es_processed = 0; estate->es_lastoid = InvalidOid; 
estate->es_top_eflags = 0; estate->es_instrument = 0; estate->es_finished = false; estate->es_exprcontexts = NIL; estate->es_subplanstates = NIL; estate->es_auxmodifytables = NIL; estate->es_per_tuple_exprcontext = NULL; estate->es_epqTuple = NULL; estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; estate->es_sourceText = NULL; estate->es_use_parallel_mode = false; estate->es_jit_flags = 0; estate->es_jit = NULL; /* * Return the executor state structure */ MemoryContextSwitchTo(oldcontext); return estate; } /*------------------ makeNode --------------------*/ #define makeNode(_type_) ((_type_ *) newNode(sizeof(_type_),T_##_type_)) /* * newNode - * create a new node of the specified size and tag the node with the * specified tag. * * !WARNING!: Avoid using newNode directly. You should be using the * macro makeNode. eg. to create a Query node, use makeNode(Query) * * Note: the size argument should always be a compile-time constant, so the * apparent risk of multiple evaluation doesn't matter in practice. */ #ifdef __GNUC__ /* With GCC, we can use a compound statement within an expression */ #define newNode(size, tag) \ ({ Node *_result; \ AssertMacro((size) >= sizeof(Node)); /* need the tag, at least */ \ _result = (Node *) palloc0fast(size); \ _result->type = (tag); \ _result; \ }) #else /* * There is no way to dereference the palloc'ed pointer to assign the * tag, and also return the pointer itself, so we need a holder variable. * Fortunately, this macro isn't recursive so we just define * a global variable for this purpose. 
*/ extern PGDLLIMPORT Node *newNodeMacroHolder; #define newNode(size, tag) \ ( \ AssertMacro((size) >= sizeof(Node)), /* need the tag, at least */ \ newNodeMacroHolder = (Node *) palloc0fast(size), \ newNodeMacroHolder->type = (tag), \ newNodeMacroHolder \ ) #endif /* __GNUC__ */ /*------------------ AllocSetContextCreate --------------------*/ #define AllocSetContextCreate(parent, name, allocparams) \ AllocSetContextCreateExtended(parent, name, allocparams) #endif /* * AllocSetContextCreateExtended * Create a new AllocSet context. * * parent: parent context, or NULL if top-level context * name: name of context (must be statically allocated) * minContextSize: minimum context size * initBlockSize: initial allocation block size * maxBlockSize: maximum allocation block size * * Most callers should abstract the context size parameters using a macro * such as ALLOCSET_DEFAULT_SIZES. (This is now *required* when going * through the AllocSetContextCreate macro.) */ MemoryContext AllocSetContextCreateExtended(MemoryContext parent,//上一级的内存上下文 const char *name,//MemoryContext名称 Size minContextSize,//最小大小 Size initBlockSize,//初始化大小 Size maxBlockSize)//最大大小 { int freeListIndex;//空闲列表索引 Size firstBlockSize;//第一个Block的Size AllocSet set;//AllocSetContext指针 AllocBlock block;//Context中的Block /* Assert we padded AllocChunkData properly */ StaticAssertStmt(ALLOC_CHUNKHDRSZ == MAXALIGN(ALLOC_CHUNKHDRSZ), "sizeof(AllocChunkData) is not maxaligned");//对齐 StaticAssertStmt(offsetof(AllocChunkData, aset) + sizeof(MemoryContext) == ALLOC_CHUNKHDRSZ, "padding calculation in AllocChunkData is wrong");//对齐 /* * First, validate allocation parameters. Once these were regular runtime * test and elog's, but in practice Asserts seem sufficient because nobody * varies their parameters at runtime. We somewhat arbitrarily enforce a * minimum 1K block size. 
*/ //验证参数 Assert(initBlockSize == MAXALIGN(initBlockSize) && initBlockSize >= 1024); Assert(maxBlockSize == MAXALIGN(maxBlockSize) && maxBlockSize >= initBlockSize && AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */ Assert(minContextSize == 0 || (minContextSize == MAXALIGN(minContextSize) && minContextSize >= 1024 && minContextSize <= maxBlockSize)); /* * Check whether the parameters match either available freelist. We do * not need to demand a match of maxBlockSize. */ if (minContextSize == ALLOCSET_DEFAULT_MINSIZE && initBlockSize == ALLOCSET_DEFAULT_INITSIZE) freeListIndex = 0; else if (minContextSize == ALLOCSET_SMALL_MINSIZE && initBlockSize == ALLOCSET_SMALL_INITSIZE) freeListIndex = 1; else freeListIndex = -1;//默认为-1 /* * If a suitable freelist entry exists, just recycle that context. */ if (freeListIndex >= 0)//SMALL/DEFAULT值 { AllocSetFreeList *freelist = &context_freelists[freeListIndex];//使用预定义的freelist if (freelist->first_free != NULL) { /* Remove entry from freelist */ set = freelist->first_free;//使用第一个空闲的AllocSetContext freelist->first_free = (AllocSet) set->header.nextchild;//指针指向下一个空闲的AllocSetContext freelist->num_free--;//free计数器减一 /* Update its maxBlockSize; everything else should be OK */ set->maxBlockSize = maxBlockSize;//更新AllocSetContext的相关信息 /* Reinitialize its header, installing correct name and parent */ MemoryContextCreate((MemoryContext) set, T_AllocSetContext, &AllocSetMethods, parent, name);//创建MemoryContext return (MemoryContext) set;//返回 } } //freeListIndex = -1,定制化自己的MemoryContext /* Determine size of initial block */ firstBlockSize = MAXALIGN(sizeof(AllocSetContext)) + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;//申请内存:AllocSetContext大小+Block头部大小+Chunk头部大小 if (minContextSize != 0) firstBlockSize = Max(firstBlockSize, minContextSize); else firstBlockSize = Max(firstBlockSize, initBlockSize); /* * Allocate the initial block. Unlike other aset.c blocks, it starts with * the context header and its block header follows that. 
*/ set = (AllocSet) malloc(firstBlockSize);//分配内存 if (set == NULL)//OOM? { if (TopMemoryContext) MemoryContextStats(TopMemoryContext); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), errdetail("Failed while creating memory context \"%s\".", name))); } /* * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header/initial block if we ereport in this stretch. */ /* Fill in the initial block's block header */ //获取Block头部指针,开始填充Block头部信息 block = (AllocBlock) (((char *) set) + MAXALIGN(sizeof(AllocSetContext))); block->aset = set; block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; block->endptr = ((char *) set) + firstBlockSize; block->prev = NULL; block->next = NULL; /* Mark unallocated space NOACCESS; leave the block header alone. */ VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, block->endptr - block->freeptr); /* Remember block as part of block list */ set->blocks = block; /* Mark block as not to be released at reset time */ set->keeper = block; /* Finish filling in aset-specific parts of the context header */ MemSetAligned(set->freelist, 0, sizeof(set->freelist));//对齐 set->initBlockSize = initBlockSize;//初始化set的各项属性 set->maxBlockSize = maxBlockSize; set->nextBlockSize = initBlockSize; set->freeListIndex = freeListIndex; /* * Compute the allocation chunk size limit for this context. It can't be * more than ALLOC_CHUNK_LIMIT because of the fixed number of freelists. * If maxBlockSize is small then requests exceeding the maxBlockSize, or * even a significant fraction of it, should be treated as large chunks * too. For the typical case of maxBlockSize a power of 2, the chunk size * limit will be at most 1/8th maxBlockSize, so that given a stream of * requests that are all the maximum chunk size we will waste at most * 1/8th of the allocated space. 
* * We have to have allocChunkLimit a power of two, because the requested * and actually-allocated sizes of any chunk must be on the same side of * the limit, else we get confused about whether the chunk is "big". * * Also, allocChunkLimit must not exceed ALLOCSET_SEPARATE_THRESHOLD. */ StaticAssertStmt(ALLOC_CHUNK_LIMIT == ALLOCSET_SEPARATE_THRESHOLD,//ALLOCSET_SEPARATE_THRESHOLD,8192 "ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD"); set->allocChunkLimit = ALLOC_CHUNK_LIMIT; while ((Size) (set->allocChunkLimit + ALLOC_CHUNKHDRSZ) > (Size) ((maxBlockSize - ALLOC_BLOCKHDRSZ) / ALLOC_CHUNK_FRACTION))//ALLOC_CHUNK_FRACTION-4 set->allocChunkLimit >>= 1;//计算ChunkLimit上限,每次/2 /* Finally, do the type-independent part of context creation */ MemoryContextCreate((MemoryContext) set, T_AllocSetContext, &AllocSetMethods, parent, name);//创建MemoryContext return (MemoryContext) set; } /* * MemoryContextCreate * Context-type-independent part of context creation. * * This is only intended to be called by context-type-specific * context creation routines, not by the unwashed masses. * * The memory context creation procedure goes like this: * 1. Context-type-specific routine makes some initial space allocation, * including enough space for the context header. If it fails, * it can ereport() with no damage done. * 2. Context-type-specific routine sets up all type-specific fields of * the header (those beyond MemoryContextData proper), as well as any * other management fields it needs to have a fully valid context. * Usually, failure in this step is impossible, but if it's possible * the initial space allocation should be freed before ereport'ing. * 3. Context-type-specific routine calls MemoryContextCreate() to fill in * the generic header fields and link the context into the context tree. * 4. We return to the context-type-specific routine, which finishes * up type-specific initialization. 
This routine can now do things * that might fail (like allocate more memory), so long as it's * sure the node is left in a state that delete will handle. * * node: the as-yet-uninitialized common part of the context header node. * tag: NodeTag code identifying the memory context type. * methods: context-type-specific methods (usually statically allocated). * parent: parent context, or NULL if this will be a top-level context. * name: name of context (must be statically allocated). * * Context routines generally assume that MemoryContextCreate can't fail, * so this can contain Assert but not elog/ereport. */ void MemoryContextCreate(MemoryContext node, NodeTag tag, const MemoryContextMethods *methods, MemoryContext parent, const char *name) { /* Creating new memory contexts is not allowed in a critical section */ Assert(CritSectionCount == 0); /* Initialize all standard fields of memory context header */ node->type = tag; node->isReset = true; node->methods = methods; node->parent = parent; node->firstchild = NULL; node->prevchild = NULL; node->name = name; node->ident = NULL; node->reset_cbs = NULL; /* OK to link node into context tree */ if (parent) { node->nextchild = parent->firstchild; if (parent->firstchild != NULL) parent->firstchild->prevchild = node; parent->firstchild = node; /* inherit allowInCritSection flag from parent */ node->allowInCritSection = parent->allowInCritSection; } else { node->nextchild = NULL; node->allowInCritSection = false; } VALGRIND_CREATE_MEMPOOL(node, 0, false); }
3、InitPlan
/* ---------------------------------------------------------------- * InitPlan * * Initializes the query plan: open files, allocate storage * and start up the rule manager * ---------------------------------------------------------------- */ static void InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation;//操作类型 PlannedStmt *plannedstmt = queryDesc->plannedstmt;//已规划的Statement Plan *plan = plannedstmt->planTree;//执行计划 List *rangeTable = plannedstmt->rtable;//本次执行涉及的Table EState *estate = queryDesc->estate;//执行状态 PlanState *planstate;//计划状态 TupleDesc tupType;//Tuple信息 ListCell *l;// int i;// /* * Do permissions checks */ ExecCheckRTPerms(rangeTable, true);//权限检查 /* * initialize the node's execution state */ estate->es_range_table = rangeTable; estate->es_plannedstmt = plannedstmt; /* * initialize result relation stuff, and open/lock the result rels. * * We must do this before initializing the plan tree, else we might try to * do a lock upgrade if a result rel is also a source rel. 
*/ //初始化结果Relation if (plannedstmt->resultRelations) { List *resultRelations = plannedstmt->resultRelations; int numResultRelations = list_length(resultRelations); ResultRelInfo *resultRelInfos; ResultRelInfo *resultRelInfo; resultRelInfos = (ResultRelInfo *) palloc(numResultRelations * sizeof(ResultRelInfo)); resultRelInfo = resultRelInfos; foreach(l, resultRelations) { Index resultRelationIndex = lfirst_int(l); Oid resultRelationOid; Relation resultRelation; resultRelationOid = getrelid(resultRelationIndex, rangeTable); resultRelation = heap_open(resultRelationOid, RowExclusiveLock); InitResultRelInfo(resultRelInfo, resultRelation, resultRelationIndex, NULL, estate->es_instrument); resultRelInfo++; } estate->es_result_relations = resultRelInfos; estate->es_num_result_relations = numResultRelations; /* es_result_relation_info is NULL except when within ModifyTable */ estate->es_result_relation_info = NULL; /* * In the partitioned result relation case, lock the non-leaf result * relations too. A subset of these are the roots of respective * partitioned tables, for which we also allocate ResultRelInfos. */ estate->es_root_result_relations = NULL; estate->es_num_root_result_relations = 0; if (plannedstmt->nonleafResultRelations) { int num_roots = list_length(plannedstmt->rootResultRelations); /* * Firstly, build ResultRelInfos for all the partitioned table * roots, because we will need them to fire the statement-level * triggers, if any. 
*/ resultRelInfos = (ResultRelInfo *) palloc(num_roots * sizeof(ResultRelInfo)); resultRelInfo = resultRelInfos; foreach(l, plannedstmt->rootResultRelations) { Index resultRelIndex = lfirst_int(l); Oid resultRelOid; Relation resultRelDesc; resultRelOid = getrelid(resultRelIndex, rangeTable); resultRelDesc = heap_open(resultRelOid, RowExclusiveLock); InitResultRelInfo(resultRelInfo, resultRelDesc, lfirst_int(l), NULL, estate->es_instrument); resultRelInfo++; } estate->es_root_result_relations = resultRelInfos; estate->es_num_root_result_relations = num_roots; /* Simply lock the rest of them. */ foreach(l, plannedstmt->nonleafResultRelations) { Index resultRelIndex = lfirst_int(l); /* We locked the roots above. */ if (!list_member_int(plannedstmt->rootResultRelations, resultRelIndex)) LockRelationOid(getrelid(resultRelIndex, rangeTable), RowExclusiveLock); } } } else { /* * if no result relation, then set state appropriately */ estate->es_result_relations = NULL; estate->es_num_result_relations = 0; estate->es_result_relation_info = NULL; estate->es_root_result_relations = NULL; estate->es_num_root_result_relations = 0; } /* * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE * before we initialize the plan tree, else we'd be risking lock upgrades. * While we are at it, build the ExecRowMark list. Any partitioned child * tables are ignored here (because isParent=true) and will be locked by * the first Append or MergeAppend node that references them. (Note that * the RowMarks corresponding to partitioned child tables are present in * the same list as the rest, i.e., plannedstmt->rowMarks.) 
*/ estate->es_rowMarks = NIL; foreach(l, plannedstmt->rowMarks) { PlanRowMark *rc = (PlanRowMark *) lfirst(l); Oid relid; Relation relation; ExecRowMark *erm; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* get relation's OID (will produce InvalidOid if subquery) */ relid = getrelid(rc->rti, rangeTable); /* * If you change the conditions under which rel locks are acquired * here, be sure to adjust ExecOpenScanRelation to match. */ switch (rc->markType) { case ROW_MARK_EXCLUSIVE: case ROW_MARK_NOKEYEXCLUSIVE: case ROW_MARK_SHARE: case ROW_MARK_KEYSHARE: relation = heap_open(relid, RowShareLock); break; case ROW_MARK_REFERENCE: relation = heap_open(relid, AccessShareLock); break; case ROW_MARK_COPY: /* no physical table access is required */ relation = NULL; break; default: elog(ERROR, "unrecognized markType: %d", rc->markType); relation = NULL; /* keep compiler quiet */ break; } /* Check that relation is a legal target for marking */ if (relation) CheckValidRowMarkRel(relation, rc->markType); erm = (ExecRowMark *) palloc(sizeof(ExecRowMark)); erm->relation = relation; erm->relid = relid; erm->rti = rc->rti; erm->prti = rc->prti; erm->rowmarkId = rc->rowmarkId; erm->markType = rc->markType; erm->strength = rc->strength; erm->waitPolicy = rc->waitPolicy; erm->ermActive = false; ItemPointerSetInvalid(&(erm->curCtid)); erm->ermExtra = NULL; estate->es_rowMarks = lappend(estate->es_rowMarks, erm); } /* * Initialize the executor's tuple table to empty. */ estate->es_tupleTable = NIL; estate->es_trig_tuple_slot = NULL; estate->es_trig_oldtup_slot = NULL; estate->es_trig_newtup_slot = NULL; /* mark EvalPlanQual not active */ estate->es_epqTuple = NULL; estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; /* * Initialize private state information for each SubPlan. We must do this * before running ExecInitNode on the main query tree, since * ExecInitSubPlan expects to be able to find these entries. 
*/ Assert(estate->es_subplanstates == NIL); i = 1; /* subplan indices count from 1 */ //初始化子Plan foreach(l, plannedstmt->subplans) { Plan *subplan = (Plan *) lfirst(l); PlanState *subplanstate; int sp_eflags; /* * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If * it is a parameterless subplan (not initplan), we suggest that it be * prepared to handle REWIND efficiently; otherwise there is no need. */ sp_eflags = eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); if (bms_is_member(i, plannedstmt->rewindPlanIDs)) sp_eflags |= EXEC_FLAG_REWIND; subplanstate = ExecInitNode(subplan, estate, sp_eflags); estate->es_subplanstates = lappend(estate->es_subplanstates, subplanstate); i++; } /* * Initialize the private state information for all the nodes in the query * tree. This opens files, allocates storage and leaves us ready to start * processing tuples. */ planstate = ExecInitNode(plan, estate, eflags); /* * Get the tuple descriptor describing the type of tuples to return. */ tupType = ExecGetResultType(planstate); /* * Initialize the junk filter if needed. SELECT queries need a filter if * there are any junk attrs in the top-level tlist. */ if (operation == CMD_SELECT) { bool junk_filter_needed = false; ListCell *tlist; foreach(tlist, plan->targetlist) { TargetEntry *tle = (TargetEntry *) lfirst(tlist); if (tle->resjunk) { junk_filter_needed = true; break; } } if (junk_filter_needed) { JunkFilter *j; j = ExecInitJunkFilter(planstate->plan->targetlist, tupType->tdhasoid, ExecInitExtraTupleSlot(estate, NULL)); estate->es_junkFilter = j; /* Want to return the cleaned tuple type */ tupType = j->jf_cleanTupType; } } queryDesc->tupDesc = tupType; queryDesc->planstate = planstate; }/* * ExecCheckRTPerms * Check access permissions for all relations listed in a range table. * * Returns true if permissions are adequate. Otherwise, throws an appropriate * error if ereport_on_violation is true, or simply returns false otherwise. 
* * Note that this does NOT address row level security policies (aka: RLS). If * rows will be returned to the user as a result of this permission check * passing, then RLS also needs to be consulted (and check_enable_rls()). * * See rewrite/rowsecurity.c. */ bool ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation) { ListCell *l; bool result = true; foreach(l, rangeTable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); result = ExecCheckRTEPerms(rte);//基于ACL Mode的权限检查 if (!result) { Assert(rte->rtekind == RTE_RELATION); if (ereport_on_violation) aclcheck_error(ACLCHECK_NO_PRIV, get_relkind_objtype(get_rel_relkind(rte->relid)), get_rel_name(rte->relid)); return false; } } if (ExecutorCheckPerms_hook) result = (*ExecutorCheckPerms_hook) (rangeTable, ereport_on_violation); return result; } /* ------------------------------------------------------------------------ * ExecInitNode * * Recursively initializes all the nodes in the plan tree rooted * at 'node'. * * Inputs: * 'node' is the current node of the plan produced by the query planner * 'estate' is the shared execution state for the plan tree * 'eflags' is a bitwise OR of flag bits described in executor.h * * Returns a PlanState node corresponding to the given Plan node. * ------------------------------------------------------------------------ */ //初始化节点,返回Plan状态 PlanState * ExecInitNode(Plan *node, EState *estate, int eflags) { PlanState *result; List *subps; ListCell *l; /* * do nothing when we get to the end of a leaf on tree. */ if (node == NULL) return NULL; /* * Make sure there's enough stack available. Need to check here, in * addition to ExecProcNode() (via ExecProcNodeFirst()), to ensure the * stack isn't overrun while initializing the node tree. 
*/ check_stack_depth(); switch (nodeTag(node)) { /* * control nodes */ case T_Result: result = (PlanState *) ExecInitResult((Result *) node, estate, eflags); break; case T_ProjectSet: result = (PlanState *) ExecInitProjectSet((ProjectSet *) node, estate, eflags); break; case T_ModifyTable://插入数据 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node, estate, eflags); break; case T_Append: result = (PlanState *) ExecInitAppend((Append *) node, estate, eflags); break; case T_MergeAppend: result = (PlanState *) ExecInitMergeAppend((MergeAppend *) node, estate, eflags); break; case T_RecursiveUnion: result = (PlanState *) ExecInitRecursiveUnion((RecursiveUnion *) node, estate, eflags); break; case T_BitmapAnd: result = (PlanState *) ExecInitBitmapAnd((BitmapAnd *) node, estate, eflags); break; case T_BitmapOr: result = (PlanState *) ExecInitBitmapOr((BitmapOr *) node, estate, eflags); break; /* * scan nodes */ case T_SeqScan: result = (PlanState *) ExecInitSeqScan((SeqScan *) node, estate, eflags); break; case T_SampleScan: result = (PlanState *) ExecInitSampleScan((SampleScan *) node, estate, eflags); break; case T_IndexScan: result = (PlanState *) ExecInitIndexScan((IndexScan *) node, estate, eflags); break; case T_IndexOnlyScan: result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node, estate, eflags); break; case T_BitmapIndexScan: result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node, estate, eflags); break; case T_BitmapHeapScan: result = (PlanState *) ExecInitBitmapHeapScan((BitmapHeapScan *) node, estate, eflags); break; case T_TidScan: result = (PlanState *) ExecInitTidScan((TidScan *) node, estate, eflags); break; case T_SubqueryScan: result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node, estate, eflags); break; case T_FunctionScan: result = (PlanState *) ExecInitFunctionScan((FunctionScan *) node, estate, eflags); break; case T_TableFuncScan: result = (PlanState *) ExecInitTableFuncScan((TableFuncScan *) 
node, estate, eflags); break; case T_ValuesScan: result = (PlanState *) ExecInitValuesScan((ValuesScan *) node, estate, eflags); break; case T_CteScan: result = (PlanState *) ExecInitCteScan((CteScan *) node, estate, eflags); break; case T_NamedTuplestoreScan: result = (PlanState *) ExecInitNamedTuplestoreScan((NamedTuplestoreScan *) node, estate, eflags); break; case T_WorkTableScan: result = (PlanState *) ExecInitWorkTableScan((WorkTableScan *) node, estate, eflags); break; case T_ForeignScan: result = (PlanState *) ExecInitForeignScan((ForeignScan *) node, estate, eflags); break; case T_CustomScan: result = (PlanState *) ExecInitCustomScan((CustomScan *) node, estate, eflags); break; /* * join nodes */ case T_NestLoop: result = (PlanState *) ExecInitNestLoop((NestLoop *) node, estate, eflags); break; case T_MergeJoin: result = (PlanState *) ExecInitMergeJoin((MergeJoin *) node, estate, eflags); break; case T_HashJoin: result = (PlanState *) ExecInitHashJoin((HashJoin *) node, estate, eflags); break; /* * materialization nodes */ case T_Material: result = (PlanState *) ExecInitMaterial((Material *) node, estate, eflags); break; case T_Sort: result = (PlanState *) ExecInitSort((Sort *) node, estate, eflags); break; case T_Group: result = (PlanState *) ExecInitGroup((Group *) node, estate, eflags); break; case T_Agg: result = (PlanState *) ExecInitAgg((Agg *) node, estate, eflags); break; case T_WindowAgg: result = (PlanState *) ExecInitWindowAgg((WindowAgg *) node, estate, eflags); break; case T_Unique: result = (PlanState *) ExecInitUnique((Unique *) node, estate, eflags); break; case T_Gather: result = (PlanState *) ExecInitGather((Gather *) node, estate, eflags); break; case T_GatherMerge: result = (PlanState *) ExecInitGatherMerge((GatherMerge *) node, estate, eflags); break; case T_Hash: result = (PlanState *) ExecInitHash((Hash *) node, estate, eflags); break; case T_SetOp: result = (PlanState *) ExecInitSetOp((SetOp *) node, estate, eflags); break; case 
T_LockRows: result = (PlanState *) ExecInitLockRows((LockRows *) node, estate, eflags); break; case T_Limit: result = (PlanState *) ExecInitLimit((Limit *) node, estate, eflags); break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); result = NULL; /* keep compiler quiet */ break; } ExecSetExecProcNode(result, result->ExecProcNode); /* * Initialize any initPlans present in this node. The planner put them in * a separate list for us. */ subps = NIL; foreach(l, node->initPlan) { SubPlan *subplan = (SubPlan *) lfirst(l); SubPlanState *sstate; Assert(IsA(subplan, SubPlan)); sstate = ExecInitSubPlan(subplan, result); subps = lappend(subps, sstate); } result->initPlan = subps; /* Set up instrumentation for this node if requested */ if (estate->es_instrument) result->instrument = InstrAlloc(1, estate->es_instrument); return result; } /* ---------------------------------------------------------------- * ExecInitModifyTable * ---------------------------------------------------------------- */ ModifyTableState * ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) { ModifyTableState *mtstate;//返回结果 CmdType operation = node->operation;//操作类型 int nplans = list_length(node->plans);//节点中的plan个数 ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo;//结果Relation信息 Plan *subplan;//子Plan ListCell *l;//临时变量 int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ mtstate = makeNode(ModifyTableState);//构建节点 mtstate->ps.plan = (Plan *) node;//设置Plan mtstate->ps.state = estate;//设置执行状态 mtstate->ps.ExecProcNode = ExecModifyTable;//设置处理函数为ExecModifyTable mtstate->operation = operation;//操作类型 mtstate->canSetTag = node->canSetTag; mtstate->mt_done = false; mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);//分配内存 mtstate->resultRelInfo = estate->es_result_relations + 
node->resultRelIndex;//结果Relation信息 /* If modifying a partitioned table, initialize the root table info */ if (node->rootResultRelIndex >= 0) mtstate->rootResultRelInfo = estate->es_root_result_relations + node->rootResultRelIndex; mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans); mtstate->mt_nplans = nplans; /* set up epqstate with dummy subplan data for the moment */ EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam); mtstate->fireBSTriggers = true; /* * call ExecInitNode on each of the plans to be executed and save the * results into the array "mt_plans". This is also a convenient place to * verify that the proposed target relations are valid and open their * indexes for insertion of new index entries. Note we *must* set * estate->es_result_relation_info correctly while we initialize each * sub-plan; ExecContextForcesOids depends on that! */ saved_resultRelInfo = estate->es_result_relation_info; resultRelInfo = mtstate->resultRelInfo; i = 0; //初始化每个子Plan,保存在mt_plans数组中 foreach(l, node->plans) { subplan = (Plan *) lfirst(l); /* Initialize the usesFdwDirectModify flag */ resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, node->fdwDirectModifyPlans); /* * Verify result relation is a valid target for the current operation */ CheckValidResultRel(resultRelInfo, operation); /* * If there are indices on the result relation, open them and save * descriptors in the result relation info, so that we can add new * index entries for the tuples we add/update. We need not do this * for a DELETE, however, since deletion doesn't affect indexes. Also, * inside an EvalPlanQual operation, the indexes might be open * already, since we share the resultrel state with the original * query. 
*/ if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex && operation != CMD_DELETE && resultRelInfo->ri_IndexRelationDescs == NULL) ExecOpenIndices(resultRelInfo, node->onConflictAction != ONCONFLICT_NONE);//初始化Index /* * If this is an UPDATE and a BEFORE UPDATE trigger is present, the * trigger itself might modify the partition-key values. So arrange * for tuple routing. */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_before_row && operation == CMD_UPDATE) update_tuple_routing_needed = true; /* Now init the plan for this result rel */ estate->es_result_relation_info = resultRelInfo; mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);//初始化子节点 /* Also let FDWs init themselves for foreign-table result rels */ if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL) { List *fdw_private = (List *) list_nth(node->fdwPrivLists, i); resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate, resultRelInfo, fdw_private, i, eflags); } resultRelInfo++; i++; } estate->es_result_relation_info = saved_resultRelInfo; /* Get the target relation */ rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc; /* * If it's not a partitioned table after all, UPDATE tuple routing should * not be attempted. */ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) update_tuple_routing_needed = false; /* * Build state for tuple routing if it's an INSERT or if it's an UPDATE of * partition key. */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) mtstate->mt_partition_tuple_routing = ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a * valid trigger query context, so skip it in explain-only mode. 
*/ if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) ExecSetupTransitionCaptureState(mtstate, estate); /* * Construct mapping from each of the per-subplan partition attnos to the * root attno. This is required when during update row movement the tuple * descriptor of a source partition does not match the root partitioned * table descriptor. In such a case we need to convert tuples to the root * tuple descriptor, because the search for destination partition starts * from the root. Skip this setup if it's not a partition key update. */ if (update_tuple_routing_needed) ExecSetupChildParentMapForSubplan(mtstate); /* * Initialize any WITH CHECK OPTION constraints if needed. */ resultRelInfo = mtstate->resultRelInfo; i = 0; //设置Check选项 foreach(l, node->withCheckOptionLists) { List *wcoList = (List *) lfirst(l); List *wcoExprs = NIL; ListCell *ll; foreach(ll, wcoList) { WithCheckOption *wco = (WithCheckOption *) lfirst(ll); ExprState *wcoExpr = ExecInitQual((List *) wco->qual, mtstate->mt_plans[i]); wcoExprs = lappend(wcoExprs, wcoExpr); } resultRelInfo->ri_WithCheckOptions = wcoList; resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; resultRelInfo++; i++; } /* * Initialize RETURNING projections if needed. */ if (node->returningLists) { TupleTableSlot *slot; ExprContext *econtext; /* * Initialize result tuple slot and assign its rowtype using the first * RETURNING list. We assume the rest will look the same. */ mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists); /* Set up a slot for the output of the RETURNING projection(s) */ ExecInitResultTupleSlotTL(estate, &mtstate->ps); slot = mtstate->ps.ps_ResultTupleSlot; /* Need an econtext too */ if (mtstate->ps.ps_ExprContext == NULL) ExecAssignExprContext(estate, &mtstate->ps); econtext = mtstate->ps.ps_ExprContext; /* * Build a projection for each result rel. 
*/ resultRelInfo = mtstate->resultRelInfo; foreach(l, node->returningLists) { List *rlist = (List *) lfirst(l); resultRelInfo->ri_returningList = rlist; resultRelInfo->ri_projectReturning = ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } } else { /* * We still must construct a dummy result tuple type, because InitPlan * expects one (maybe should change that?). */ mtstate->ps.plan->targetlist = NIL; ExecInitResultTupleSlotTL(estate, &mtstate->ps); mtstate->ps.ps_ExprContext = NULL; } /* Set the list of arbiter indexes if needed for ON CONFLICT */ resultRelInfo = mtstate->resultRelInfo; if (node->onConflictAction != ONCONFLICT_NONE) resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; /* * If needed, Initialize target list, projection and qual for ON CONFLICT * DO UPDATE. */ if (node->onConflictAction == ONCONFLICT_UPDATE) { ExprContext *econtext; TupleDesc relationDesc; TupleDesc tupDesc; /* insert may only have one plan, inheritance is not expanded */ Assert(nplans == 1); /* already exists if created by RETURNING processing above */ if (mtstate->ps.ps_ExprContext == NULL) ExecAssignExprContext(estate, &mtstate->ps); econtext = mtstate->ps.ps_ExprContext; relationDesc = resultRelInfo->ri_RelationDesc->rd_att; /* * Initialize slot for the existing tuple. If we'll be performing * tuple routing, the tuple descriptor to use for this will be * determined based on which relation the update is actually applied * to, so we don't set its tuple descriptor here. */ mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state, mtstate->mt_partition_tuple_routing ? NULL : relationDesc); /* carried forward solely for the benefit of explain */ mtstate->mt_excludedtlist = node->exclRelTlist; /* create state for DO UPDATE SET operation */ resultRelInfo->ri_onConflict = makeNode(OnConflictSetState); /* * Create the tuple slot for the UPDATE SET projection. 
* * Just like mt_existing above, we leave it without a tuple descriptor * in the case of partitioning tuple routing, so that it can be * changed by ExecPrepareTupleRouting. In that case, we still save * the tupdesc in the parent's state: it can be reused by partitions * with an identical descriptor to the parent. */ tupDesc = ExecTypeFromTL((List *) node->onConflictSet, relationDesc->tdhasoid); mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state, mtstate->mt_partition_tuple_routing ? NULL : tupDesc); resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc; /* build UPDATE SET projection state */ resultRelInfo->ri_onConflict->oc_ProjInfo = ExecBuildProjectionInfo(node->onConflictSet, econtext, mtstate->mt_conflproj, &mtstate->ps, relationDesc); /* initialize state to evaluate the WHERE clause, if any */ if (node->onConflictWhere) { ExprState *qualexpr; qualexpr = ExecInitQual((List *) node->onConflictWhere, &mtstate->ps); resultRelInfo->ri_onConflict->oc_WhereClause = qualexpr; } } /* * If we have any secondary relations in an UPDATE or DELETE, they need to * be treated like non-locked relations in SELECT FOR UPDATE, ie, the * EvalPlanQual mechanism needs to be told about them. Locate the * relevant ExecRowMarks. 
*/ foreach(l, node->rowMarks) { PlanRowMark *rc = lfirst_node(PlanRowMark, l); ExecRowMark *erm; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* find ExecRowMark (same for all subplans) */ erm = ExecFindRowMark(estate, rc->rti, false); /* build ExecAuxRowMark for each subplan */ for (i = 0; i < nplans; i++) { ExecAuxRowMark *aerm; subplan = mtstate->mt_plans[i]->plan; aerm = ExecBuildAuxRowMark(erm, subplan->targetlist); mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm); } } /* select first subplan */ mtstate->mt_whichplan = 0; subplan = (Plan *) linitial(node->plans); EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, mtstate->mt_arowmarks[0]); /* * Initialize the junk filter(s) if needed. INSERT queries need a filter * if there are any junk attrs in the tlist. UPDATE and DELETE always * need a filter, since there's always at least one junk attribute present * --- no need to look first. Typically, this will be a 'ctid' or * 'wholerow' attribute, but in the case of a foreign data wrapper it * might be a set of junk attributes sufficient to identify the remote * row. * * If there are multiple result relations, each one needs its own junk * filter. Note multiple rels are only possible for UPDATE/DELETE, so we * can't be fooled by some needing a filter and some not. * * This section of code is also a convenient place to verify that the * output of an INSERT or UPDATE matches the target table(s). 
*/ { bool junk_filter_needed = false; switch (operation) { case CMD_INSERT: foreach(l, subplan->targetlist) { TargetEntry *tle = (TargetEntry *) lfirst(l); if (tle->resjunk) { junk_filter_needed = true; break; } } break; case CMD_UPDATE: case CMD_DELETE: junk_filter_needed = true; break; default: elog(ERROR, "unknown operation"); break; } if (junk_filter_needed) { resultRelInfo = mtstate->resultRelInfo; for (i = 0; i < nplans; i++) { JunkFilter *j; subplan = mtstate->mt_plans[i]->plan; if (operation == CMD_INSERT || operation == CMD_UPDATE) ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, subplan->targetlist); j = ExecInitJunkFilter(subplan->targetlist, resultRelInfo->ri_RelationDesc->rd_att->tdhasoid, ExecInitExtraTupleSlot(estate, NULL)); if (operation == CMD_UPDATE || operation == CMD_DELETE) { /* For UPDATE/DELETE, find the appropriate junk attr now */ char relkind; relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid"); if (!AttributeNumberIsValid(j->jf_junkAttNo)) elog(ERROR, "could not find junk ctid column"); } else if (relkind == RELKIND_FOREIGN_TABLE) { /* * When there is a row-level trigger, there should be * a wholerow attribute. */ j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow"); } else { j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow"); if (!AttributeNumberIsValid(j->jf_junkAttNo)) elog(ERROR, "could not find junk wholerow column"); } } resultRelInfo->ri_junkFilter = j; resultRelInfo++; } } else { if (operation == CMD_INSERT) ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc, subplan->targetlist); } } /* * Set up a tuple table slot for use for trigger output tuples. In a plan * containing multiple ModifyTable nodes, all can share one such slot, so * we keep it in the estate. 
*/ if (estate->es_trig_tuple_slot == NULL) estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); /* * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it * to estate->es_auxmodifytables so that it will be run to completion by * ExecPostprocessPlan. (It'd actually work fine to add the primary * ModifyTable node too, but there's no need.) Note the use of lcons not * lappend: we need later-initialized ModifyTable nodes to be shut down * before earlier ones. This ensures that we don't throw away RETURNING * rows that need to be seen by a later CTE subplan. */ if (!mtstate->canSetTag) estate->es_auxmodifytables = lcons(mtstate, estate->es_auxmodifytables); return mtstate; } /* ---------------- * ModifyTable node - * Apply rows produced by subplan(s) to result table(s), * by inserting, updating, or deleting. * * Note that rowMarks and epqParam are presumed to be valid for all the * subplan(s); they can't contain any info that varies across subplans. * ---------------- */ typedef struct ModifyTable { Plan plan; CmdType operation; /* INSERT, UPDATE, or DELETE */ bool canSetTag; /* do we set the command tag/es_processed? 
*/ Index nominalRelation; /* Parent RT index for use of EXPLAIN */ /* RT indexes of non-leaf tables in a partition tree */ List *partitioned_rels; bool partColsUpdated; /* some part key in hierarchy updated */ List *resultRelations; /* integer list of RT indexes */ int resultRelIndex; /* index of first resultRel in plan's list */ int rootResultRelIndex; /* index of the partitioned table root */ List *plans; /* plan(s) producing source data */ List *withCheckOptionLists; /* per-target-table WCO lists */ List *returningLists; /* per-target-table RETURNING tlists */ List *fdwPrivLists; /* per-target-table FDW private data lists */ Bitmapset *fdwDirectModifyPlans; /* indices of FDW DM plans */ List *rowMarks; /* PlanRowMarks (non-locking only) */ int epqParam; /* ID of Param for EvalPlanQual re-eval */ OnConflictAction onConflictAction; /* ON CONFLICT action */ List *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs */ List *onConflictSet; /* SET for INSERT ON CONFLICT DO UPDATE */ Node *onConflictWhere; /* WHERE for ON CONFLICT UPDATE */ Index exclRelRTI; /* RTI of the EXCLUDED pseudo relation */ List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } ModifyTable;
4、ExecutorStart
/* ---------------------------------------------------------------- * ExecutorStart * * This routine must be called at the beginning of any execution of any * query plan * * Takes a QueryDesc previously created by CreateQueryDesc (which is separate * only because some places use QueryDescs for utility commands). The tupDesc * field of the QueryDesc is filled in to describe the tuples that will be * returned, and the internal fields (estate and planstate) are set up. * * eflags contains flag bits as described in executor.h. * * NB: the CurrentMemoryContext when this is called will become the parent * of the per-query context used for this Executor invocation. * * We provide a function hook variable that lets loadable plugins * get control when ExecutorStart is called. Such a plugin would * normally call standard_ExecutorStart(). * * ---------------------------------------------------------------- */ void ExecutorStart(QueryDesc *queryDesc, int eflags)//eflags见后 { if (ExecutorStart_hook) (*ExecutorStart_hook) (queryDesc, eflags);//提供了钩子函数 else standard_ExecutorStart(queryDesc, eflags);//标准函数 } void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)//标准函数 { EState *estate;//执行器状态信息 MemoryContext oldcontext;//原内存上下文 /* sanity checks: queryDesc must not be started already */ Assert(queryDesc != NULL); Assert(queryDesc->estate == NULL); /* * If the transaction is read-only, we need to check if any writes are * planned to non-temporary tables. EXPLAIN is considered read-only. * * Don't allow writes in parallel mode. Supporting UPDATE and DELETE * would require (a) storing the combocid hash in shared memory, rather * than synchronizing it just once at the start of parallelism, and (b) an * alternative to heap_update()'s reliance on xmax for mutual exclusion. * INSERT may have no such troubles, but we forbid it to simplify the * checks. 
* * We have lower-level defenses in CommandCounterIncrement and elsewhere * against performing unsafe operations in parallel mode, but this gives a * more user-friendly error message. */ if ((XactReadOnly || IsInParallelMode()) && !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) ExecCheckXactReadOnly(queryDesc->plannedstmt);//ReadOnly? /* * Build EState, switch into per-query memory context for startup. */ estate = CreateExecutorState();//创建执行器状态信息 queryDesc->estate = estate;//赋值 oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);//切换上下文 /* * Fill in external parameters, if any, from queryDesc; and allocate * workspace for internal parameters */ estate->es_param_list_info = queryDesc->params;//设置参数 if (queryDesc->plannedstmt->paramExecTypes != NIL)//TODO { int nParamExec; nParamExec = list_length(queryDesc->plannedstmt->paramExecTypes); estate->es_param_exec_vals = (ParamExecData *) palloc0(nParamExec * sizeof(ParamExecData)); } estate->es_sourceText = queryDesc->sourceText;//源SQL语句 /* * Fill in the query environment, if any, from queryDesc. */ estate->es_queryEnv = queryDesc->queryEnv;//查询环境 /* * If non-read-only query, set the command ID to mark output tuples with */ switch (queryDesc->operation) { case CMD_SELECT://查询语句 TODO /* * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark * tuples */ if (queryDesc->plannedstmt->rowMarks != NIL || queryDesc->plannedstmt->hasModifyingCTE) estate->es_output_cid = GetCurrentCommandId(true); /* * A SELECT without modifying CTEs can't possibly queue triggers, * so force skip-triggers mode. This is just a marginal efficiency * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't * all that expensive, but we might as well do it. 
*/ if (!queryDesc->plannedstmt->hasModifyingCTE) eflags |= EXEC_FLAG_SKIP_TRIGGERS; break; case CMD_INSERT://插入语句 case CMD_DELETE: case CMD_UPDATE: estate->es_output_cid = GetCurrentCommandId(true); break; default: elog(ERROR, "unrecognized operation code: %d", (int) queryDesc->operation); break; } /* * Copy other important information into the EState */ estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot); estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot); estate->es_top_eflags = eflags; estate->es_instrument = queryDesc->instrument_options; estate->es_jit_flags = queryDesc->plannedstmt->jitFlags; /* * Set up an AFTER-trigger statement context, unless told not to, or * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called). */ if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY))) AfterTriggerBeginQuery(); /* * Initialize the plan state tree */ InitPlan(queryDesc, eflags);//初始化Plan State tree MemoryContextSwitchTo(oldcontext); } /* * GetCurrentCommandId * * "used" must be true if the caller intends to use the command ID to mark * inserted/updated/deleted tuples. false means the ID is being fetched * for read-only purposes (ie, as a snapshot validity cutoff). See * CommandCounterIncrement() for discussion. */ CommandId GetCurrentCommandId(bool used) { /* this is global to a transaction, not subtransaction-local */ if (used) { /* * Forbid setting currentCommandIdUsed in a parallel worker, because * we have no provision for communicating this back to the master. We * could relax this restriction when currentCommandIdUsed was already * true at the start of the parallel operation. */ Assert(!IsParallelWorker()); currentCommandIdUsed = true; } return currentCommandId; } /* * The "eflags" argument to ExecutorStart and the various ExecInitNode * routines is a bitwise OR of the following flag bits, which tell the * called plan node what to expect. 
Note that the flags will get modified * as they are passed down the plan tree, since an upper node may require * functionality in its subnode not demanded of the plan as a whole * (example: MergeJoin requires mark/restore capability in its inner input), * or an upper node may shield its input from some functionality requirement * (example: Materialize shields its input from needing to do backward scan). * * EXPLAIN_ONLY indicates that the plan tree is being initialized just so * EXPLAIN can print it out; it will not be run. Hence, no side-effects * of startup should occur. However, error checks (such as permission checks) * should be performed. * * REWIND indicates that the plan node should try to efficiently support * rescans without parameter changes. (Nodes must support ExecReScan calls * in any case, but if this flag was not given, they are at liberty to do it * through complete recalculation. Note that a parameter change forces a * full recalculation in any case.) * * BACKWARD indicates that the plan node must respect the es_direction flag. * When this is not passed, the plan node will only be run forwards. * * MARK indicates that the plan node must support Mark/Restore calls. * When this is not passed, no Mark/Restore will occur. * * SKIP_TRIGGERS tells ExecutorStart/ExecutorFinish to skip calling * AfterTriggerBeginQuery/AfterTriggerEndQuery. This does not necessarily * mean that the plan can't queue any AFTER triggers; just that the caller * is responsible for there being a trigger context for them to be queued in. * * WITH/WITHOUT_OIDS tell the executor to emit tuples with or without space * for OIDs, respectively. These are currently used only for CREATE TABLE AS. * If neither is set, the plan may or may not produce tuples including OIDs. 
*/#define EXEC_FLAG_EXPLAIN_ONLY 0x0001 /* EXPLAIN, no ANALYZE */#define EXEC_FLAG_REWIND 0x0002 /* need efficient rescan */#define EXEC_FLAG_BACKWARD 0x0004 /* need backward scan */#define EXEC_FLAG_MARK 0x0008 /* need mark/restore */#define EXEC_FLAG_SKIP_TRIGGERS 0x0010 /* skip AfterTrigger calls */#define EXEC_FLAG_WITH_OIDS 0x0020 /* force OIDs in returned tuples */#define EXEC_FLAG_WITHOUT_OIDS 0x0040 /* force no OIDs in returned tuples */#define EXEC_FLAG_WITH_NO_DATA 0x0080 /* rel scannability doesn't matter */
5、ExecutorRun
//上一节已介绍
6、ExecutorFinish
/* ----------------------------------------------------------------
 *      ExecutorFinish
 *
 *      This routine must be called after the last ExecutorRun call.
 *      It performs cleanup such as firing AFTER triggers.  It is
 *      separate from ExecutorEnd because EXPLAIN ANALYZE needs to
 *      include these actions in the total runtime.
 *
 *      We provide a function hook variable that lets loadable plugins
 *      get control when ExecutorFinish is called.  Such a plugin would
 *      normally call standard_ExecutorFinish().
 *
 * ----------------------------------------------------------------
 */
void
ExecutorFinish(QueryDesc *queryDesc)
{
    if (ExecutorFinish_hook)
        (*ExecutorFinish_hook) (queryDesc);
    else
        standard_ExecutorFinish(queryDesc);
}

/*
 * standard_ExecutorFinish
 *
 * Default implementation behind ExecutorFinish.  Working inside the
 * per-query memory context, it runs ExecPostprocessPlan, then calls
 * AfterTriggerEndQuery unless EXEC_FLAG_SKIP_TRIGGERS is set in
 * es_top_eflags, and finally records completion in es_finished.  The
 * caller's memory context is restored before es_finished is set.
 *
 * queryDesc: descriptor of the query being finished; both it and its
 *            estate must be non-NULL (asserted).
 */
void
standard_ExecutorFinish(QueryDesc *queryDesc)
{
    EState     *estate;
    MemoryContext oldcontext;

    /* sanity checks */
    Assert(queryDesc != NULL);

    estate = queryDesc->estate;

    Assert(estate != NULL);
    Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));

    /* This should be run once and only once per Executor instance */
    Assert(!estate->es_finished);

    /* Switch into per-query memory context */
    oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);

    /* Allow instrumentation of Executor overall runtime */
    if (queryDesc->totaltime)
        InstrStartNode(queryDesc->totaltime);

    /* Run ModifyTable nodes to completion */
    ExecPostprocessPlan(estate);

    /* Execute queued AFTER triggers, unless told not to */
    if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
        AfterTriggerEndQuery(estate);

    if (queryDesc->totaltime)
        InstrStopNode(queryDesc->totaltime, 0);

    MemoryContextSwitchTo(oldcontext);

    estate->es_finished = true;
}

/* ----------------------------------------------------------------
 *      ExecPostprocessPlan
 *
 *      Give plan nodes a final chance to execute before shutdown
 * ----------------------------------------------------------------
 */
static void
ExecPostprocessPlan(EState *estate)
{
    ListCell   *lc;

    /*
     * Make sure nodes run forward.
     */
    estate->es_direction = ForwardScanDirection;

    /*
     * Run any secondary ModifyTable nodes to completion, in case the main
     * query did not fetch all rows from them.  (We do this to ensure that
     * such nodes have predictable results.)
     */
    foreach(lc, estate->es_auxmodifytables)
    {
        PlanState  *ps = (PlanState *) lfirst(lc);

        /* Pull tuples from this node until it reports no more output */
        for (;;)
        {
            TupleTableSlot *slot;

            /* Reset the per-output-tuple exprcontext each time */
            ResetPerTupleExprContext(estate);

            slot = ExecProcNode(ps);

            if (TupIsNull(slot))
                break;
        }
    }
}
7、ExecutorEnd
/* ----------------------------------------------------------------
 *      ExecutorEnd
 *
 *      Must be invoked once execution of a query plan is over.
 *
 *      Loadable plugins can take control through ExecutorEnd_hook;
 *      a hook implementation would normally end up calling
 *      standard_ExecutorEnd() itself.
 * ----------------------------------------------------------------
 */
void
ExecutorEnd(QueryDesc *queryDesc)
{
    /* No plugin installed: go straight to the built-in implementation */
    if (!ExecutorEnd_hook)
    {
        standard_ExecutorEnd(queryDesc);
        return;
    }

    (*ExecutorEnd_hook) (queryDesc);
}

/*
 * standard_ExecutorEnd
 *
 * Built-in executor shutdown: ends the plan inside the per-query memory
 * context, unregisters the executor's snapshots, frees the EState, and
 * clears the QueryDesc fields that pointed into it.
 */
void
standard_ExecutorEnd(QueryDesc *queryDesc)
{
    EState     *execState;
    MemoryContext callerCxt;

    /* sanity checks */
    Assert(queryDesc != NULL);

    execState = queryDesc->estate;

    Assert(execState != NULL);

    /*
     * ExecutorFinish must have run already, except in EXPLAIN-only mode.
     * The check exists because ExecutorFinish is new as of 9.1 and callers
     * might forget to call it.
     */
    Assert((execState->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY) ||
           execState->es_finished);

    /* ExecEndPlan has to run in the per-query memory context */
    callerCxt = MemoryContextSwitchTo(execState->es_query_cxt);

    ExecEndPlan(queryDesc->planstate, execState);

    /* drop the snapshots held by the executor state */
    UnregisterSnapshot(execState->es_snapshot);
    UnregisterSnapshot(execState->es_crosscheck_snapshot);

    /* Leave the per-query context before it is destroyed */
    MemoryContextSwitchTo(callerCxt);

    /*
     * Free the EState together with its per-query memory context; this
     * should release everything the executor has allocated.
     */
    FreeExecutorState(execState);

    /* These QueryDesc fields no longer point to anything */
    queryDesc->estate = NULL;
    queryDesc->planstate = NULL;
    queryDesc->tupDesc = NULL;
    queryDesc->totaltime = NULL;
}

/* ----------------------------------------------------------------
 *      ExecEndPlan
 *
 *      Shuts the query plan down.
 *
 *      NOTE: freeing storage is not the concern here, since
 *      FreeExecutorState should be guaranteed to release all memory
 *      that needs to be released.  The concern is closing relations
 *      and dropping buffer pins, which is why tuple tables must be
 *      cleared or dropped.
 * ----------------------------------------------------------------
 */
static void
ExecEndPlan(PlanState *planstate, EState *estate)
{
    ResultRelInfo *rri;
    ListCell   *cell;
    int         remaining;

    /* node-type-specific shutdown of the main plan tree ... */
    ExecEndNode(planstate);

    /* ... and of every subplan */
    foreach(cell, estate->es_subplanstates)
        ExecEndNode((PlanState *) lfirst(cell));

    /*
     * Reset the executor's tuple table.  Only buffer pins and tupdesc
     * refcounts matter; the TupleTableSlots themselves are not pfree'd
     * because the containing memory context is about to go away anyway.
     */
    ExecResetTupleTable(estate->es_tupleTable, false);

    /*
     * Result relations: close the indices first, then the relation itself.
     * Locks are held until transaction commit (NoLock).
     */
    rri = estate->es_result_relations;
    for (remaining = estate->es_num_result_relations;
         remaining > 0;
         remaining--, rri++)
    {
        ExecCloseIndices(rri);
        heap_close(rri->ri_RelationDesc, NoLock);
    }

    /* Root target relation(s): only the relation needs closing */
    rri = estate->es_root_result_relations;
    for (remaining = estate->es_num_root_result_relations;
         remaining > 0;
         remaining--, rri++)
        heap_close(rri->ri_RelationDesc, NoLock);

    /* likewise close any trigger target relations */
    ExecCleanUpTriggerState(estate);

    /*
     * Relations selected FOR [KEY] UPDATE/SHARE are closed as well, again
     * keeping their locks.  Row marks without an open relation are skipped.
     */
    foreach(cell, estate->es_rowMarks)
    {
        ExecRowMark *rowMark = (ExecRowMark *) lfirst(cell);

        if (rowMark->relation == NULL)
            continue;

        heap_close(rowMark->relation, NoLock);
    }
}
8、FreeQueryDesc
//释放资源 /* * FreeQueryDesc */ void FreeQueryDesc(QueryDesc *qdesc) { /* Can't be a live query */ Assert(qdesc->estate == NULL); /* forget our snapshots */ UnregisterSnapshot(qdesc->snapshot); UnregisterSnapshot(qdesc->crosscheck_snapshot); /* Only the QueryDesc itself need be freed */ pfree(qdesc); }
二、源码解读
/*
 * ProcessQuery
 *      Execute a single plannable query within a PORTAL_MULTI_QUERY,
 *      PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal
 *
 *  plan: the plan tree for the query
 *  sourceText: the source text of the query
 *  params: any parameters needed
 *  queryEnv: query environment, handed unchanged to CreateQueryDesc
 *  dest: where to send results
 *  completionTag: points to a buffer of size COMPLETION_TAG_BUFSIZE
 *      in which to store a command completion status string.
 *
 * completionTag may be NULL if caller doesn't want a status string.
 *
 * Must be called in a memory context that will be reset or deleted on
 * error; otherwise the executor's memory usage will be leaked.
 *
 * Returns nothing; the only output is the string written to
 * completionTag (when it is not NULL).
 */
static void
ProcessQuery(PlannedStmt *plan,
             const char *sourceText,
             ParamListInfo params,
             QueryEnvironment *queryEnv,
             DestReceiver *dest,
             char *completionTag)
{
    QueryDesc  *queryDesc;      /* query descriptor */

    /*
     * Create the QueryDesc object
     */
    queryDesc = CreateQueryDesc(plan, sourceText,
                                GetActiveSnapshot(), InvalidSnapshot,
                                dest, params, queryEnv, 0);

    /*
     * Call ExecutorStart to prepare the plan for execution
     */
    ExecutorStart(queryDesc, 0);

    /*
     * Run the plan to completion.
     */
    ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);

    /*
     * Build command completion status string, if caller wants one.
     */
    if (completionTag)
    {
        Oid         lastOid;

        switch (queryDesc->operation)
        {
            case CMD_SELECT:
                snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                         "SELECT " UINT64_FORMAT,
                         queryDesc->estate->es_processed);
                break;
            case CMD_INSERT:
                /* The OID is reported only when exactly one row went in */
                if (queryDesc->estate->es_processed == 1)
                    lastOid = queryDesc->estate->es_lastoid;
                else
                    lastOid = InvalidOid;
                snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                         "INSERT %u " UINT64_FORMAT,
                         lastOid, queryDesc->estate->es_processed);
                break;
            case CMD_UPDATE:
                snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                         "UPDATE " UINT64_FORMAT,
                         queryDesc->estate->es_processed);
                break;
            case CMD_DELETE:
                snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
                         "DELETE " UINT64_FORMAT,
                         queryDesc->estate->es_processed);
                break;
            default:
                /* Any other operation gets a placeholder tag */
                strcpy(completionTag, "???");
                break;
        }
    }

    /*
     * Now, we close down all the scans and free allocated resources.
     */
    ExecutorFinish(queryDesc);
    ExecutorEnd(queryDesc);

    FreeQueryDesc(queryDesc);
}
三、跟踪分析
插入测试数据:
testdb=# -- #9.1 ProcessQuery
testdb=# -- 获取pid
testdb=# select pg_backend_pid();
 pg_backend_pid 
----------------
           2551
(1 row)
testdb=# -- 插入1行
testdb=# insert into t_insert values(17,'ProcessQuery','ProcessQuery','ProcessQuery');
(挂起)
启动gdb,跟踪调试:
[root@localhost ~]# gdb -p 2551
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-100.el7
Copyright (C) 2013 Free Software Foundation, Inc.
...
(gdb) b ProcessQuery
Breakpoint 1 at 0x851d19: file pquery.c, line 149.
(gdb) c
Continuing.
Breakpoint 1, ProcessQuery (plan=0x2ccb378, sourceText=0x2c09ef0 "insert into t_insert values(17,'ProcessQuery','ProcessQuery','ProcessQuery');", params=0x0, queryEnv=0x0, dest=0x2ccb4d8, completionTag=0x7ffe94ba4940 "") at pquery.c:149
149 queryDesc = CreateQueryDesc(plan, sourceText,
#查看参数
#1、plan
(gdb) p *plan
$1 = {type = T_PlannedStmt, commandType = CMD_INSERT, queryId = 0, hasReturning = false, hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false, parallelModeNeeded = false, jitFlags = 0, planTree = 0x2ccafe8, rtable = 0x2ccb2a8, resultRelations = 0x2ccb348, nonleafResultRelations = 0x0, rootResultRelations = 0x0, subplans = 0x0, rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x2ccb2f8, invalItems = 0x0, paramExecTypes = 0x2c31370, utilityStmt = 0x0, stmt_location = 0, stmt_len = 76}
(gdb) p *(plan->planTree)
#计划树根节点(T_ModifyTable),lefttree/righttree均为0x0,即没有左右子树
$2 = {type = T_ModifyTable, startup_cost = 0, total_cost = 0.01, plan_rows = 1, plan_width = 298, parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x0, qual = 0x0, lefttree = 0x0, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}
(gdb) p *(plan->rtable)
$3 = {type = T_List, length = 1, head = 0x2ccb288, tail = 0x2ccb288}
(gdb) p *(plan->rtable->head)
#rtable是指针链表(T_List),有效的是ptr_value=0x2ccb178,指向范围表项(RangeTblEntry);int_value/oid_value只是同一union按整数解读的结果(0x2ccb178=46969208),并不是Oid
$4 = {data = {ptr_value = 0x2ccb178, int_value = 46969208, oid_value = 46969208}, next = 0x0}
(gdb) p *(plan->resultRelations)
$5 = {type = T_IntList, length = 1, head = 0x2ccb328, tail = 0x2ccb328}
(gdb) p *(plan->resultRelations->head)
#resultRelations是整数链表(T_IntList),int_value=1是结果关系在rtable中的下标(从1开始),不是Oid;后文InitPlan中通过getrelid(resultRelationIndex, rangeTable)得到其Oid=26731
$6 = {data = {ptr_value = 0x1, int_value = 1, oid_value = 1}, next = 0x0}
(gdb) p *(plan->relationOids)
$7 = {type = T_OidList, length = 1, head = 0x2ccb2d8, tail = 0x2ccb2d8}
(gdb) p *(plan->relationOids->head)
#Oid=26731,可使用pg_class查询
$8 = {data = {ptr_value = 0x686b, int_value = 26731, oid_value = 26731}, next = 0x0}
#2、sourceText
(gdb) p sourceText
$11 = 0x2c09ef0 "insert into t_insert values(17,'ProcessQuery','ProcessQuery','ProcessQuery');"
#3、params
(gdb) p params
#NULL
$12 = (ParamListInfo) 0x0
#4、queryEnv
(gdb) p queryEnv
#NULL
$13 = (QueryEnvironment *) 0x0
#5、dest
(gdb) p dest
$14 = (DestReceiver *) 0x2ccb4d8
(gdb) p *dest
$15 = {receiveSlot = 0x4857ad , rStartup = 0x485196 , rShutdown = 0x485bad , rDestroy = 0x485c21 , mydest = DestRemote}
(gdb) 
#6、completionTag
(gdb) p completionTag
#空字符串
$16 = 0x7ffe94ba4940 ""
(gdb) next
156 ExecutorStart(queryDesc, 0);
(gdb) 
161 ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);
(gdb) 
166 if (completionTag)
(gdb) 
170 switch (queryDesc->operation)
(gdb) 
178 if (queryDesc->estate->es_processed == 1)
(gdb) 
179 lastOid = queryDesc->estate->es_lastoid;
(gdb) p queryDesc->estate->es_lastoid
$18 = 0
(gdb) 
$19 = 0
(gdb) next
184 lastOid, queryDesc->estate->es_processed);
(gdb) p lastOid
$20 = 0
(gdb) next
182 snprintf(completionTag, COMPLETION_TAG_BUFSIZE,
(gdb) 
185 break;
(gdb) p completionTag
#返回标记,在psql中输出的信息
$21 = 0x7ffe94ba4940 "INSERT 0 1"
(gdb) 
$22 = 0x7ffe94ba4940 "INSERT 0 1"
(gdb) next
205 ExecutorFinish(queryDesc);
(gdb) 
206 ExecutorEnd(queryDesc);
(gdb) 
208 FreeQueryDesc(queryDesc);
(gdb) 
209 }
(gdb) 
PortalRunMulti (portal=0x2c6f490, isTopLevel=true, setHoldSnapshot=false, dest=0x2ccb4d8, altdest=0x2ccb4d8, completionTag=0x7ffe94ba4940 "INSERT 0 1") at pquery.c:1302
1302 if (log_executor_stats)
(gdb) 
#DONE!
为了更深入理解整个执行过程中最重要的两个数据结构PlanState和EState,对子函数InitPlan和CreateExecutorState作进一步的跟踪分析:
InitPlan
testdb=# -- 插入1行testdb=# insert into t_insert values(18,'ProcessQuery.InitPlan','ProcessQuery.InitPlan','ProcessQuery.InitPlan');(挂起)(gdb) b InitPlanBreakpoint 1 at 0x691560: file execMain.c, line 811.#查看参数#1、queryDesc(gdb) p *queryDesc$8 = {operation = CMD_INSERT, plannedstmt = 0x2ccb408, sourceText = 0x2c09ef0 "insert into t_insert values(18,'ProcessQuery.InitPlan','ProcessQuery.InitPlan','ProcessQuery.InitPlan');", snapshot = 0x2c2d920, crosscheck_snapshot = 0x0, dest = 0x2ccb568, params = 0x0, queryEnv = 0x0, instrument_options = 0, tupDesc = 0x0, estate = 0x2cbcc70, planstate = 0x0, already_executed = false, totaltime = 0x0}#2、eflags(gdb) p eflags$9 = 0(gdb) next812 PlannedStmt *plannedstmt = queryDesc->plannedstmt;(gdb) 813 Plan *plan = plannedstmt->planTree;(gdb) 814 List *rangeTable = plannedstmt->rtable;(gdb) p *(queryDesc->plannedstmt)$10 = {type = T_PlannedStmt, commandType = CMD_INSERT, queryId = 0, hasReturning = false, hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false, parallelModeNeeded = false, jitFlags = 0, planTree = 0x2ccb078, rtable = 0x2ccb338, resultRelations = 0x2ccb3d8, nonleafResultRelations = 0x0, rootResultRelations = 0x0, subplans = 0x0, rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x2ccb388, invalItems = 0x0, paramExecTypes = 0x2c313f8, utilityStmt = 0x0, stmt_location = 0, stmt_len = 103}(gdb) next815 EState *estate = queryDesc->estate;(gdb) 824 ExecCheckRTPerms(rangeTable, true);(gdb) 829 estate->es_range_table = rangeTable;(gdb) 830 estate->es_plannedstmt = plannedstmt;(gdb) p *rangeTable$11 = {type = T_List, length = 1, head = 0x2ccb318, tail = 0x2ccb318}(gdb) p *(rangeTable->head)$12 = {data = {ptr_value = 0x2ccb208, int_value = 46969352, oid_value = 46969352}, next = 0x0}(gdb) next838 if (plannedstmt->resultRelations)(gdb) 840 List *resultRelations = plannedstmt->resultRelations;(gdb) 841 int numResultRelations = list_length(resultRelations);(gdb) 846 palloc(numResultRelations * 
sizeof(ResultRelInfo));(gdb) p numResultRelations$13 = 1(gdb) p *(resultRelations->head)$14 = {data = {ptr_value = 0x1, int_value = 1, oid_value = 1}, next = 0x0}(gdb) (gdb) next845 resultRelInfos = (ResultRelInfo *)(gdb) 847 resultRelInfo = resultRelInfos;(gdb) 848 foreach(l, resultRelations)(gdb) 850 Index resultRelationIndex = lfirst_int(l);(gdb) 854 resultRelationOid = getrelid(resultRelationIndex, rangeTable);(gdb) 855 resultRelation = heap_open(resultRelationOid, RowExclusiveLock);(gdb) 857 InitResultRelInfo(resultRelInfo,(gdb) p resultRelationOid$15 = 26731(gdb) p resultRelation$16 = (Relation) 0x7f3a64247b78#目标Relation,t_insert(gdb) p *resultRelation$17 = {rd_node = {spcNode = 1663, dbNode = 16477, relNode = 26747}, rd_smgr = 0x2c99328, rd_refcnt = 1, rd_backend = -1, rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = 1 '\001', rd_statvalid = true, rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7f3a64247d88, rd_att = 0x7f3a64247e98, rd_id = 26731, rd_lockInfo = {lockRelId = { relId = 26731, dbId = 16477}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0, rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x7f3a64249cf0, rd_oidindex = 0, rd_pkindex = 26737, rd_replidindex = 26737, rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0, rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, rd_amhandler = 0, rd_indexcxt = 0x0, rd_amroutine = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0, rd_exclstrats = 0x0, rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x2c8ae98}(gdb) gdb) next848 foreach(l, resultRelations)(gdb) 864 
estate->es_result_relations = resultRelInfos;(gdb) 865 estate->es_num_result_relations = numResultRelations;(gdb) 867 estate->es_result_relation_info = NULL;(gdb) 874 estate->es_root_result_relations = NULL;(gdb) 875 estate->es_num_root_result_relations = 0;(gdb) 876 if (plannedstmt->nonleafResultRelations)(gdb) p *plannedstmt->nonleafResultRelationsCannot access memory at address 0x0(gdb) next941 estate->es_rowMarks = NIL;942 foreach(l, plannedstmt->rowMarks)(gdb) p plannedstmt->rowMarks$19 = (List *) 0x0(gdb) next1003 estate->es_tupleTable = NIL;(gdb) 1004 estate->es_trig_tuple_slot = NULL;(gdb) 1005 estate->es_trig_oldtup_slot = NULL;(gdb) 1006 estate->es_trig_newtup_slot = NULL;(gdb) 1009 estate->es_epqTuple = NULL;(gdb) 1010 estate->es_epqTupleSet = NULL;(gdb) 1011 estate->es_epqScanDone = NULL;(gdb) 1019 i = 1; /* subplan indices count from 1 */(gdb) 1020 foreach(l, plannedstmt->subplans)(gdb) p *plannedstmt->subplansCannot access memory at address 0x0(gdb) next1049 planstate = ExecInitNode(plan, estate, eflags);(gdb) stepExecInitNode (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at execProcnode.c:148148 if (node == NULL)(gdb) next156 check_stack_depth();(gdb) 158 switch (nodeTag(node))(gdb) 174 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,(gdb) stepExecInitModifyTable (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at nodeModifyTable.c:21792179 CmdType operation = node->operation;(gdb) next2180 int nplans = list_length(node->plans);(gdb) 2187 bool update_tuple_routing_needed = node->partColsUpdated;(gdb) p node->plans$20 = (List *) 0x2c317c8(gdb) p *(node->plans)$21 = {type = T_List, length = 1, head = 0x2ccb058, tail = 0x2ccb058}(gdb) p *(node->plans->head)$22 = {data = {ptr_value = 0x2c315b8, int_value = 46339512, oid_value = 46339512}, next = 0x0}(gdb) next2195 mtstate = makeNode(ModifyTableState);(gdb) 2196 mtstate->ps.plan = (Plan *) node;(gdb) p *mtstate$23 = {ps = {type = T_ModifyTableState, plan = 0x0, state = 0x0, ExecProcNode = 0x0, 
ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, qual = 0x0, lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x0, ps_ExprContext = 0x0, ps_ProjInfo = 0x0, scandesc = 0x0}, operation = CMD_UNKNOWN, canSetTag = false, mt_done = false, mt_plans = 0x0, mt_nplans = 0, mt_whichplan = 0, resultRelInfo = 0x0, rootResultRelInfo = 0x0, mt_arowmarks = 0x0, mt_epqstate = {estate = 0x0, planstate = 0x0, origslot = 0x0, plan = 0x0, arowMarks = 0x0, epqParam = 0}, fireBSTriggers = false, mt_existing = 0x0, mt_excludedtlist = 0x0, mt_conflproj = 0x0, mt_partition_tuple_routing = 0x0, mt_transition_capture = 0x0, mt_oc_transition_capture = 0x0, mt_per_subplan_tupconv_maps = 0x0}(gdb) next2197 mtstate->ps.state = estate;(gdb) 2198 mtstate->ps.ExecProcNode = ExecModifyTable;(gdb) 2200 mtstate->operation = operation;(gdb) 2201 mtstate->canSetTag = node->canSetTag;(gdb) 2202 mtstate->mt_done = false;(gdb) 2204 mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);(gdb) 2205 mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;(gdb) 2208 if (node->rootResultRelIndex >= 0)(gdb) 2212 mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);(gdb) 2213 mtstate->mt_nplans = nplans;(gdb) 2216 EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);(gdb) 2217 mtstate->fireBSTriggers = true;(gdb) 2227 saved_resultRelInfo = estate->es_result_relation_info;(gdb) p *(mtstate->mt_epqstate)Structure has no component named operator*.(gdb) p mtstate->mt_epqstate$24 = {estate = 0x0, planstate = 0x0, origslot = 0x0, plan = 0x0, arowMarks = 0x0, epqParam = 0}(gdb) next2229 resultRelInfo = mtstate->resultRelInfo;(gdb) 2230 i = 0;(gdb) 2231 foreach(l, node->plans)(gdb) 2233 subplan = (Plan *) lfirst(l);(gdb) 2237 node->fdwDirectModifyPlans);(gdb) 2236 resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,(gdb) 2242 CheckValidResultRel(resultRelInfo, 
operation);(gdb) 2253 if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&(gdb) 2255 resultRelInfo->ri_IndexRelationDescs == NULL)(gdb) 2254 operation != CMD_DELETE &&(gdb) 2257 node->onConflictAction != ONCONFLICT_NONE);(gdb) 2256 ExecOpenIndices(resultRelInfo,(gdb) 2264 if (resultRelInfo->ri_TrigDesc &&(gdb) 2270 estate->es_result_relation_info = resultRelInfo;(gdb) 2271 mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);(gdb) finishRun till exit from #0 ExecInitModifyTable (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at nodeModifyTable.c:22940x000000000069a21a in ExecInitNode (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at execProcnode.c:174174 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,Value returned is $25 = (ModifyTableState *) 0x2cbcfc0(gdb) next176 break;(gdb) 373 ExecSetExecProcNode(result, result->ExecProcNode);(gdb) p result->ExecProcNode$26 = (ExecProcNodeMtd) 0x6c2485 (gdb) finishRun till exit from #0 ExecInitNode (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at execProcnode.c:3920x0000000000691c2f in InitPlan (queryDesc=0x2cc1580, eflags=0) at execMain.c:10491049 planstate = ExecInitNode(plan, estate, eflags);Value returned is $27 = (PlanState *) 0x2cbcfc0(gdb) next1054 tupType = ExecGetResultType(planstate);(gdb) 1060 if (operation == CMD_SELECT)(gdb) p tupType$28 = (TupleDesc) 0x2cbdd40(gdb) p *tupType$29 = {natts = 0, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2cbdd60}(gdb) next1090 queryDesc->tupDesc = tupType;(gdb) 1091 queryDesc->planstate = planstate;(gdb) 1092 }(gdb) standard_ExecutorStart (queryDesc=0x2cc1580, eflags=0) at execMain.c:266266 MemoryContextSwitchTo(oldcontext);(gdb) #DONE!
CreateExecutorState
#gdb(gdb) b CreateExecutorStateBreakpoint 1 at 0x69f2c5: file execUtils.c, line 89.#psqltestdb=# -- 插入1行testdb=# insert into t_insert values(19,'ProcessQuery.CreateExecutorState','ProcessQuery.CreateExecutorState','ProcessQuery.CreateExecutorState');(挂起)#gdb(gdb) cContinuing.Breakpoint 1, CreateExecutorState () at execUtils.c:8989 qcontext = AllocSetContextCreate(CurrentMemoryContext,#查看输入参数#该函数无输入参数(gdb) step #进入AllocSetContextCreate函数内部AllocSetContextCreateExtended (parent=0x2c09de0, name=0xb1a840 "ExecutorState", minContextSize=0, initBlockSize=8192, maxBlockSize=8388608) at aset.c:426426 if (minContextSize == ALLOCSET_DEFAULT_MINSIZE &&(gdb) next428 freeListIndex = 0;#查看AllocSetContextCreate的输入参数#1、parent(gdb) p *parent$3 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, methods = 0xb8c720 , parent = 0x2c04ba0, firstchild = 0x0, prevchild = 0x2c7fc60, nextchild = 0x2cb6b30, name = 0xb4e87c "MessageContext", ident = 0x0, reset_cbs = 0x0}#顶层Context(gdb) p *(parent->parent)$4 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, methods = 0xb8c720 , parent = 0x0, firstchild = 0x2c2d7e0, prevchild = 0x0, nextchild = 0x0, name = 0xb8d050 "TopMemoryContext", ident = 0x0, reset_cbs = 0x0}#2、name(gdb) p name$5 = 0xb1a840 "ExecutorState"#3、minContextSize(gdb) p minContextSize$6 = 0#4、initBlockSize(gdb) p initBlockSize$7 = 8192 #8KB#5、maxBlockSize(gdb) p maxBlockSize$8 = 8388608 #8MB(gdb) next440 AllocSetFreeList *freelist = &context_freelists[freeListIndex];(gdb) p freeListIndex $9 = 0(gdb) $10 = 0(gdb) next442 if (freelist->first_free != NULL)(gdb) p *freelist$11 = {num_free = 4, first_free = 0x2cbcb60}(gdb) p *(freelist->first_free)$12 = {header = {type = T_AllocSetContext, isReset = true, allowInCritSection = false, methods = 0xb8c720 , parent = 0x0, firstchild = 0x0, prevchild = 0x0, nextchild = 0x2cbeb70, name = 0xb1a840 "ExecutorState", ident = 0x0, reset_cbs = 0x0}, blocks = 0x2cbcc38, freelist = {0x0 }, initBlockSize = 
8192, maxBlockSize = 8388608, nextBlockSize = 8192, allocChunkLimit = 8192, keeper = 0x2cbcc38, freeListIndex = 0}(gdb) next445 set = freelist->first_free;(gdb) 446 freelist->first_free = (AllocSet) set->header.nextchild;(gdb) 447 freelist->num_free--;(gdb) 450 set->maxBlockSize = maxBlockSize;(gdb) 453 MemoryContextCreate((MemoryContext) set,(gdb) 459 return (MemoryContext) set;(gdb) p *set$13 = {header = {type = T_AllocSetContext, isReset = true, allowInCritSection = false, methods = 0xb8c720 , parent = 0x2c09de0, firstchild = 0x0, prevchild = 0x0, nextchild = 0x0, name = 0xb1a840 "ExecutorState", ident = 0x0, reset_cbs = 0x0}, blocks = 0x2cbcc38, freelist = {0x0 }, initBlockSize = 8192, maxBlockSize = 8388608, nextBlockSize = 8192, allocChunkLimit = 8192, keeper = 0x2cbcc38, freeListIndex = 0}(gdb) next548 }(gdb) CreateExecutorState () at execUtils.c:9797 oldcontext = MemoryContextSwitchTo(qcontext);(gdb) 99 estate = makeNode(EState);(gdb) 104 estate->es_direction = ForwardScanDirection;(gdb) finishRun till exit from #0 CreateExecutorState () at execUtils.c:1040x000000000078cc2f in evaluate_expr (expr=0x2c30520, result_type=1043, result_typmod=44, result_collation=100) at clauses.c:48584858 estate = CreateExecutorState();Value returned is $14 = (EState *) 0x2cbcc70
四、小结
1、TODO:更进一步的理解,在执行查询语句时再进一步解读;
2、EState与PlanState数据结构:并非在ProcessQuery中直接构造,而是分别由上文跟踪的子函数CreateExecutorState(构造EState)和InitPlan(通过ExecInitNode构造PlanState)构造,需进一步理解;
3、List数据结构:PG广泛地使用List这类数据结构对各种信息进行管理。
函数
信息
数据
查询
参数
内存
结构
语句
大小
数据结构
状态
节点
输入
上下
上下文
头部
指针
标记
类型
结果
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
jieba分词与数据库
c 和嵌入式软件开发哪个好
电信云服务器ip是内网ip
软件开发行业的问题
护苗网络安全课内容
万宁天气预报软件开发
零基础自学手机软件开发
监利天气预报软件开发
广东网络技术系信息专业
安卓软件开发创建数据库
数据库软拷贝
网络工程师包含数据库知识吗
爱奇艺下载一直无法连接服务器
做ups软件开发
数据库连接池技术选型
客房管理数据库课程设计
平阳oa软件开发团队
深圳2019年网络安全宣传周
友好合作网络安全
重庆管理软件开发平台
信息网络安全研究院
监利天气预报软件开发
上海oa软件开发服务
肥东口碑好的网络技术
皮书数据库主要内容
上海积点网络技术公司
2020年国家网络安全威海
linux 做软件开发
计算机三级数据库抽题重复率
聚焦网络技术团队