PostgreSQL 源码解读(10)- 插入数据#9(ProcessQuery)
发表于:2024-11-29 作者:千家信息网编辑
千家信息网最后更新 2024年11月29日,本文简单介绍了PG插入数据部分的源码,主要内容包括ProcessQuery函数的实现逻辑,该函数位于文件pquery.c中。一、基础信息ProcessQuery函数使用的数据结构、宏定义以及依赖的函数
千家信息网最后更新 2024年11月29日PostgreSQL 源码解读(10)- 插入数据#9(ProcessQuery)
本文简单介绍了PG插入数据部分的源码,主要内容包括ProcessQuery函数的实现逻辑,该函数位于文件pquery.c中。
一、基础信息
ProcessQuery函数使用的数据结构、宏定义以及依赖的函数等。
数据结构/宏定义
1、NodeTag
//节点标记,枚举类型 /* * The first field of every node is NodeTag. Each node created (with makeNode) * will have one of the following tags as the value of its first field. * * Note that inserting or deleting node types changes the numbers of other * node types later in the list. This is no problem during development, since * the node numbers are never stored on disk. But don't do it in a released * branch, because that would represent an ABI break for extensions. */ typedef enum NodeTag { T_Invalid = 0, /* * TAGS FOR EXECUTOR NODES (execnodes.h) */ T_IndexInfo, T_ExprContext, T_ProjectionInfo, T_JunkFilter, T_OnConflictSetState, T_ResultRelInfo, T_EState, T_TupleTableSlot, /* * TAGS FOR PLAN NODES (plannodes.h) */ T_Plan, T_Result, T_ProjectSet, T_ModifyTable, T_Append, T_MergeAppend, T_RecursiveUnion, T_BitmapAnd, T_BitmapOr, T_Scan, T_SeqScan, T_SampleScan, T_IndexScan, T_IndexOnlyScan, T_BitmapIndexScan, T_BitmapHeapScan, T_TidScan, T_SubqueryScan, T_FunctionScan, T_ValuesScan, T_TableFuncScan, T_CteScan, T_NamedTuplestoreScan, T_WorkTableScan, T_ForeignScan, T_CustomScan, T_Join, T_NestLoop, T_MergeJoin, T_HashJoin, T_Material, T_Sort, T_Group, T_Agg, T_WindowAgg, T_Unique, T_Gather, T_GatherMerge, T_Hash, T_SetOp, T_LockRows, T_Limit, /* these aren't subclasses of Plan: */ T_NestLoopParam, T_PlanRowMark, T_PartitionPruneInfo, T_PartitionedRelPruneInfo, T_PartitionPruneStepOp, T_PartitionPruneStepCombine, T_PlanInvalItem, /* * TAGS FOR PLAN STATE NODES (execnodes.h) * * These should correspond one-to-one with Plan node types. */ T_PlanState, T_ResultState, T_ProjectSetState, T_ModifyTableState, T_AppendState, T_MergeAppendState, T_RecursiveUnionState, T_BitmapAndState, T_BitmapOrState, T_ScanState, T_SeqScanState, T_SampleScanState, T_IndexScanState, T_IndexOnlyScanState, T_BitmapIndexScanState, T_BitmapHeapScanState, T_TidScanState, T_SubqueryScanState, T_FunctionScanState, T_TableFuncScanState, T_ValuesScanState, T_CteScanState, T_NamedTuplestoreScanState, T_WorkTableScanState, T_ForeignScanState, T_CustomScanState, T_JoinState, T_NestLoopState, T_MergeJoinState, T_HashJoinState, T_MaterialState, T_SortState, T_GroupState, T_AggState, T_WindowAggState, T_UniqueState, T_GatherState, T_GatherMergeState, T_HashState, T_SetOpState, T_LockRowsState, T_LimitState, /* * TAGS FOR PRIMITIVE NODES (primnodes.h) */ T_Alias, T_RangeVar, T_TableFunc, T_Expr, T_Var, T_Const, T_Param, T_Aggref, T_GroupingFunc, T_WindowFunc, T_ArrayRef, T_FuncExpr, T_NamedArgExpr, T_OpExpr, T_DistinctExpr, T_NullIfExpr, T_ScalarArrayOpExpr, T_BoolExpr, T_SubLink, T_SubPlan, T_AlternativeSubPlan, T_FieldSelect, T_FieldStore, T_RelabelType, T_CoerceViaIO, T_ArrayCoerceExpr, T_ConvertRowtypeExpr, T_CollateExpr, T_CaseExpr, T_CaseWhen, T_CaseTestExpr, T_ArrayExpr, T_RowExpr, T_RowCompareExpr, T_CoalesceExpr, T_MinMaxExpr, T_SQLValueFunction, T_XmlExpr, T_NullTest, T_BooleanTest, T_CoerceToDomain, T_CoerceToDomainValue, T_SetToDefault, T_CurrentOfExpr, T_NextValueExpr, T_InferenceElem, T_TargetEntry, T_RangeTblRef, T_JoinExpr, T_FromExpr, T_OnConflictExpr, T_IntoClause, /* * TAGS FOR EXPRESSION STATE NODES (execnodes.h) * * ExprState represents the evaluation state for a whole expression tree. * Most Expr-based plan nodes do not have a corresponding expression state * node, they're fully handled within execExpr* - but sometimes the state * needs to be shared with other parts of the executor, as for example * with AggrefExprState, which nodeAgg.c has to modify. */ T_ExprState, T_AggrefExprState, T_WindowFuncExprState, T_SetExprState, T_SubPlanState, T_AlternativeSubPlanState, T_DomainConstraintState, /* * TAGS FOR PLANNER NODES (relation.h) */ T_PlannerInfo, T_PlannerGlobal, T_RelOptInfo, T_IndexOptInfo, T_ForeignKeyOptInfo, T_ParamPathInfo, T_Path, T_IndexPath, T_BitmapHeapPath, T_BitmapAndPath, T_BitmapOrPath, T_TidPath, T_SubqueryScanPath, T_ForeignPath, T_CustomPath, T_NestPath, T_MergePath, T_HashPath, T_AppendPath, T_MergeAppendPath, T_ResultPath, T_MaterialPath, T_UniquePath, T_GatherPath, T_GatherMergePath, T_ProjectionPath, T_ProjectSetPath, T_SortPath, T_GroupPath, T_UpperUniquePath, T_AggPath, T_GroupingSetsPath, T_MinMaxAggPath, T_WindowAggPath, T_SetOpPath, T_RecursiveUnionPath, T_LockRowsPath, T_ModifyTablePath, T_LimitPath, /* these aren't subclasses of Path: */ T_EquivalenceClass, T_EquivalenceMember, T_PathKey, T_PathTarget, T_RestrictInfo, T_PlaceHolderVar, T_SpecialJoinInfo, T_AppendRelInfo, T_PlaceHolderInfo, T_MinMaxAggInfo, T_PlannerParamItem, T_RollupData, T_GroupingSetData, T_StatisticExtInfo, /* * TAGS FOR MEMORY NODES (memnodes.h) */ T_MemoryContext, T_AllocSetContext, T_SlabContext, T_GenerationContext, /* * TAGS FOR VALUE NODES (value.h) */ T_Value, T_Integer, T_Float, T_String, T_BitString, T_Null, /* * TAGS FOR LIST NODES (pg_list.h) */ T_List, T_IntList, T_OidList, /* * TAGS FOR EXTENSIBLE NODES (extensible.h) */ T_ExtensibleNode, /* * TAGS FOR STATEMENT NODES (mostly in parsenodes.h) */ T_RawStmt, T_Query, T_PlannedStmt, T_InsertStmt, T_DeleteStmt, T_UpdateStmt, T_SelectStmt, T_AlterTableStmt, T_AlterTableCmd, T_AlterDomainStmt, T_SetOperationStmt, T_GrantStmt, T_GrantRoleStmt, T_AlterDefaultPrivilegesStmt, T_ClosePortalStmt, T_ClusterStmt, T_CopyStmt, T_CreateStmt, T_DefineStmt, T_DropStmt, T_TruncateStmt, T_CommentStmt, T_FetchStmt, T_IndexStmt, T_CreateFunctionStmt, T_AlterFunctionStmt, T_DoStmt, T_RenameStmt, T_RuleStmt, T_NotifyStmt, T_ListenStmt, T_UnlistenStmt, T_TransactionStmt, T_ViewStmt, T_LoadStmt, T_CreateDomainStmt, T_CreatedbStmt, T_DropdbStmt, T_VacuumStmt, T_ExplainStmt, T_CreateTableAsStmt, T_CreateSeqStmt, T_AlterSeqStmt, T_VariableSetStmt, T_VariableShowStmt, T_DiscardStmt, T_CreateTrigStmt, T_CreatePLangStmt, T_CreateRoleStmt, T_AlterRoleStmt, T_DropRoleStmt, T_LockStmt, T_ConstraintsSetStmt, T_ReindexStmt, T_CheckPointStmt, T_CreateSchemaStmt, T_AlterDatabaseStmt, T_AlterDatabaseSetStmt, T_AlterRoleSetStmt, T_CreateConversionStmt, T_CreateCastStmt, T_CreateOpClassStmt, T_CreateOpFamilyStmt, T_AlterOpFamilyStmt, T_PrepareStmt, T_ExecuteStmt, T_DeallocateStmt, T_DeclareCursorStmt, T_CreateTableSpaceStmt, T_DropTableSpaceStmt, T_AlterObjectDependsStmt, T_AlterObjectSchemaStmt, T_AlterOwnerStmt, T_AlterOperatorStmt, T_DropOwnedStmt, T_ReassignOwnedStmt, T_CompositeTypeStmt, T_CreateEnumStmt, T_CreateRangeStmt, T_AlterEnumStmt, T_AlterTSDictionaryStmt, T_AlterTSConfigurationStmt, T_CreateFdwStmt, T_AlterFdwStmt, T_CreateForeignServerStmt, T_AlterForeignServerStmt, T_CreateUserMappingStmt, T_AlterUserMappingStmt, T_DropUserMappingStmt, T_AlterTableSpaceOptionsStmt, T_AlterTableMoveAllStmt, T_SecLabelStmt, T_CreateForeignTableStmt, T_ImportForeignSchemaStmt, T_CreateExtensionStmt, T_AlterExtensionStmt, T_AlterExtensionContentsStmt, T_CreateEventTrigStmt, T_AlterEventTrigStmt, T_RefreshMatViewStmt, T_ReplicaIdentityStmt, T_AlterSystemStmt, T_CreatePolicyStmt, T_AlterPolicyStmt, T_CreateTransformStmt, T_CreateAmStmt, T_CreatePublicationStmt, T_AlterPublicationStmt, T_CreateSubscriptionStmt, T_AlterSubscriptionStmt, T_DropSubscriptionStmt, T_CreateStatsStmt, T_AlterCollationStmt, T_CallStmt, /* * TAGS FOR PARSE TREE NODES (parsenodes.h) */ T_A_Expr, T_ColumnRef, T_ParamRef, T_A_Const, T_FuncCall, T_A_Star, T_A_Indices, T_A_Indirection, T_A_ArrayExpr, T_ResTarget, T_MultiAssignRef, T_TypeCast, T_CollateClause, T_SortBy, T_WindowDef, T_RangeSubselect, T_RangeFunction, T_RangeTableSample, T_RangeTableFunc, T_RangeTableFuncCol, T_TypeName, T_ColumnDef, T_IndexElem, T_Constraint, T_DefElem, T_RangeTblEntry, T_RangeTblFunction, T_TableSampleClause, T_WithCheckOption, T_SortGroupClause, T_GroupingSet, T_WindowClause, T_ObjectWithArgs, T_AccessPriv, T_CreateOpClassItem, T_TableLikeClause, T_FunctionParameter, T_LockingClause, T_RowMarkClause, T_XmlSerialize, T_WithClause, T_InferClause, T_OnConflictClause, T_CommonTableExpr, T_RoleSpec, T_TriggerTransition, T_PartitionElem, T_PartitionSpec, T_PartitionBoundSpec, T_PartitionRangeDatum, T_PartitionCmd, T_VacuumRelation, /* * TAGS FOR REPLICATION GRAMMAR PARSE NODES (replnodes.h) */ T_IdentifySystemCmd, T_BaseBackupCmd, T_CreateReplicationSlotCmd, T_DropReplicationSlotCmd, T_StartReplicationCmd, T_TimeLineHistoryCmd, T_SQLCmd, /* * TAGS FOR RANDOM OTHER STUFF * * These are objects that aren't part of parse/plan/execute node tree * structures, but we give them NodeTags anyway for identification * purposes (usually because they are involved in APIs where we want to * pass multiple object types through the same pointer). */ T_TriggerData, /* in commands/trigger.h */ T_EventTriggerData, /* in commands/event_trigger.h */ T_ReturnSetInfo, /* in nodes/execnodes.h */ T_WindowObjectData, /* private in nodeWindowAgg.c */ T_TIDBitmap, /* in nodes/tidbitmap.h */ T_InlineCodeBlock, /* in nodes/parsenodes.h */ T_FdwRoutine, /* in foreign/fdwapi.h */ T_IndexAmRoutine, /* in access/amapi.h */ T_TsmRoutine, /* in access/tsmapi.h */ T_ForeignKeyCacheInfo, /* in utils/rel.h */ T_CallContext /* in nodes/parsenodes.h */ } NodeTag; /* * The first field of a node of any type is guaranteed to be the NodeTag. * Hence the type of any node can be gotten by casting it to Node. Declaring * a variable to be of Node * (instead of void *) can also facilitate * debugging. */ typedef struct Node { NodeTag type; } Node; #define nodeTag(nodeptr) (((const Node*)(nodeptr))->type)
2、MemoryContext
//内存上下文//AllocSetContext结构体的MemoryContextData与其共享// typedef struct MemoryContextData { NodeTag type; /* identifies exact kind of context */ /* these two fields are placed here to minimize alignment wastage: */ bool isReset; /* T = no space alloced since last reset */ bool allowInCritSection; /* allow palloc in critical section */ const MemoryContextMethods *methods; /* virtual function table */ MemoryContext parent; /* NULL if no parent (toplevel context) */ MemoryContext firstchild; /* head of linked list of children */ MemoryContext prevchild; /* previous child of same parent */ MemoryContext nextchild; /* next child of same parent */ const char *name; /* context name (just for debugging) */ const char *ident; /* context ID if any (just for debugging) */ MemoryContextCallback *reset_cbs; /* list of reset/delete callbacks */ } MemoryContextData; /* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */ /* * Type MemoryContextData is declared in nodes/memnodes.h. Most users * of memory allocation should just treat it as an abstract type, so we * do not provide the struct contents here. */ typedef struct MemoryContextData *MemoryContext;
3、AllocSet
/* * AllocSetContext is our standard implementation of MemoryContext. * * Note: header.isReset means there is nothing for AllocSetReset to do. * This is different from the aset being physically empty (empty blocks list) * because we will still have a keeper block. It's also different from the set * being logically empty, because we don't attempt to detect pfree'ing the * last active chunk. */ typedef struct AllocSetContext { MemoryContextData header; /* Standard memory-context fields */ /* Info about storage allocated in this context: */ AllocBlock blocks; /* head of list of blocks in this set */ AllocChunk freelist[ALLOCSET_NUM_FREELISTS]; /* free chunk lists */ /* Allocation parameters for this context: */ Size initBlockSize; /* initial block size */ Size maxBlockSize; /* maximum block size */ Size nextBlockSize; /* next block size to allocate */ Size allocChunkLimit; /* effective chunk size limit */ AllocBlock keeper; /* keep this block over resets */ /* freelist this context could be put in, or -1 if not a candidate: */ int freeListIndex; /* index in context_freelists[], or -1 */ } AllocSetContext; typedef AllocSetContext *AllocSet;
4、AllocBlock
/* * AllocBlock * An AllocBlock is the unit of memory that is obtained by aset.c * from malloc(). It contains one or more AllocChunks, which are * the units requested by palloc() and freed by pfree(). AllocChunks * cannot be returned to malloc() individually, instead they are put * on freelists by pfree() and re-used by the next palloc() that has * a matching request size. * * AllocBlockData is the header data for a block --- the usable space * within the block begins at the next alignment boundary. */ typedef struct AllocBlockData { AllocSet aset; /* aset that owns this block */ AllocBlock prev; /* prev block in aset's blocks list, if any */ AllocBlock next; /* next block in aset's blocks list, if any */ char *freeptr; /* start of free space in this block */ char *endptr; /* end of space in this block */ } AllocBlockData; typedef struct AllocBlockData *AllocBlock; /* forward reference */
5、AllocChunk
/* * AllocChunk * The prefix of each piece of memory in an AllocBlock * * Note: to meet the memory context APIs, the payload area of the chunk must * be maxaligned, and the "aset" link must be immediately adjacent to the * payload area (cf. GetMemoryChunkContext). We simplify matters for this * module by requiring sizeof(AllocChunkData) to be maxaligned, and then * we can ensure things work by adding any required alignment padding before * the "aset" field. There is a static assertion below that the alignment * is done correctly. */ typedef struct AllocChunkData { /* size is always the size of the usable space in the chunk */ Size size; #ifdef MEMORY_CONTEXT_CHECKING /* when debugging memory usage, also store actual requested size */ /* this is zero in a free chunk */ Size requested_size; #define ALLOCCHUNK_RAWSIZE (SIZEOF_SIZE_T * 2 + SIZEOF_VOID_P) #else #define ALLOCCHUNK_RAWSIZE (SIZEOF_SIZE_T + SIZEOF_VOID_P) #endif /* MEMORY_CONTEXT_CHECKING */ /* ensure proper alignment by adding padding if needed */ #if (ALLOCCHUNK_RAWSIZE % MAXIMUM_ALIGNOF) != 0 char padding[MAXIMUM_ALIGNOF - ALLOCCHUNK_RAWSIZE % MAXIMUM_ALIGNOF]; #endif /* aset is the owning aset if allocated, or the freelist link if free */ void *aset; /* there must not be any padding to reach a MAXALIGN boundary here! */ } AllocChunkData; typedef struct AllocChunkData *AllocChunk;
6、AllocSetFreeList
typedef struct AllocSetFreeList{ int num_free; /* current list length */ AllocSetContext *first_free; /* list header */} AllocSetFreeList;
7、context_freelists
/* context_freelists[0] is for default params, [1] for small params */static AllocSetFreeList context_freelists[2] ={ { 0, NULL }, { 0, NULL }};
8、AllocSetMethods
//内存管理方法 typedef struct MemoryContextMethods { void *(*alloc) (MemoryContext context, Size size); /* call this free_p in case someone #define's free() */ void (*free_p) (MemoryContext context, void *pointer); void *(*realloc) (MemoryContext context, void *pointer, Size size); void (*reset) (MemoryContext context); void (*delete_context) (MemoryContext context); Size (*get_chunk_space) (MemoryContext context, void *pointer); bool (*is_empty) (MemoryContext context); void (*stats) (MemoryContext context, MemoryStatsPrintFunc printfunc, void *passthru, MemoryContextCounters *totals); #ifdef MEMORY_CONTEXT_CHECKING void (*check) (MemoryContext context); #endif } MemoryContextMethods;//预定义 /* * This is the virtual function table for AllocSet contexts. */ static const MemoryContextMethods AllocSetMethods = { AllocSetAlloc, AllocSetFree, AllocSetRealloc, AllocSetReset, AllocSetDelete, AllocSetGetChunkSpace, AllocSetIsEmpty, AllocSetStats #ifdef MEMORY_CONTEXT_CHECKING ,AllocSetCheck #endif };
9、宏定义
#define ALLOC_BLOCKHDRSZ MAXALIGN(sizeof(AllocBlockData)) #define ALLOC_CHUNKHDRSZ sizeof(struct AllocChunkData) /* * Recommended default alloc parameters, suitable for "ordinary" contexts * that might hold quite a lot of data. */ #define ALLOCSET_DEFAULT_MINSIZE 0 #define ALLOCSET_DEFAULT_INITSIZE (8 * 1024) #define ALLOCSET_DEFAULT_MAXSIZE (8 * 1024 * 1024) #define ALLOCSET_DEFAULT_SIZES \ ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE #define VALGRIND_MAKE_MEM_NOACCESS(addr, size) do {} while (0) #define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */ #define ALLOCSET_NUM_FREELISTS 11 #define ALLOC_CHUNK_LIMIT (1 << (ALLOCSET_NUM_FREELISTS-1+ALLOC_MINBITS)) #define ALLOCSET_SEPARATE_THRESHOLD 8192#define ALLOC_CHUNK_FRACTION 4 /* We allow chunks to be at most 1/4 of maxBlockSize (less overhead) */
依赖的函数
1、CreateQueryDesc
//根据输入的参数构造QueryDesc /* * CreateQueryDesc */ QueryDesc * CreateQueryDesc(PlannedStmt *plannedstmt, const char *sourceText, Snapshot snapshot, Snapshot crosscheck_snapshot, DestReceiver *dest, ParamListInfo params, QueryEnvironment *queryEnv, int instrument_options) { QueryDesc *qd = (QueryDesc *) palloc(sizeof(QueryDesc)); qd->operation = plannedstmt->commandType; /* operation */ qd->plannedstmt = plannedstmt; /* plan */ qd->sourceText = sourceText; /* query text */ qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */ /* RI check snapshot */ qd->crosscheck_snapshot = RegisterSnapshot(crosscheck_snapshot); qd->dest = dest; /* output dest */ qd->params = params; /* parameter values passed into query */ qd->queryEnv = queryEnv; qd->instrument_options = instrument_options; /* instrumentation wanted? */ /* null these fields until set by ExecutorStart */ qd->tupDesc = NULL; qd->estate = NULL; qd->planstate = NULL; qd->totaltime = NULL; /* not yet executed */ qd->already_executed = false; return qd; }
2、CreateExecutorState
/* ---------------- * CreateExecutorState * * Create and initialize an EState node, which is the root of * working storage for an entire Executor invocation. * * Principally, this creates the per-query memory context that will be * used to hold all working data that lives till the end of the query. * Note that the per-query context will become a child of the caller's * CurrentMemoryContext. * ---------------- */ EState * CreateExecutorState(void)//构建EState { EState *estate;//EState指针 MemoryContext qcontext; MemoryContext oldcontext; /* * Create the per-query context for this Executor run. */ qcontext = AllocSetContextCreate(CurrentMemoryContext, "ExecutorState", ALLOCSET_DEFAULT_SIZES);//创建MemoryContext /* * Make the EState node within the per-query context. This way, we don't * need a separate pfree() operation for it at shutdown. */ oldcontext = MemoryContextSwitchTo(qcontext);//切换至新创建的MemoryContext estate = makeNode(EState);//创建Node /* * Initialize all fields of the Executor State structure */ //初始化Executor State structure estate->es_direction = ForwardScanDirection; estate->es_snapshot = InvalidSnapshot; /* caller must initialize this */ estate->es_crosscheck_snapshot = InvalidSnapshot; /* no crosscheck */ estate->es_range_table = NIL; estate->es_plannedstmt = NULL; estate->es_junkFilter = NULL; estate->es_output_cid = (CommandId) 0; estate->es_result_relations = NULL; estate->es_num_result_relations = 0; estate->es_result_relation_info = NULL; estate->es_root_result_relations = NULL; estate->es_num_root_result_relations = 0; estate->es_tuple_routing_result_relations = NIL; estate->es_trig_target_relations = NIL; estate->es_trig_tuple_slot = NULL; estate->es_trig_oldtup_slot = NULL; estate->es_trig_newtup_slot = NULL; estate->es_param_list_info = NULL; estate->es_param_exec_vals = NULL; estate->es_queryEnv = NULL; estate->es_query_cxt = qcontext; estate->es_tupleTable = NIL; estate->es_rowMarks = NIL; estate->es_processed = 0; estate->es_lastoid = InvalidOid; estate->es_top_eflags = 0; estate->es_instrument = 0; estate->es_finished = false; estate->es_exprcontexts = NIL; estate->es_subplanstates = NIL; estate->es_auxmodifytables = NIL; estate->es_per_tuple_exprcontext = NULL; estate->es_epqTuple = NULL; estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; estate->es_sourceText = NULL; estate->es_use_parallel_mode = false; estate->es_jit_flags = 0; estate->es_jit = NULL; /* * Return the executor state structure */ MemoryContextSwitchTo(oldcontext); return estate; } /*------------------ makeNode --------------------*/ #define makeNode(_type_) ((_type_ *) newNode(sizeof(_type_),T_##_type_)) /* * newNode - * create a new node of the specified size and tag the node with the * specified tag. * * !WARNING!: Avoid using newNode directly. You should be using the * macro makeNode. eg. to create a Query node, use makeNode(Query) * * Note: the size argument should always be a compile-time constant, so the * apparent risk of multiple evaluation doesn't matter in practice. */ #ifdef __GNUC__ /* With GCC, we can use a compound statement within an expression */ #define newNode(size, tag) \ ({ Node *_result; \ AssertMacro((size) >= sizeof(Node)); /* need the tag, at least */ \ _result = (Node *) palloc0fast(size); \ _result->type = (tag); \ _result; \ }) #else /* * There is no way to dereference the palloc'ed pointer to assign the * tag, and also return the pointer itself, so we need a holder variable. * Fortunately, this macro isn't recursive so we just define * a global variable for this purpose. */ extern PGDLLIMPORT Node *newNodeMacroHolder; #define newNode(size, tag) \ ( \ AssertMacro((size) >= sizeof(Node)), /* need the tag, at least */ \ newNodeMacroHolder = (Node *) palloc0fast(size), \ newNodeMacroHolder->type = (tag), \ newNodeMacroHolder \ ) #endif /* __GNUC__ */ /*------------------ AllocSetContextCreate --------------------*/ #define AllocSetContextCreate(parent, name, allocparams) \ AllocSetContextCreateExtended(parent, name, allocparams) #endif /* * AllocSetContextCreateExtended * Create a new AllocSet context. * * parent: parent context, or NULL if top-level context * name: name of context (must be statically allocated) * minContextSize: minimum context size * initBlockSize: initial allocation block size * maxBlockSize: maximum allocation block size * * Most callers should abstract the context size parameters using a macro * such as ALLOCSET_DEFAULT_SIZES. (This is now *required* when going * through the AllocSetContextCreate macro.) */ MemoryContext AllocSetContextCreateExtended(MemoryContext parent,//上一级的内存上下文 const char *name,//MemoryContext名称 Size minContextSize,//最小大小 Size initBlockSize,//初始化大小 Size maxBlockSize)//最大大小 { int freeListIndex;//空闲列表索引 Size firstBlockSize;//第一个Block的Size AllocSet set;//AllocSetContext指针 AllocBlock block;//Context中的Block /* Assert we padded AllocChunkData properly */ StaticAssertStmt(ALLOC_CHUNKHDRSZ == MAXALIGN(ALLOC_CHUNKHDRSZ), "sizeof(AllocChunkData) is not maxaligned");//对齐 StaticAssertStmt(offsetof(AllocChunkData, aset) + sizeof(MemoryContext) == ALLOC_CHUNKHDRSZ, "padding calculation in AllocChunkData is wrong");//对齐 /* * First, validate allocation parameters. Once these were regular runtime * test and elog's, but in practice Asserts seem sufficient because nobody * varies their parameters at runtime. We somewhat arbitrarily enforce a * minimum 1K block size. */ //验证参数 Assert(initBlockSize == MAXALIGN(initBlockSize) && initBlockSize >= 1024); Assert(maxBlockSize == MAXALIGN(maxBlockSize) && maxBlockSize >= initBlockSize && AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */ Assert(minContextSize == 0 || (minContextSize == MAXALIGN(minContextSize) && minContextSize >= 1024 && minContextSize <= maxBlockSize)); /* * Check whether the parameters match either available freelist. We do * not need to demand a match of maxBlockSize. */ if (minContextSize == ALLOCSET_DEFAULT_MINSIZE && initBlockSize == ALLOCSET_DEFAULT_INITSIZE) freeListIndex = 0; else if (minContextSize == ALLOCSET_SMALL_MINSIZE && initBlockSize == ALLOCSET_SMALL_INITSIZE) freeListIndex = 1; else freeListIndex = -1;//默认为-1 /* * If a suitable freelist entry exists, just recycle that context. */ if (freeListIndex >= 0)//SMALL/DEFAULT值 { AllocSetFreeList *freelist = &context_freelists[freeListIndex];//使用预定义的freelist if (freelist->first_free != NULL) { /* Remove entry from freelist */ set = freelist->first_free;//使用第一个空闲的AllocSetContext freelist->first_free = (AllocSet) set->header.nextchild;//指针指向下一个空闲的AllocSetContext freelist->num_free--;//free计数器减一 /* Update its maxBlockSize; everything else should be OK */ set->maxBlockSize = maxBlockSize;//更新AllocSetContext的相关信息 /* Reinitialize its header, installing correct name and parent */ MemoryContextCreate((MemoryContext) set, T_AllocSetContext, &AllocSetMethods, parent, name);//创建MemoryContext return (MemoryContext) set;//返回 } } //freeListIndex = -1,定制化自己的MemoryContext /* Determine size of initial block */ firstBlockSize = MAXALIGN(sizeof(AllocSetContext)) + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;//申请内存:AllocSetContext大小+Block头部大小+Chunk头部大小 if (minContextSize != 0) firstBlockSize = Max(firstBlockSize, minContextSize); else firstBlockSize = Max(firstBlockSize, initBlockSize); /* * Allocate the initial block. Unlike other aset.c blocks, it starts with * the context header and its block header follows that. */ set = (AllocSet) malloc(firstBlockSize);//分配内存 if (set == NULL)//OOM? { if (TopMemoryContext) MemoryContextStats(TopMemoryContext); ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("out of memory"), errdetail("Failed while creating memory context \"%s\".", name))); } /* * Avoid writing code that can fail between here and MemoryContextCreate; * we'd leak the header/initial block if we ereport in this stretch. */ /* Fill in the initial block's block header */ //获取Block头部指针,开始填充Block头部信息 block = (AllocBlock) (((char *) set) + MAXALIGN(sizeof(AllocSetContext))); block->aset = set; block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ; block->endptr = ((char *) set) + firstBlockSize; block->prev = NULL; block->next = NULL; /* Mark unallocated space NOACCESS; leave the block header alone. */ VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, block->endptr - block->freeptr); /* Remember block as part of block list */ set->blocks = block; /* Mark block as not to be released at reset time */ set->keeper = block; /* Finish filling in aset-specific parts of the context header */ MemSetAligned(set->freelist, 0, sizeof(set->freelist));//对齐 set->initBlockSize = initBlockSize;//初始化set的各项属性 set->maxBlockSize = maxBlockSize; set->nextBlockSize = initBlockSize; set->freeListIndex = freeListIndex; /* * Compute the allocation chunk size limit for this context. It can't be * more than ALLOC_CHUNK_LIMIT because of the fixed number of freelists. * If maxBlockSize is small then requests exceeding the maxBlockSize, or * even a significant fraction of it, should be treated as large chunks * too. For the typical case of maxBlockSize a power of 2, the chunk size * limit will be at most 1/8th maxBlockSize, so that given a stream of * requests that are all the maximum chunk size we will waste at most * 1/8th of the allocated space. * * We have to have allocChunkLimit a power of two, because the requested * and actually-allocated sizes of any chunk must be on the same side of * the limit, else we get confused about whether the chunk is "big". * * Also, allocChunkLimit must not exceed ALLOCSET_SEPARATE_THRESHOLD. */ StaticAssertStmt(ALLOC_CHUNK_LIMIT == ALLOCSET_SEPARATE_THRESHOLD,//ALLOCSET_SEPARATE_THRESHOLD,8192 "ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD"); set->allocChunkLimit = ALLOC_CHUNK_LIMIT; while ((Size) (set->allocChunkLimit + ALLOC_CHUNKHDRSZ) > (Size) ((maxBlockSize - ALLOC_BLOCKHDRSZ) / ALLOC_CHUNK_FRACTION))//ALLOC_CHUNK_FRACTION-4 set->allocChunkLimit >>= 1;//计算ChunkLimit上限,每次/2 /* Finally, do the type-independent part of context creation */ MemoryContextCreate((MemoryContext) set, T_AllocSetContext, &AllocSetMethods, parent, name);//创建MemoryContext return (MemoryContext) set; } /* * MemoryContextCreate * Context-type-independent part of context creation. * * This is only intended to be called by context-type-specific * context creation routines, not by the unwashed masses. * * The memory context creation procedure goes like this: * 1. Context-type-specific routine makes some initial space allocation, * including enough space for the context header. If it fails, * it can ereport() with no damage done. * 2. Context-type-specific routine sets up all type-specific fields of * the header (those beyond MemoryContextData proper), as well as any * other management fields it needs to have a fully valid context. * Usually, failure in this step is impossible, but if it's possible * the initial space allocation should be freed before ereport'ing. * 3. Context-type-specific routine calls MemoryContextCreate() to fill in * the generic header fields and link the context into the context tree. * 4. We return to the context-type-specific routine, which finishes * up type-specific initialization. This routine can now do things * that might fail (like allocate more memory), so long as it's * sure the node is left in a state that delete will handle. * * node: the as-yet-uninitialized common part of the context header node. * tag: NodeTag code identifying the memory context type. * methods: context-type-specific methods (usually statically allocated). * parent: parent context, or NULL if this will be a top-level context. * name: name of context (must be statically allocated). * * Context routines generally assume that MemoryContextCreate can't fail, * so this can contain Assert but not elog/ereport. */ void MemoryContextCreate(MemoryContext node, NodeTag tag, const MemoryContextMethods *methods, MemoryContext parent, const char *name) { /* Creating new memory contexts is not allowed in a critical section */ Assert(CritSectionCount == 0); /* Initialize all standard fields of memory context header */ node->type = tag; node->isReset = true; node->methods = methods; node->parent = parent; node->firstchild = NULL; node->prevchild = NULL; node->name = name; node->ident = NULL; node->reset_cbs = NULL; /* OK to link node into context tree */ if (parent) { node->nextchild = parent->firstchild; if (parent->firstchild != NULL) parent->firstchild->prevchild = node; parent->firstchild = node; /* inherit allowInCritSection flag from parent */ node->allowInCritSection = parent->allowInCritSection; } else { node->nextchild = NULL; node->allowInCritSection = false; } VALGRIND_CREATE_MEMPOOL(node, 0, false); }
3、InitPlan
/* ---------------------------------------------------------------- * InitPlan * * Initializes the query plan: open files, allocate storage * and start up the rule manager * ---------------------------------------------------------------- */ static void InitPlan(QueryDesc *queryDesc, int eflags) { CmdType operation = queryDesc->operation;//操作类型 PlannedStmt *plannedstmt = queryDesc->plannedstmt;//已规划的Statement Plan *plan = plannedstmt->planTree;//执行计划 List *rangeTable = plannedstmt->rtable;//本次执行涉及的Table EState *estate = queryDesc->estate;//执行状态 PlanState *planstate;//计划状态 TupleDesc tupType;//Tuple信息 ListCell *l;// int i;// /* * Do permissions checks */ ExecCheckRTPerms(rangeTable, true);//权限检查 /* * initialize the node's execution state */ estate->es_range_table = rangeTable; estate->es_plannedstmt = plannedstmt; /* * initialize result relation stuff, and open/lock the result rels. * * We must do this before initializing the plan tree, else we might try to * do a lock upgrade if a result rel is also a source rel. */ //初始化结果Relation if (plannedstmt->resultRelations) { List *resultRelations = plannedstmt->resultRelations; int numResultRelations = list_length(resultRelations); ResultRelInfo *resultRelInfos; ResultRelInfo *resultRelInfo; resultRelInfos = (ResultRelInfo *) palloc(numResultRelations * sizeof(ResultRelInfo)); resultRelInfo = resultRelInfos; foreach(l, resultRelations) { Index resultRelationIndex = lfirst_int(l); Oid resultRelationOid; Relation resultRelation; resultRelationOid = getrelid(resultRelationIndex, rangeTable); resultRelation = heap_open(resultRelationOid, RowExclusiveLock); InitResultRelInfo(resultRelInfo, resultRelation, resultRelationIndex, NULL, estate->es_instrument); resultRelInfo++; } estate->es_result_relations = resultRelInfos; estate->es_num_result_relations = numResultRelations; /* es_result_relation_info is NULL except when within ModifyTable */ estate->es_result_relation_info = NULL; /* * In the partitioned result relation case, lock the non-leaf result * relations too. A subset of these are the roots of respective * partitioned tables, for which we also allocate ResultRelInfos. */ estate->es_root_result_relations = NULL; estate->es_num_root_result_relations = 0; if (plannedstmt->nonleafResultRelations) { int num_roots = list_length(plannedstmt->rootResultRelations); /* * Firstly, build ResultRelInfos for all the partitioned table * roots, because we will need them to fire the statement-level * triggers, if any. */ resultRelInfos = (ResultRelInfo *) palloc(num_roots * sizeof(ResultRelInfo)); resultRelInfo = resultRelInfos; foreach(l, plannedstmt->rootResultRelations) { Index resultRelIndex = lfirst_int(l); Oid resultRelOid; Relation resultRelDesc; resultRelOid = getrelid(resultRelIndex, rangeTable); resultRelDesc = heap_open(resultRelOid, RowExclusiveLock); InitResultRelInfo(resultRelInfo, resultRelDesc, lfirst_int(l), NULL, estate->es_instrument); resultRelInfo++; } estate->es_root_result_relations = resultRelInfos; estate->es_num_root_result_relations = num_roots; /* Simply lock the rest of them. */ foreach(l, plannedstmt->nonleafResultRelations) { Index resultRelIndex = lfirst_int(l); /* We locked the roots above. */ if (!list_member_int(plannedstmt->rootResultRelations, resultRelIndex)) LockRelationOid(getrelid(resultRelIndex, rangeTable), RowExclusiveLock); } } } else { /* * if no result relation, then set state appropriately */ estate->es_result_relations = NULL; estate->es_num_result_relations = 0; estate->es_result_relation_info = NULL; estate->es_root_result_relations = NULL; estate->es_num_root_result_relations = 0; } /* * Similarly, we have to lock relations selected FOR [KEY] UPDATE/SHARE * before we initialize the plan tree, else we'd be risking lock upgrades. * While we are at it, build the ExecRowMark list. Any partitioned child * tables are ignored here (because isParent=true) and will be locked by * the first Append or MergeAppend node that references them. (Note that * the RowMarks corresponding to partitioned child tables are present in * the same list as the rest, i.e., plannedstmt->rowMarks.) */ estate->es_rowMarks = NIL; foreach(l, plannedstmt->rowMarks) { PlanRowMark *rc = (PlanRowMark *) lfirst(l); Oid relid; Relation relation; ExecRowMark *erm; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* get relation's OID (will produce InvalidOid if subquery) */ relid = getrelid(rc->rti, rangeTable); /* * If you change the conditions under which rel locks are acquired * here, be sure to adjust ExecOpenScanRelation to match. */ switch (rc->markType) { case ROW_MARK_EXCLUSIVE: case ROW_MARK_NOKEYEXCLUSIVE: case ROW_MARK_SHARE: case ROW_MARK_KEYSHARE: relation = heap_open(relid, RowShareLock); break; case ROW_MARK_REFERENCE: relation = heap_open(relid, AccessShareLock); break; case ROW_MARK_COPY: /* no physical table access is required */ relation = NULL; break; default: elog(ERROR, "unrecognized markType: %d", rc->markType); relation = NULL; /* keep compiler quiet */ break; } /* Check that relation is a legal target for marking */ if (relation) CheckValidRowMarkRel(relation, rc->markType); erm = (ExecRowMark *) palloc(sizeof(ExecRowMark)); erm->relation = relation; erm->relid = relid; erm->rti = rc->rti; erm->prti = rc->prti; erm->rowmarkId = rc->rowmarkId; erm->markType = rc->markType; erm->strength = rc->strength; erm->waitPolicy = rc->waitPolicy; erm->ermActive = false; ItemPointerSetInvalid(&(erm->curCtid)); erm->ermExtra = NULL; estate->es_rowMarks = lappend(estate->es_rowMarks, erm); } /* * Initialize the executor's tuple table to empty. */ estate->es_tupleTable = NIL; estate->es_trig_tuple_slot = NULL; estate->es_trig_oldtup_slot = NULL; estate->es_trig_newtup_slot = NULL; /* mark EvalPlanQual not active */ estate->es_epqTuple = NULL; estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; /* * Initialize private state information for each SubPlan. We must do this * before running ExecInitNode on the main query tree, since * ExecInitSubPlan expects to be able to find these entries. */ Assert(estate->es_subplanstates == NIL); i = 1; /* subplan indices count from 1 */ //初始化子Plan foreach(l, plannedstmt->subplans) { Plan *subplan = (Plan *) lfirst(l); PlanState *subplanstate; int sp_eflags; /* * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. If * it is a parameterless subplan (not initplan), we suggest that it be * prepared to handle REWIND efficiently; otherwise there is no need. */ sp_eflags = eflags & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); if (bms_is_member(i, plannedstmt->rewindPlanIDs)) sp_eflags |= EXEC_FLAG_REWIND; subplanstate = ExecInitNode(subplan, estate, sp_eflags); estate->es_subplanstates = lappend(estate->es_subplanstates, subplanstate); i++; } /* * Initialize the private state information for all the nodes in the query * tree. This opens files, allocates storage and leaves us ready to start * processing tuples. */ planstate = ExecInitNode(plan, estate, eflags); /* * Get the tuple descriptor describing the type of tuples to return. */ tupType = ExecGetResultType(planstate); /* * Initialize the junk filter if needed. SELECT queries need a filter if * there are any junk attrs in the top-level tlist. */ if (operation == CMD_SELECT) { bool junk_filter_needed = false; ListCell *tlist; foreach(tlist, plan->targetlist) { TargetEntry *tle = (TargetEntry *) lfirst(tlist); if (tle->resjunk) { junk_filter_needed = true; break; } } if (junk_filter_needed) { JunkFilter *j; j = ExecInitJunkFilter(planstate->plan->targetlist, tupType->tdhasoid, ExecInitExtraTupleSlot(estate, NULL)); estate->es_junkFilter = j; /* Want to return the cleaned tuple type */ tupType = j->jf_cleanTupType; } } queryDesc->tupDesc = tupType; queryDesc->planstate = planstate; }/* * ExecCheckRTPerms * Check access permissions for all relations listed in a range table. * * Returns true if permissions are adequate. Otherwise, throws an appropriate * error if ereport_on_violation is true, or simply returns false otherwise. * * Note that this does NOT address row level security policies (aka: RLS). If * rows will be returned to the user as a result of this permission check * passing, then RLS also needs to be consulted (and check_enable_rls()). * * See rewrite/rowsecurity.c. */ bool ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation) { ListCell *l; bool result = true; foreach(l, rangeTable) { RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); result = ExecCheckRTEPerms(rte);//基于ACL Mode的权限检查 if (!result) { Assert(rte->rtekind == RTE_RELATION); if (ereport_on_violation) aclcheck_error(ACLCHECK_NO_PRIV, get_relkind_objtype(get_rel_relkind(rte->relid)), get_rel_name(rte->relid)); return false; } } if (ExecutorCheckPerms_hook) result = (*ExecutorCheckPerms_hook) (rangeTable, ereport_on_violation); return result; } /* ------------------------------------------------------------------------ * ExecInitNode * * Recursively initializes all the nodes in the plan tree rooted * at 'node'. * * Inputs: * 'node' is the current node of the plan produced by the query planner * 'estate' is the shared execution state for the plan tree * 'eflags' is a bitwise OR of flag bits described in executor.h * * Returns a PlanState node corresponding to the given Plan node. * ------------------------------------------------------------------------ */ //初始化节点,返回Plan状态 PlanState * ExecInitNode(Plan *node, EState *estate, int eflags) { PlanState *result; List *subps; ListCell *l; /* * do nothing when we get to the end of a leaf on tree. */ if (node == NULL) return NULL; /* * Make sure there's enough stack available. Need to check here, in * addition to ExecProcNode() (via ExecProcNodeFirst()), to ensure the * stack isn't overrun while initializing the node tree. */ check_stack_depth(); switch (nodeTag(node)) { /* * control nodes */ case T_Result: result = (PlanState *) ExecInitResult((Result *) node, estate, eflags); break; case T_ProjectSet: result = (PlanState *) ExecInitProjectSet((ProjectSet *) node, estate, eflags); break; case T_ModifyTable://插入数据 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node, estate, eflags); break; case T_Append: result = (PlanState *) ExecInitAppend((Append *) node, estate, eflags); break; case T_MergeAppend: result = (PlanState *) ExecInitMergeAppend((MergeAppend *) node, estate, eflags); break; case T_RecursiveUnion: result = (PlanState *) ExecInitRecursiveUnion((RecursiveUnion *) node, estate, eflags); break; case T_BitmapAnd: result = (PlanState *) ExecInitBitmapAnd((BitmapAnd *) node, estate, eflags); break; case T_BitmapOr: result = (PlanState *) ExecInitBitmapOr((BitmapOr *) node, estate, eflags); break; /* * scan nodes */ case T_SeqScan: result = (PlanState *) ExecInitSeqScan((SeqScan *) node, estate, eflags); break; case T_SampleScan: result = (PlanState *) ExecInitSampleScan((SampleScan *) node, estate, eflags); break; case T_IndexScan: result = (PlanState *) ExecInitIndexScan((IndexScan *) node, estate, eflags); break; case T_IndexOnlyScan: result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node, estate, eflags); break; case T_BitmapIndexScan: result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node, estate, eflags); break; case T_BitmapHeapScan: result = (PlanState *) ExecInitBitmapHeapScan((BitmapHeapScan *) node, estate, eflags); break; case T_TidScan: result = (PlanState *) ExecInitTidScan((TidScan *) node, estate, eflags); break; case T_SubqueryScan: result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node, estate, eflags); break; case T_FunctionScan: result = (PlanState *) ExecInitFunctionScan((FunctionScan *) node, estate, eflags); break; case T_TableFuncScan: result = (PlanState *) ExecInitTableFuncScan((TableFuncScan *) node, estate, eflags); break; case T_ValuesScan: result = (PlanState *) ExecInitValuesScan((ValuesScan *) node, estate, eflags); break; case T_CteScan: result = (PlanState *) ExecInitCteScan((CteScan *) node, estate, eflags); break; case T_NamedTuplestoreScan: result = (PlanState *) ExecInitNamedTuplestoreScan((NamedTuplestoreScan *) node, estate, eflags); break; case T_WorkTableScan: result = (PlanState *) ExecInitWorkTableScan((WorkTableScan *) node, estate, eflags); break; case T_ForeignScan: result = (PlanState *) ExecInitForeignScan((ForeignScan *) node, estate, eflags); break; case T_CustomScan: result = (PlanState *) ExecInitCustomScan((CustomScan *) node, estate, eflags); break; /* * join nodes */ case T_NestLoop: result = (PlanState *) ExecInitNestLoop((NestLoop *) node, estate, eflags); break; case T_MergeJoin: result = (PlanState *) ExecInitMergeJoin((MergeJoin *) node, estate, eflags); break; case T_HashJoin: result = (PlanState *) ExecInitHashJoin((HashJoin *) node, estate, eflags); break; /* * materialization nodes */ case T_Material: result = (PlanState *) ExecInitMaterial((Material *) node, estate, eflags); break; case T_Sort: result = (PlanState *) ExecInitSort((Sort *) node, estate, eflags); break; case T_Group: result = (PlanState *) ExecInitGroup((Group *) node, estate, eflags); break; case T_Agg: result = (PlanState *) ExecInitAgg((Agg *) node, estate, eflags); break; case T_WindowAgg: result = (PlanState *) ExecInitWindowAgg((WindowAgg *) node, estate, eflags); break; case T_Unique: result = (PlanState *) ExecInitUnique((Unique *) node, estate, eflags); break; case T_Gather: result = (PlanState *) ExecInitGather((Gather *) node, estate, eflags); break; case T_GatherMerge: result = (PlanState *) ExecInitGatherMerge((GatherMerge *) node, estate, eflags); break; case T_Hash: result = (PlanState *) ExecInitHash((Hash *) node, estate, eflags); break; case T_SetOp: result = (PlanState *) ExecInitSetOp((SetOp *) node, estate, eflags); break; case T_LockRows: result = (PlanState *) ExecInitLockRows((LockRows *) node, estate, eflags); break; case T_Limit: result = (PlanState *) ExecInitLimit((Limit *) node, estate, eflags); break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); result = NULL; /* keep compiler quiet */ break; } ExecSetExecProcNode(result, result->ExecProcNode); /* * Initialize any initPlans present in this node. The planner put them in * a separate list for us. */ subps = NIL; foreach(l, node->initPlan) { SubPlan *subplan = (SubPlan *) lfirst(l); SubPlanState *sstate; Assert(IsA(subplan, SubPlan)); sstate = ExecInitSubPlan(subplan, result); subps = lappend(subps, sstate); } result->initPlan = subps; /* Set up instrumentation for this node if requested */ if (estate->es_instrument) result->instrument = InstrAlloc(1, estate->es_instrument); return result; } /* ---------------------------------------------------------------- * ExecInitModifyTable * ---------------------------------------------------------------- */ ModifyTableState * ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) { ModifyTableState *mtstate;//返回结果 CmdType operation = node->operation;//操作类型 int nplans = list_length(node->plans);//节点中的plan个数 ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo;//结果Relation信息 Plan *subplan;//子Plan ListCell *l;//临时变量 int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); /* * create state structure */ mtstate = makeNode(ModifyTableState);//构建节点 mtstate->ps.plan = (Plan *) node;//设置Plan mtstate->ps.state = estate;//设置执行状态 mtstate->ps.ExecProcNode = ExecModifyTable;//设置处理函数为ExecModifyTable mtstate->operation = operation;//操作类型 mtstate->canSetTag = node->canSetTag; mtstate->mt_done = false; mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);//分配内存 mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;//结果Relation信息 /* If modifying a partitioned table, initialize the root table info */ if (node->rootResultRelIndex >= 0) mtstate->rootResultRelInfo = estate->es_root_result_relations + node->rootResultRelIndex; mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans); mtstate->mt_nplans = nplans; /* set up epqstate with dummy subplan data for the moment */ EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam); mtstate->fireBSTriggers = true; /* * call ExecInitNode on each of the plans to be executed and save the * results into the array "mt_plans". This is also a convenient place to * verify that the proposed target relations are valid and open their * indexes for insertion of new index entries. Note we *must* set * estate->es_result_relation_info correctly while we initialize each * sub-plan; ExecContextForcesOids depends on that! */ saved_resultRelInfo = estate->es_result_relation_info; resultRelInfo = mtstate->resultRelInfo; i = 0; //初始化每个子Plan,保存在mt_plans数组中 foreach(l, node->plans) { subplan = (Plan *) lfirst(l); /* Initialize the usesFdwDirectModify flag */ resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, node->fdwDirectModifyPlans); /* * Verify result relation is a valid target for the current operation */ CheckValidResultRel(resultRelInfo, operation); /* * If there are indices on the result relation, open them and save * descriptors in the result relation info, so that we can add new * index entries for the tuples we add/update. We need not do this * for a DELETE, however, since deletion doesn't affect indexes. Also, * inside an EvalPlanQual operation, the indexes might be open * already, since we share the resultrel state with the original * query. */ if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex && operation != CMD_DELETE && resultRelInfo->ri_IndexRelationDescs == NULL) ExecOpenIndices(resultRelInfo, node->onConflictAction != ONCONFLICT_NONE);//初始化Index /* * If this is an UPDATE and a BEFORE UPDATE trigger is present, the * trigger itself might modify the partition-key values. So arrange * for tuple routing. */ if (resultRelInfo->ri_TrigDesc && resultRelInfo->ri_TrigDesc->trig_update_before_row && operation == CMD_UPDATE) update_tuple_routing_needed = true; /* Now init the plan for this result rel */ estate->es_result_relation_info = resultRelInfo; mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);//初始化子节点 /* Also let FDWs init themselves for foreign-table result rels */ if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL) { List *fdw_private = (List *) list_nth(node->fdwPrivLists, i); resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate, resultRelInfo, fdw_private, i, eflags); } resultRelInfo++; i++; } estate->es_result_relation_info = saved_resultRelInfo; /* Get the target relation */ rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc; /* * If it's not a partitioned table after all, UPDATE tuple routing should * not be attempted. */ if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) update_tuple_routing_needed = false; /* * Build state for tuple routing if it's an INSERT or if it's an UPDATE of * partition key. */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) mtstate->mt_partition_tuple_routing = ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a * valid trigger query context, so skip it in explain-only mode. */ if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) ExecSetupTransitionCaptureState(mtstate, estate); /* * Construct mapping from each of the per-subplan partition attnos to the * root attno. This is required when during update row movement the tuple * descriptor of a source partition does not match the root partitioned * table descriptor. In such a case we need to convert tuples to the root * tuple descriptor, because the search for destination partition starts * from the root. Skip this setup if it's not a partition key update. */ if (update_tuple_routing_needed) ExecSetupChildParentMapForSubplan(mtstate); /* * Initialize any WITH CHECK OPTION constraints if needed. */ resultRelInfo = mtstate->resultRelInfo; i = 0; //设置Check选项 foreach(l, node->withCheckOptionLists) { List *wcoList = (List *) lfirst(l); List *wcoExprs = NIL; ListCell *ll; foreach(ll, wcoList) { WithCheckOption *wco = (WithCheckOption *) lfirst(ll); ExprState *wcoExpr = ExecInitQual((List *) wco->qual, mtstate->mt_plans[i]); wcoExprs = lappend(wcoExprs, wcoExpr); } resultRelInfo->ri_WithCheckOptions = wcoList; resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; resultRelInfo++; i++; } /* * Initialize RETURNING projections if needed. */ if (node->returningLists) { TupleTableSlot *slot; ExprContext *econtext; /* * Initialize result tuple slot and assign its rowtype using the first * RETURNING list. We assume the rest will look the same. */ mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists); /* Set up a slot for the output of the RETURNING projection(s) */ ExecInitResultTupleSlotTL(estate, &mtstate->ps); slot = mtstate->ps.ps_ResultTupleSlot; /* Need an econtext too */ if (mtstate->ps.ps_ExprContext == NULL) ExecAssignExprContext(estate, &mtstate->ps); econtext = mtstate->ps.ps_ExprContext; /* * Build a projection for each result rel. */ resultRelInfo = mtstate->resultRelInfo; foreach(l, node->returningLists) { List *rlist = (List *) lfirst(l); resultRelInfo->ri_returningList = rlist; resultRelInfo->ri_projectReturning = ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } } else { /* * We still must construct a dummy result tuple type, because InitPlan * expects one (maybe should change that?). */ mtstate->ps.plan->targetlist = NIL; ExecInitResultTupleSlotTL(estate, &mtstate->ps); mtstate->ps.ps_ExprContext = NULL; } /* Set the list of arbiter indexes if needed for ON CONFLICT */ resultRelInfo = mtstate->resultRelInfo; if (node->onConflictAction != ONCONFLICT_NONE) resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; /* * If needed, Initialize target list, projection and qual for ON CONFLICT * DO UPDATE. */ if (node->onConflictAction == ONCONFLICT_UPDATE) { ExprContext *econtext; TupleDesc relationDesc; TupleDesc tupDesc; /* insert may only have one plan, inheritance is not expanded */ Assert(nplans == 1); /* already exists if created by RETURNING processing above */ if (mtstate->ps.ps_ExprContext == NULL) ExecAssignExprContext(estate, &mtstate->ps); econtext = mtstate->ps.ps_ExprContext; relationDesc = resultRelInfo->ri_RelationDesc->rd_att; /* * Initialize slot for the existing tuple. If we'll be performing * tuple routing, the tuple descriptor to use for this will be * determined based on which relation the update is actually applied * to, so we don't set its tuple descriptor here. */ mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state, mtstate->mt_partition_tuple_routing ? NULL : relationDesc); /* carried forward solely for the benefit of explain */ mtstate->mt_excludedtlist = node->exclRelTlist; /* create state for DO UPDATE SET operation */ resultRelInfo->ri_onConflict = makeNode(OnConflictSetState); /* * Create the tuple slot for the UPDATE SET projection. * * Just like mt_existing above, we leave it without a tuple descriptor * in the case of partitioning tuple routing, so that it can be * changed by ExecPrepareTupleRouting. In that case, we still save * the tupdesc in the parent's state: it can be reused by partitions * with an identical descriptor to the parent. */ tupDesc = ExecTypeFromTL((List *) node->onConflictSet, relationDesc->tdhasoid); mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state, mtstate->mt_partition_tuple_routing ? NULL : tupDesc); resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc; /* build UPDATE SET projection state */ resultRelInfo->ri_onConflict->oc_ProjInfo = ExecBuildProjectionInfo(node->onConflictSet, econtext, mtstate->mt_conflproj, &mtstate->ps, relationDesc); /* initialize state to evaluate the WHERE clause, if any */ if (node->onConflictWhere) { ExprState *qualexpr; qualexpr = ExecInitQual((List *) node->onConflictWhere, &mtstate->ps); resultRelInfo->ri_onConflict->oc_WhereClause = qualexpr; } } /* * If we have any secondary relations in an UPDATE or DELETE, they need to * be treated like non-locked relations in SELECT FOR UPDATE, ie, the * EvalPlanQual mechanism needs to be told about them. Locate the * relevant ExecRowMarks. */ foreach(l, node->rowMarks) { PlanRowMark *rc = lfirst_node(PlanRowMark, l); ExecRowMark *erm; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* find ExecRowMark (same for all subplans) */ erm = ExecFindRowMark(estate, rc->rti, false); /* build ExecAuxRowMark for each subplan */ for (i = 0; i < nplans; i++) { ExecAuxRowMark *aerm; subplan = mtstate->mt_plans[i]->plan; aerm = ExecBuildAuxRowMark(erm, subplan->targetlist); mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm); } } /* select first subplan */ mtstate->mt_whichplan = 0; subplan = (Plan *) linitial(node->plans); EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, mtstate->mt_arowmarks[0]); /* * Initialize the junk filter(s) if needed. INSERT queries need a filter * if there are any junk attrs in the tlist. UPDATE and DELETE always * need a filter, since there's always at least one junk attribute present * --- no need to look first. Typically, this will be a 'ctid' or * 'wholerow' attribute, but in the case of a foreign data wrapper it * might be a set of junk attributes sufficient to identify the remote * row. * * If there are multiple result relations, each one needs its own junk * filter. Note multiple rels are only possible for UPDATE/DELETE, so we * can't be fooled by some needing a filter and some not. * * This section of code is also a convenient place to verify that the * output of an INSERT or UPDATE matches the target table(s). */ { bool junk_filter_needed = false; switch (operation) { case CMD_INSERT: foreach(l, subplan->targetlist) { TargetEntry *tle = (TargetEntry *) lfirst(l); if (tle->resjunk) { junk_filter_needed = true; break; } } break; case CMD_UPDATE: case CMD_DELETE: junk_filter_needed = true; break; default: elog(ERROR, "unknown operation"); break; } if (junk_filter_needed) { resultRelInfo = mtstate->resultRelInfo; for (i = 0; i < nplans; i++) { JunkFilter *j; subplan = mtstate->mt_plans[i]->plan; if (operation == CMD_INSERT || operation == CMD_UPDATE) ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, subplan->targetlist); j = ExecInitJunkFilter(subplan->targetlist, resultRelInfo->ri_RelationDesc->rd_att->tdhasoid, ExecInitExtraTupleSlot(estate, NULL)); if (operation == CMD_UPDATE || operation == CMD_DELETE) { /* For UPDATE/DELETE, find the appropriate junk attr now */ char relkind; relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid"); if (!AttributeNumberIsValid(j->jf_junkAttNo)) elog(ERROR, "could not find junk ctid column"); } else if (relkind == RELKIND_FOREIGN_TABLE) { /* * When there is a row-level trigger, there should be * a wholerow attribute. */ j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow"); } else { j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow"); if (!AttributeNumberIsValid(j->jf_junkAttNo)) elog(ERROR, "could not find junk wholerow column"); } } resultRelInfo->ri_junkFilter = j; resultRelInfo++; } } else { if (operation == CMD_INSERT) ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc, subplan->targetlist); } } /* * Set up a tuple table slot for use for trigger output tuples. In a plan * containing multiple ModifyTable nodes, all can share one such slot, so * we keep it in the estate. */ if (estate->es_trig_tuple_slot == NULL) estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); /* * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it * to estate->es_auxmodifytables so that it will be run to completion by * ExecPostprocessPlan. (It'd actually work fine to add the primary * ModifyTable node too, but there's no need.) Note the use of lcons not * lappend: we need later-initialized ModifyTable nodes to be shut down * before earlier ones. This ensures that we don't throw away RETURNING * rows that need to be seen by a later CTE subplan. */ if (!mtstate->canSetTag) estate->es_auxmodifytables = lcons(mtstate, estate->es_auxmodifytables); return mtstate; } /* ---------------- * ModifyTable node - * Apply rows produced by subplan(s) to result table(s), * by inserting, updating, or deleting. * * Note that rowMarks and epqParam are presumed to be valid for all the * subplan(s); they can't contain any info that varies across subplans. * ---------------- */ typedef struct ModifyTable { Plan plan; CmdType operation; /* INSERT, UPDATE, or DELETE */ bool canSetTag; /* do we set the command tag/es_processed? */ Index nominalRelation; /* Parent RT index for use of EXPLAIN */ /* RT indexes of non-leaf tables in a partition tree */ List *partitioned_rels; bool partColsUpdated; /* some part key in hierarchy updated */ List *resultRelations; /* integer list of RT indexes */ int resultRelIndex; /* index of first resultRel in plan's list */ int rootResultRelIndex; /* index of the partitioned table root */ List *plans; /* plan(s) producing source data */ List *withCheckOptionLists; /* per-target-table WCO lists */ List *returningLists; /* per-target-table RETURNING tlists */ List *fdwPrivLists; /* per-target-table FDW private data lists */ Bitmapset *fdwDirectModifyPlans; /* indices of FDW DM plans */ List *rowMarks; /* PlanRowMarks (non-locking only) */ int epqParam; /* ID of Param for EvalPlanQual re-eval */ OnConflictAction onConflictAction; /* ON CONFLICT action */ List *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs */ List *onConflictSet; /* SET for INSERT ON CONFLICT DO UPDATE */ Node *onConflictWhere; /* WHERE for ON CONFLICT UPDATE */ Index exclRelRTI; /* RTI of the EXCLUDED pseudo relation */ List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } ModifyTable;
4、ExecutorStart
/* ---------------------------------------------------------------- * ExecutorStart * * This routine must be called at the beginning of any execution of any * query plan * * Takes a QueryDesc previously created by CreateQueryDesc (which is separate * only because some places use QueryDescs for utility commands). The tupDesc * field of the QueryDesc is filled in to describe the tuples that will be * returned, and the internal fields (estate and planstate) are set up. * * eflags contains flag bits as described in executor.h. * * NB: the CurrentMemoryContext when this is called will become the parent * of the per-query context used for this Executor invocation. * * We provide a function hook variable that lets loadable plugins * get control when ExecutorStart is called. Such a plugin would * normally call standard_ExecutorStart(). * * ---------------------------------------------------------------- */ void ExecutorStart(QueryDesc *queryDesc, int eflags)//eflags见后 { if (ExecutorStart_hook) (*ExecutorStart_hook) (queryDesc, eflags);//提供了钩子函数 else standard_ExecutorStart(queryDesc, eflags);//标准函数 } void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)//标准函数 { EState *estate;//执行器状态信息 MemoryContext oldcontext;//原内存上下文 /* sanity checks: queryDesc must not be started already */ Assert(queryDesc != NULL); Assert(queryDesc->estate == NULL); /* * If the transaction is read-only, we need to check if any writes are * planned to non-temporary tables. EXPLAIN is considered read-only. * * Don't allow writes in parallel mode. Supporting UPDATE and DELETE * would require (a) storing the combocid hash in shared memory, rather * than synchronizing it just once at the start of parallelism, and (b) an * alternative to heap_update()'s reliance on xmax for mutual exclusion. * INSERT may have no such troubles, but we forbid it to simplify the * checks. * * We have lower-level defenses in CommandCounterIncrement and elsewhere * against performing unsafe operations in parallel mode, but this gives a * more user-friendly error message. */ if ((XactReadOnly || IsInParallelMode()) && !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) ExecCheckXactReadOnly(queryDesc->plannedstmt);//ReadOnly? /* * Build EState, switch into per-query memory context for startup. */ estate = CreateExecutorState();//创建执行器状态信息 queryDesc->estate = estate;//赋值 oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);//切换上下文 /* * Fill in external parameters, if any, from queryDesc; and allocate * workspace for internal parameters */ estate->es_param_list_info = queryDesc->params;//设置参数 if (queryDesc->plannedstmt->paramExecTypes != NIL)//TODO { int nParamExec; nParamExec = list_length(queryDesc->plannedstmt->paramExecTypes); estate->es_param_exec_vals = (ParamExecData *) palloc0(nParamExec * sizeof(ParamExecData)); } estate->es_sourceText = queryDesc->sourceText;//源SQL语句 /* * Fill in the query environment, if any, from queryDesc. */ estate->es_queryEnv = queryDesc->queryEnv;//查询环境 /* * If non-read-only query, set the command ID to mark output tuples with */ switch (queryDesc->operation) { case CMD_SELECT://查询语句 TODO /* * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark * tuples */ if (queryDesc->plannedstmt->rowMarks != NIL || queryDesc->plannedstmt->hasModifyingCTE) estate->es_output_cid = GetCurrentCommandId(true); /* * A SELECT without modifying CTEs can't possibly queue triggers, * so force skip-triggers mode. This is just a marginal efficiency * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't * all that expensive, but we might as well do it. */ if (!queryDesc->plannedstmt->hasModifyingCTE) eflags |= EXEC_FLAG_SKIP_TRIGGERS; break; case CMD_INSERT://插入语句 case CMD_DELETE: case CMD_UPDATE: estate->es_output_cid = GetCurrentCommandId(true); break; default: elog(ERROR, "unrecognized operation code: %d", (int) queryDesc->operation); break; } /* * Copy other important information into the EState */ estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot); estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot); estate->es_top_eflags = eflags; estate->es_instrument = queryDesc->instrument_options; estate->es_jit_flags = queryDesc->plannedstmt->jitFlags; /* * Set up an AFTER-trigger statement context, unless told not to, or * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called). */ if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY))) AfterTriggerBeginQuery(); /* * Initialize the plan state tree */ InitPlan(queryDesc, eflags);//初始化Plan State tree MemoryContextSwitchTo(oldcontext); } /* * GetCurrentCommandId * * "used" must be true if the caller intends to use the command ID to mark * inserted/updated/deleted tuples. false means the ID is being fetched * for read-only purposes (ie, as a snapshot validity cutoff). See * CommandCounterIncrement() for discussion. */ CommandId GetCurrentCommandId(bool used) { /* this is global to a transaction, not subtransaction-local */ if (used) { /* * Forbid setting currentCommandIdUsed in a parallel worker, because * we have no provision for communicating this back to the master. We * could relax this restriction when currentCommandIdUsed was already * true at the start of the parallel operation. */ Assert(!IsParallelWorker()); currentCommandIdUsed = true; } return currentCommandId; } /* * The "eflags" argument to ExecutorStart and the various ExecInitNode * routines is a bitwise OR of the following flag bits, which tell the * called plan node what to expect. Note that the flags will get modified * as they are passed down the plan tree, since an upper node may require * functionality in its subnode not demanded of the plan as a whole * (example: MergeJoin requires mark/restore capability in its inner input), * or an upper node may shield its input from some functionality requirement * (example: Materialize shields its input from needing to do backward scan). * * EXPLAIN_ONLY indicates that the plan tree is being initialized just so * EXPLAIN can print it out; it will not be run. Hence, no side-effects * of startup should occur. However, error checks (such as permission checks) * should be performed. * * REWIND indicates that the plan node should try to efficiently support * rescans without parameter changes. (Nodes must support ExecReScan calls * in any case, but if this flag was not given, they are at liberty to do it * through complete recalculation. Note that a parameter change forces a * full recalculation in any case.) * * BACKWARD indicates that the plan node must respect the es_direction flag. * When this is not passed, the plan node will only be run forwards. * * MARK indicates that the plan node must support Mark/Restore calls. * When this is not passed, no Mark/Restore will occur. * * SKIP_TRIGGERS tells ExecutorStart/ExecutorFinish to skip calling * AfterTriggerBeginQuery/AfterTriggerEndQuery. This does not necessarily * mean that the plan can't queue any AFTER triggers; just that the caller * is responsible for there being a trigger context for them to be queued in. * * WITH/WITHOUT_OIDS tell the executor to emit tuples with or without space * for OIDs, respectively. These are currently used only for CREATE TABLE AS. * If neither is set, the plan may or may not produce tuples including OIDs. */#define EXEC_FLAG_EXPLAIN_ONLY 0x0001 /* EXPLAIN, no ANALYZE */#define EXEC_FLAG_REWIND 0x0002 /* need efficient rescan */#define EXEC_FLAG_BACKWARD 0x0004 /* need backward scan */#define EXEC_FLAG_MARK 0x0008 /* need mark/restore */#define EXEC_FLAG_SKIP_TRIGGERS 0x0010 /* skip AfterTrigger calls */#define EXEC_FLAG_WITH_OIDS 0x0020 /* force OIDs in returned tuples */#define EXEC_FLAG_WITHOUT_OIDS 0x0040 /* force no OIDs in returned tuples */#define EXEC_FLAG_WITH_NO_DATA 0x0080 /* rel scannability doesn't matter */
5、ExecutorRun
//上一节已介绍
6、ExecutorFinish
/* ---------------------------------------------------------------- * ExecutorFinish * * This routine must be called after the last ExecutorRun call. * It performs cleanup such as firing AFTER triggers. It is * separate from ExecutorEnd because EXPLAIN ANALYZE needs to * include these actions in the total runtime. * * We provide a function hook variable that lets loadable plugins * get control when ExecutorFinish is called. Such a plugin would * normally call standard_ExecutorFinish(). * * ---------------------------------------------------------------- */ void ExecutorFinish(QueryDesc *queryDesc) { if (ExecutorFinish_hook) (*ExecutorFinish_hook) (queryDesc); else standard_ExecutorFinish(queryDesc); } void standard_ExecutorFinish(QueryDesc *queryDesc) { EState *estate; MemoryContext oldcontext; /* sanity checks */ Assert(queryDesc != NULL); estate = queryDesc->estate; Assert(estate != NULL); Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); /* This should be run once and only once per Executor instance */ Assert(!estate->es_finished); /* Switch into per-query memory context */ oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); /* Allow instrumentation of Executor overall runtime */ if (queryDesc->totaltime) InstrStartNode(queryDesc->totaltime); /* Run ModifyTable nodes to completion */ ExecPostprocessPlan(estate); /* Execute queued AFTER triggers, unless told not to */ if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS)) AfterTriggerEndQuery(estate); if (queryDesc->totaltime) InstrStopNode(queryDesc->totaltime, 0); MemoryContextSwitchTo(oldcontext); estate->es_finished = true; } /* ---------------------------------------------------------------- * ExecPostprocessPlan * * Give plan nodes a final chance to execute before shutdown * ---------------------------------------------------------------- */ static void ExecPostprocessPlan(EState *estate) { ListCell *lc; /* * Make sure nodes run forward. */ estate->es_direction = ForwardScanDirection; /* * Run any secondary ModifyTable nodes to completion, in case the main * query did not fetch all rows from them. (We do this to ensure that * such nodes have predictable results.) */ foreach(lc, estate->es_auxmodifytables) { PlanState *ps = (PlanState *) lfirst(lc); for (;;) { TupleTableSlot *slot; /* Reset the per-output-tuple exprcontext each time */ ResetPerTupleExprContext(estate); slot = ExecProcNode(ps); if (TupIsNull(slot)) break; } }
7、ExecutorEnd
/* ---------------------------------------------------------------- * ExecutorEnd * * This routine must be called at the end of execution of any * query plan * * We provide a function hook variable that lets loadable plugins * get control when ExecutorEnd is called. Such a plugin would * normally call standard_ExecutorEnd(). * * ---------------------------------------------------------------- */ void ExecutorEnd(QueryDesc *queryDesc) { if (ExecutorEnd_hook) (*ExecutorEnd_hook) (queryDesc); else standard_ExecutorEnd(queryDesc); } void standard_ExecutorEnd(QueryDesc *queryDesc) { EState *estate; MemoryContext oldcontext; /* sanity checks */ Assert(queryDesc != NULL); estate = queryDesc->estate; Assert(estate != NULL); /* * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This * Assert is needed because ExecutorFinish is new as of 9.1, and callers * might forget to call it. */ Assert(estate->es_finished || (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); /* * Switch into per-query memory context to run ExecEndPlan */ oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); ExecEndPlan(queryDesc->planstate, estate); /* do away with our snapshots */ UnregisterSnapshot(estate->es_snapshot); UnregisterSnapshot(estate->es_crosscheck_snapshot); /* * Must switch out of context before destroying it */ MemoryContextSwitchTo(oldcontext); /* * Release EState and per-query memory context. This should release * everything the executor has allocated. */ FreeExecutorState(estate); /* Reset queryDesc fields that no longer point to anything */ queryDesc->tupDesc = NULL; queryDesc->estate = NULL; queryDesc->planstate = NULL; queryDesc->totaltime = NULL; } /* ---------------------------------------------------------------- * ExecEndPlan * * Cleans up the query plan -- closes files and frees up storage * * NOTE: we are no longer very worried about freeing storage per se * in this code; FreeExecutorState should be guaranteed to release all * memory that needs to be released. What we are worried about doing * is closing relations and dropping buffer pins. Thus, for example, * tuple tables must be cleared or dropped to ensure pins are released. * ---------------------------------------------------------------- */ static void ExecEndPlan(PlanState *planstate, EState *estate) { ResultRelInfo *resultRelInfo; int i; ListCell *l; /* * shut down the node-type-specific query processing */ ExecEndNode(planstate); /* * for subplans too */ foreach(l, estate->es_subplanstates) { PlanState *subplanstate = (PlanState *) lfirst(l); ExecEndNode(subplanstate); } /* * destroy the executor's tuple table. Actually we only care about * releasing buffer pins and tupdesc refcounts; there's no need to pfree * the TupleTableSlots, since the containing memory context is about to go * away anyway. */ ExecResetTupleTable(estate->es_tupleTable, false); /* * close the result relation(s) if any, but hold locks until xact commit. */ resultRelInfo = estate->es_result_relations; for (i = estate->es_num_result_relations; i > 0; i--) { /* Close indices and then the relation itself */ ExecCloseIndices(resultRelInfo); heap_close(resultRelInfo->ri_RelationDesc, NoLock); resultRelInfo++; } /* Close the root target relation(s). */ resultRelInfo = estate->es_root_result_relations; for (i = estate->es_num_root_result_relations; i > 0; i--) { heap_close(resultRelInfo->ri_RelationDesc, NoLock); resultRelInfo++; } /* likewise close any trigger target relations */ ExecCleanUpTriggerState(estate); /* * close any relations selected FOR [KEY] UPDATE/SHARE, again keeping * locks */ foreach(l, estate->es_rowMarks) { ExecRowMark *erm = (ExecRowMark *) lfirst(l); if (erm->relation) heap_close(erm->relation, NoLock); } }
8、FreeQueryDesc
//释放资源 /* * FreeQueryDesc */ void FreeQueryDesc(QueryDesc *qdesc) { /* Can't be a live query */ Assert(qdesc->estate == NULL); /* forget our snapshots */ UnregisterSnapshot(qdesc->snapshot); UnregisterSnapshot(qdesc->crosscheck_snapshot); /* Only the QueryDesc itself need be freed */ pfree(qdesc); }
二、源码解读
/* * ProcessQuery * Execute a single plannable query within a PORTAL_MULTI_QUERY, * PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal * * plan: the plan tree for the query * sourceText: the source text of the query * params: any parameters needed * dest: where to send results * completionTag: points to a buffer of size COMPLETION_TAG_BUFSIZE * in which to store a command completion status string. * * completionTag may be NULL if caller doesn't want a status string. * * Must be called in a memory context that will be reset or deleted on * error; otherwise the executor's memory usage will be leaked. *//*输入: plan-已生成执行计划的语句 sourceText-源SQL语句 params-TODO queryEnv-查询执行的环境 dest-目标接收器 completionTag-完成标记输出: 无*/static voidProcessQuery(PlannedStmt *plan, const char *sourceText, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, char *completionTag){ QueryDesc *queryDesc;//查询描述符 /* * Create the QueryDesc object */ queryDesc = CreateQueryDesc(plan, sourceText, GetActiveSnapshot(), InvalidSnapshot, dest, params, queryEnv, 0);//构造查询描述符 /* * Call ExecutorStart to prepare the plan for execution */ ExecutorStart(queryDesc, 0);//启动执行器 /* * Run the plan to completion. */ ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);//执行 /* * Build command completion status string, if caller wants one. */ if (completionTag)//如果需要完成标记 { Oid lastOid; switch (queryDesc->operation) { case CMD_SELECT: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "SELECT " UINT64_FORMAT, queryDesc->estate->es_processed); break; case CMD_INSERT://插入语句 if (queryDesc->estate->es_processed == 1) lastOid = queryDesc->estate->es_lastoid; else lastOid = InvalidOid; snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "INSERT %u " UINT64_FORMAT, lastOid, queryDesc->estate->es_processed); break; case CMD_UPDATE: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "UPDATE " UINT64_FORMAT, queryDesc->estate->es_processed); break; case CMD_DELETE: snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "DELETE " UINT64_FORMAT, queryDesc->estate->es_processed); break; default: strcpy(completionTag, "???"); break; } } /* * Now, we close down all the scans and free allocated resources. */ ExecutorFinish(queryDesc);//完成 ExecutorEnd(queryDesc);//结束 FreeQueryDesc(queryDesc);//释放资源}
三、跟踪分析
插入测试数据:
testdb=# -- #9.1 ProcessQuerytestdb=# -- 获取pidtestdb=# select pg_backend_pid(); pg_backend_pid ---------------- 2551(1 row)testdb=# -- 插入1行testdb=# insert into t_insert values(17,'ProcessQuery','ProcessQuery','ProcessQuery');(挂起)
启动gdb,跟踪调试:
[root@localhost ~]# gdb -p 2551GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-100.el7Copyright (C) 2013 Free Software Foundation, Inc....(gdb) b ProcessQueryBreakpoint 1 at 0x851d19: file pquery.c, line 149.(gdb) cContinuing.Breakpoint 1, ProcessQuery (plan=0x2ccb378, sourceText=0x2c09ef0 "insert into t_insert values(17,'ProcessQuery','ProcessQuery','ProcessQuery');", params=0x0, queryEnv=0x0, dest=0x2ccb4d8, completionTag=0x7ffe94ba4940 "") at pquery.c:149149 queryDesc = CreateQueryDesc(plan, sourceText,#查看参数#1、plan(gdb) p *plan$1 = {type = T_PlannedStmt, commandType = CMD_INSERT, queryId = 0, hasReturning = false, hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false, parallelModeNeeded = false, jitFlags = 0, planTree = 0x2ccafe8, rtable = 0x2ccb2a8, resultRelations = 0x2ccb348, nonleafResultRelations = 0x0, rootResultRelations = 0x0, subplans = 0x0, rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x2ccb2f8, invalItems = 0x0, paramExecTypes = 0x2c31370, utilityStmt = 0x0, stmt_location = 0, stmt_len = 76}(gdb) p *(plan->planTree)#执行树,左右均无兄弟节点$2 = {type = T_ModifyTable, startup_cost = 0, total_cost = 0.01, plan_rows = 1, plan_width = 298, parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x0, qual = 0x0, lefttree = 0x0, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}(gdb) p *(plan->rtable)$3 = {type = T_List, length = 1, head = 0x2ccb288, tail = 0x2ccb288}(gdb) p *(plan->rtable->head)#Oid=46969208,可使用pg_class查询$4 = {data = {ptr_value = 0x2ccb178, int_value = 46969208, oid_value = 46969208}, next = 0x0}(gdb) p *(plan->resultRelations)$5 = {type = T_IntList, length = 1, head = 0x2ccb328, tail = 0x2ccb328}(gdb) p *(plan->resultRelations->head)#Oid=1?,可使用pg_class查询$6 = {data = {ptr_value = 0x1, int_value = 1, oid_value = 1}, next = 0x0}(gdb) p *(plan->relationOids)$7 = {type = T_OidList, length = 1, head = 0x2ccb2d8, tail = 0x2ccb2d8}(gdb) p *(plan->relationOids->head)#Oid=26731,可使用pg_class查询$8 = {data = {ptr_value = 0x686b, int_value = 26731, oid_value = 26731}, next = 0x0}#2、sourceText(gdb) p sourceText$11 = 0x2c09ef0 "insert into t_insert values(17,'ProcessQuery','ProcessQuery','ProcessQuery');"#3、params(gdb) p params#NULL$12 = (ParamListInfo) 0x0#4、queryEnv(gdb) p queryEnv#NULL$13 = (QueryEnvironment *) 0x0#5、dest(gdb) p dest$14 = (DestReceiver *) 0x2ccb4d8(gdb) p *dest$15 = {receiveSlot = 0x4857ad , rStartup = 0x485196 , rShutdown = 0x485bad , rDestroy = 0x485c21 , mydest = DestRemote}(gdb) #6、completionTag(gdb) p completionTag#空字符串$16 = 0x7ffe94ba4940 ""(gdb) next156 ExecutorStart(queryDesc, 0);(gdb) 161 ExecutorRun(queryDesc, ForwardScanDirection, 0L, true);(gdb) 166 if (completionTag)(gdb) 170 switch (queryDesc->operation)(gdb) 178 if (queryDesc->estate->es_processed == 1)(gdb) 179 lastOid = queryDesc->estate->es_lastoid;(gdb) p queryDesc->estate->es_lastoid$18 = 0(gdb) $19 = 0(gdb) next184 lastOid, queryDesc->estate->es_processed);(gdb) p lastOid$20 = 0(gdb) next182 snprintf(completionTag, COMPLETION_TAG_BUFSIZE,(gdb) 185 break;(gdb) p completionTag#返回标记,在psql中输出的信息$21 = 0x7ffe94ba4940 "INSERT 0 1"(gdb) $22 = 0x7ffe94ba4940 "INSERT 0 1"(gdb) next205 ExecutorFinish(queryDesc);(gdb) 206 ExecutorEnd(queryDesc);(gdb) 208 FreeQueryDesc(queryDesc);(gdb) 209 }(gdb) PortalRunMulti (portal=0x2c6f490, isTopLevel=true, setHoldSnapshot=false, dest=0x2ccb4d8, altdest=0x2ccb4d8, completionTag=0x7ffe94ba4940 "INSERT 0 1") at pquery.c:13021302 if (log_executor_stats)(gdb) #DONE!
为了更深入理解整个执行过程中最重要的两个数据结构PlanState和EState,对子函数InitPlan和CreateExecutorState作进一步的跟踪分析:
InitPlan
testdb=# -- 插入1行testdb=# insert into t_insert values(18,'ProcessQuery.InitPlan','ProcessQuery.InitPlan','ProcessQuery.InitPlan');(挂起)(gdb) b InitPlanBreakpoint 1 at 0x691560: file execMain.c, line 811.#查看参数#1、queryDesc(gdb) p *queryDesc$8 = {operation = CMD_INSERT, plannedstmt = 0x2ccb408, sourceText = 0x2c09ef0 "insert into t_insert values(18,'ProcessQuery.InitPlan','ProcessQuery.InitPlan','ProcessQuery.InitPlan');", snapshot = 0x2c2d920, crosscheck_snapshot = 0x0, dest = 0x2ccb568, params = 0x0, queryEnv = 0x0, instrument_options = 0, tupDesc = 0x0, estate = 0x2cbcc70, planstate = 0x0, already_executed = false, totaltime = 0x0}#2、eflags(gdb) p eflags$9 = 0(gdb) next812 PlannedStmt *plannedstmt = queryDesc->plannedstmt;(gdb) 813 Plan *plan = plannedstmt->planTree;(gdb) 814 List *rangeTable = plannedstmt->rtable;(gdb) p *(queryDesc->plannedstmt)$10 = {type = T_PlannedStmt, commandType = CMD_INSERT, queryId = 0, hasReturning = false, hasModifyingCTE = false, canSetTag = true, transientPlan = false, dependsOnRole = false, parallelModeNeeded = false, jitFlags = 0, planTree = 0x2ccb078, rtable = 0x2ccb338, resultRelations = 0x2ccb3d8, nonleafResultRelations = 0x0, rootResultRelations = 0x0, subplans = 0x0, rewindPlanIDs = 0x0, rowMarks = 0x0, relationOids = 0x2ccb388, invalItems = 0x0, paramExecTypes = 0x2c313f8, utilityStmt = 0x0, stmt_location = 0, stmt_len = 103}(gdb) next815 EState *estate = queryDesc->estate;(gdb) 824 ExecCheckRTPerms(rangeTable, true);(gdb) 829 estate->es_range_table = rangeTable;(gdb) 830 estate->es_plannedstmt = plannedstmt;(gdb) p *rangeTable$11 = {type = T_List, length = 1, head = 0x2ccb318, tail = 0x2ccb318}(gdb) p *(rangeTable->head)$12 = {data = {ptr_value = 0x2ccb208, int_value = 46969352, oid_value = 46969352}, next = 0x0}(gdb) next838 if (plannedstmt->resultRelations)(gdb) 840 List *resultRelations = plannedstmt->resultRelations;(gdb) 841 int numResultRelations = list_length(resultRelations);(gdb) 846 palloc(numResultRelations * sizeof(ResultRelInfo));(gdb) p numResultRelations$13 = 1(gdb) p *(resultRelations->head)$14 = {data = {ptr_value = 0x1, int_value = 1, oid_value = 1}, next = 0x0}(gdb) (gdb) next845 resultRelInfos = (ResultRelInfo *)(gdb) 847 resultRelInfo = resultRelInfos;(gdb) 848 foreach(l, resultRelations)(gdb) 850 Index resultRelationIndex = lfirst_int(l);(gdb) 854 resultRelationOid = getrelid(resultRelationIndex, rangeTable);(gdb) 855 resultRelation = heap_open(resultRelationOid, RowExclusiveLock);(gdb) 857 InitResultRelInfo(resultRelInfo,(gdb) p resultRelationOid$15 = 26731(gdb) p resultRelation$16 = (Relation) 0x7f3a64247b78#目标Relation,t_insert(gdb) p *resultRelation$17 = {rd_node = {spcNode = 1663, dbNode = 16477, relNode = 26747}, rd_smgr = 0x2c99328, rd_refcnt = 1, rd_backend = -1, rd_islocaltemp = false, rd_isnailed = false, rd_isvalid = true, rd_indexvalid = 1 '\001', rd_statvalid = true, rd_createSubid = 0, rd_newRelfilenodeSubid = 0, rd_rel = 0x7f3a64247d88, rd_att = 0x7f3a64247e98, rd_id = 26731, rd_lockInfo = {lockRelId = { relId = 26731, dbId = 16477}}, rd_rules = 0x0, rd_rulescxt = 0x0, trigdesc = 0x0, rd_rsdesc = 0x0, rd_fkeylist = 0x0, rd_fkeyvalid = false, rd_partkeycxt = 0x0, rd_partkey = 0x0, rd_pdcxt = 0x0, rd_partdesc = 0x0, rd_partcheck = 0x0, rd_indexlist = 0x7f3a64249cf0, rd_oidindex = 0, rd_pkindex = 26737, rd_replidindex = 26737, rd_statlist = 0x0, rd_indexattr = 0x0, rd_projindexattr = 0x0, rd_keyattr = 0x0, rd_pkattr = 0x0, rd_idattr = 0x0, rd_projidx = 0x0, rd_pubactions = 0x0, rd_options = 0x0, rd_index = 0x0, rd_indextuple = 0x0, rd_amhandler = 0, rd_indexcxt = 0x0, rd_amroutine = 0x0, rd_opfamily = 0x0, rd_opcintype = 0x0, rd_support = 0x0, rd_supportinfo = 0x0, rd_indoption = 0x0, rd_indexprs = 0x0, rd_indpred = 0x0, rd_exclops = 0x0, rd_exclprocs = 0x0, rd_exclstrats = 0x0, rd_amcache = 0x0, rd_indcollation = 0x0, rd_fdwroutine = 0x0, rd_toastoid = 0, pgstat_info = 0x2c8ae98}(gdb) gdb) next848 foreach(l, resultRelations)(gdb) 864 estate->es_result_relations = resultRelInfos;(gdb) 865 estate->es_num_result_relations = numResultRelations;(gdb) 867 estate->es_result_relation_info = NULL;(gdb) 874 estate->es_root_result_relations = NULL;(gdb) 875 estate->es_num_root_result_relations = 0;(gdb) 876 if (plannedstmt->nonleafResultRelations)(gdb) p *plannedstmt->nonleafResultRelationsCannot access memory at address 0x0(gdb) next941 estate->es_rowMarks = NIL;942 foreach(l, plannedstmt->rowMarks)(gdb) p plannedstmt->rowMarks$19 = (List *) 0x0(gdb) next1003 estate->es_tupleTable = NIL;(gdb) 1004 estate->es_trig_tuple_slot = NULL;(gdb) 1005 estate->es_trig_oldtup_slot = NULL;(gdb) 1006 estate->es_trig_newtup_slot = NULL;(gdb) 1009 estate->es_epqTuple = NULL;(gdb) 1010 estate->es_epqTupleSet = NULL;(gdb) 1011 estate->es_epqScanDone = NULL;(gdb) 1019 i = 1; /* subplan indices count from 1 */(gdb) 1020 foreach(l, plannedstmt->subplans)(gdb) p *plannedstmt->subplansCannot access memory at address 0x0(gdb) next1049 planstate = ExecInitNode(plan, estate, eflags);(gdb) stepExecInitNode (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at execProcnode.c:148148 if (node == NULL)(gdb) next156 check_stack_depth();(gdb) 158 switch (nodeTag(node))(gdb) 174 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,(gdb) stepExecInitModifyTable (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at nodeModifyTable.c:21792179 CmdType operation = node->operation;(gdb) next2180 int nplans = list_length(node->plans);(gdb) 2187 bool update_tuple_routing_needed = node->partColsUpdated;(gdb) p node->plans$20 = (List *) 0x2c317c8(gdb) p *(node->plans)$21 = {type = T_List, length = 1, head = 0x2ccb058, tail = 0x2ccb058}(gdb) p *(node->plans->head)$22 = {data = {ptr_value = 0x2c315b8, int_value = 46339512, oid_value = 46339512}, next = 0x0}(gdb) next2195 mtstate = makeNode(ModifyTableState);(gdb) 2196 mtstate->ps.plan = (Plan *) node;(gdb) p *mtstate$23 = {ps = {type = T_ModifyTableState, plan = 0x0, state = 0x0, ExecProcNode = 0x0, ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, qual = 0x0, lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x0, ps_ExprContext = 0x0, ps_ProjInfo = 0x0, scandesc = 0x0}, operation = CMD_UNKNOWN, canSetTag = false, mt_done = false, mt_plans = 0x0, mt_nplans = 0, mt_whichplan = 0, resultRelInfo = 0x0, rootResultRelInfo = 0x0, mt_arowmarks = 0x0, mt_epqstate = {estate = 0x0, planstate = 0x0, origslot = 0x0, plan = 0x0, arowMarks = 0x0, epqParam = 0}, fireBSTriggers = false, mt_existing = 0x0, mt_excludedtlist = 0x0, mt_conflproj = 0x0, mt_partition_tuple_routing = 0x0, mt_transition_capture = 0x0, mt_oc_transition_capture = 0x0, mt_per_subplan_tupconv_maps = 0x0}(gdb) next2197 mtstate->ps.state = estate;(gdb) 2198 mtstate->ps.ExecProcNode = ExecModifyTable;(gdb) 2200 mtstate->operation = operation;(gdb) 2201 mtstate->canSetTag = node->canSetTag;(gdb) 2202 mtstate->mt_done = false;(gdb) 2204 mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);(gdb) 2205 mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;(gdb) 2208 if (node->rootResultRelIndex >= 0)(gdb) 2212 mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);(gdb) 2213 mtstate->mt_nplans = nplans;(gdb) 2216 EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);(gdb) 2217 mtstate->fireBSTriggers = true;(gdb) 2227 saved_resultRelInfo = estate->es_result_relation_info;(gdb) p *(mtstate->mt_epqstate)Structure has no component named operator*.(gdb) p mtstate->mt_epqstate$24 = {estate = 0x0, planstate = 0x0, origslot = 0x0, plan = 0x0, arowMarks = 0x0, epqParam = 0}(gdb) next2229 resultRelInfo = mtstate->resultRelInfo;(gdb) 2230 i = 0;(gdb) 2231 foreach(l, node->plans)(gdb) 2233 subplan = (Plan *) lfirst(l);(gdb) 2237 node->fdwDirectModifyPlans);(gdb) 2236 resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,(gdb) 2242 CheckValidResultRel(resultRelInfo, operation);(gdb) 2253 if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&(gdb) 2255 resultRelInfo->ri_IndexRelationDescs == NULL)(gdb) 2254 operation != CMD_DELETE &&(gdb) 2257 node->onConflictAction != ONCONFLICT_NONE);(gdb) 2256 ExecOpenIndices(resultRelInfo,(gdb) 2264 if (resultRelInfo->ri_TrigDesc &&(gdb) 2270 estate->es_result_relation_info = resultRelInfo;(gdb) 2271 mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);(gdb) finishRun till exit from #0 ExecInitModifyTable (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at nodeModifyTable.c:22940x000000000069a21a in ExecInitNode (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at execProcnode.c:174174 result = (PlanState *) ExecInitModifyTable((ModifyTable *) node,Value returned is $25 = (ModifyTableState *) 0x2cbcfc0(gdb) next176 break;(gdb) 373 ExecSetExecProcNode(result, result->ExecProcNode);(gdb) p result->ExecProcNode$26 = (ExecProcNodeMtd) 0x6c2485 (gdb) finishRun till exit from #0 ExecInitNode (node=0x2ccb078, estate=0x2cbcc70, eflags=0) at execProcnode.c:3920x0000000000691c2f in InitPlan (queryDesc=0x2cc1580, eflags=0) at execMain.c:10491049 planstate = ExecInitNode(plan, estate, eflags);Value returned is $27 = (PlanState *) 0x2cbcfc0(gdb) next1054 tupType = ExecGetResultType(planstate);(gdb) 1060 if (operation == CMD_SELECT)(gdb) p tupType$28 = (TupleDesc) 0x2cbdd40(gdb) p *tupType$29 = {natts = 0, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2cbdd60}(gdb) next1090 queryDesc->tupDesc = tupType;(gdb) 1091 queryDesc->planstate = planstate;(gdb) 1092 }(gdb) standard_ExecutorStart (queryDesc=0x2cc1580, eflags=0) at execMain.c:266266 MemoryContextSwitchTo(oldcontext);(gdb) #DONE!
CreateExecutorState
#gdb(gdb) b CreateExecutorStateBreakpoint 1 at 0x69f2c5: file execUtils.c, line 89.#psqltestdb=# -- 插入1行testdb=# insert into t_insert values(19,'ProcessQuery.CreateExecutorState','ProcessQuery.CreateExecutorState','ProcessQuery.CreateExecutorState');(挂起)#gdb(gdb) cContinuing.Breakpoint 1, CreateExecutorState () at execUtils.c:8989 qcontext = AllocSetContextCreate(CurrentMemoryContext,#查看输入参数#该函数无输入参数(gdb) step #进入AllocSetContextCreate函数内部AllocSetContextCreateExtended (parent=0x2c09de0, name=0xb1a840 "ExecutorState", minContextSize=0, initBlockSize=8192, maxBlockSize=8388608) at aset.c:426426 if (minContextSize == ALLOCSET_DEFAULT_MINSIZE &&(gdb) next428 freeListIndex = 0;#查看AllocSetContextCreate的输入参数#1、parent(gdb) p *parent$3 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, methods = 0xb8c720 , parent = 0x2c04ba0, firstchild = 0x0, prevchild = 0x2c7fc60, nextchild = 0x2cb6b30, name = 0xb4e87c "MessageContext", ident = 0x0, reset_cbs = 0x0}#顶层Context(gdb) p *(parent->parent)$4 = {type = T_AllocSetContext, isReset = false, allowInCritSection = false, methods = 0xb8c720 , parent = 0x0, firstchild = 0x2c2d7e0, prevchild = 0x0, nextchild = 0x0, name = 0xb8d050 "TopMemoryContext", ident = 0x0, reset_cbs = 0x0}#2、name(gdb) p name$5 = 0xb1a840 "ExecutorState"#3、minContextSize(gdb) p minContextSize$6 = 0#4、initBlockSize(gdb) p initBlockSize$7 = 8192 #8KB#5、maxBlockSize(gdb) p maxBlockSize$8 = 8388608 #8MB(gdb) next440 AllocSetFreeList *freelist = &context_freelists[freeListIndex];(gdb) p freeListIndex $9 = 0(gdb) $10 = 0(gdb) next442 if (freelist->first_free != NULL)(gdb) p *freelist$11 = {num_free = 4, first_free = 0x2cbcb60}(gdb) p *(freelist->first_free)$12 = {header = {type = T_AllocSetContext, isReset = true, allowInCritSection = false, methods = 0xb8c720 , parent = 0x0, firstchild = 0x0, prevchild = 0x0, nextchild = 0x2cbeb70, name = 0xb1a840 "ExecutorState", ident = 0x0, reset_cbs = 0x0}, blocks = 0x2cbcc38, freelist = {0x0 }, initBlockSize = 8192, maxBlockSize = 8388608, nextBlockSize = 8192, allocChunkLimit = 8192, keeper = 0x2cbcc38, freeListIndex = 0}(gdb) next445 set = freelist->first_free;(gdb) 446 freelist->first_free = (AllocSet) set->header.nextchild;(gdb) 447 freelist->num_free--;(gdb) 450 set->maxBlockSize = maxBlockSize;(gdb) 453 MemoryContextCreate((MemoryContext) set,(gdb) 459 return (MemoryContext) set;(gdb) p *set$13 = {header = {type = T_AllocSetContext, isReset = true, allowInCritSection = false, methods = 0xb8c720 , parent = 0x2c09de0, firstchild = 0x0, prevchild = 0x0, nextchild = 0x0, name = 0xb1a840 "ExecutorState", ident = 0x0, reset_cbs = 0x0}, blocks = 0x2cbcc38, freelist = {0x0 }, initBlockSize = 8192, maxBlockSize = 8388608, nextBlockSize = 8192, allocChunkLimit = 8192, keeper = 0x2cbcc38, freeListIndex = 0}(gdb) next548 }(gdb) CreateExecutorState () at execUtils.c:9797 oldcontext = MemoryContextSwitchTo(qcontext);(gdb) 99 estate = makeNode(EState);(gdb) 104 estate->es_direction = ForwardScanDirection;(gdb) finishRun till exit from #0 CreateExecutorState () at execUtils.c:1040x000000000078cc2f in evaluate_expr (expr=0x2c30520, result_type=1043, result_typmod=44, result_collation=100) at clauses.c:48584858 estate = CreateExecutorState();Value returned is $14 = (EState *) 0x2cbcc70
四、小结
1、TODO:更进一步的理解,在执行查询语句时再进一步解读;
2、EState&PlanState数据结构:在此函数中构造,需进一步理解;
3、List数据结构:PG广泛的使用List这样的数据结构对各种信息进行管理。
函数
信息
数据
查询
参数
内存
结构
语句
大小
数据结构
状态
节点
输入
上下
上下文
头部
指针
标记
类型
结果
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
银行 网络安全案例分析
新华三网络安全负载
余姚敏捷软件开发平台
综评显示服务器异常
星辰影视怎么无网络或服务器异常
提高网络安全防范意识演讲
服务器能做电脑主机
蓝鲸网络安全团队
软件开发培训诈骗
关于网络技术的ppt
北京学软件开发的大学
银联接口软件开发公司
机顶盒做服务器
数据库安全应急演练
申汇海南互联网科技有限公司
汽车车载网络技术有哪些
数据库理论与技术实验报告
学习苹果软件开发
我的世界侏罗纪服务器指令
乾颐堂华为网络技术综合就业
模拟器100开服务器
服务器灰尘多可以清理吗
服务器管理器报错复位
数据库技术a
制造软件开发服务电话
数据库表别名的使用
服务器怎么多开传奇引擎
关于手机网络安全的新闻
手机网络安全问题有哪些
读博要不要结合软件开发