PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么
发表于:2025-01-31 作者:千家信息网编辑
千家信息网最后更新 2025年01月31日,这篇文章主要介绍"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么",在日常操作中,相信很多人在PostgreSQL中query_planner中
千家信息网最后更新 2025年01月31日PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么
这篇文章主要介绍"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么",在日常操作中,相信很多人在PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么"的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
query_planner代码片段:
//... /* * Ready to do the primary planning. */ final_rel = make_one_rel(root, joinlist);//执行主要的计划过程 /* Check that we got at least one usable path */ if (!final_rel || !final_rel->cheapest_total_path || final_rel->cheapest_total_path->param_info != NULL) elog(ERROR, "failed to construct the join relation");//检查 return final_rel;//返回结果 //...
一、数据结构
RelOptInfo
typedef struct RelOptInfo { NodeTag type;//节点标识 RelOptKind reloptkind;//RelOpt类型 /* all relations included in this RelOptInfo */ Relids relids; /*Relids(rtindex)集合 set of base relids (rangetable indexes) */ /* size estimates generated by planner */ double rows; /*结果元组的估算数量 estimated number of result tuples */ /* per-relation planner control flags */ bool consider_startup; /*是否考虑启动成本?是,需要保留启动成本低的路径 keep cheap-startup-cost paths? */ bool consider_param_startup; /*是否考虑参数化?的路径 ditto, for parameterized paths? */ bool consider_parallel; /*是否考虑并行处理路径 consider parallel paths? */ /* default result targetlist for Paths scanning this relation */ struct PathTarget *reltarget; /*扫描该Relation时默认的结果 list of Vars/Exprs, cost, width */ /* materialization information */ List *pathlist; /*访问路径链表 Path structures */ List *ppilist; /*路径链表中使用参数化路径进行 ParamPathInfos used in pathlist */ List *partial_pathlist; /* partial Paths */ struct Path *cheapest_startup_path;//代价最低的启动路径 struct Path *cheapest_total_path;//代价最低的整体路径 struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径 List *cheapest_parameterized_paths;//代价最低的参数化?路径链表 /* parameterization information needed for both base rels and join rels */ /* (see also lateral_vars and lateral_referencers) */ Relids direct_lateral_relids; /*使用lateral语法,需依赖的Relids rels directly laterally referenced */ Relids lateral_relids; /* minimum parameterization of rel */ /* information about a base rel (not set for join rels!) */ //reloptkind=RELOPT_BASEREL时使用的数据结构 Index relid; /* Relation ID */ Oid reltablespace; /* 表空间 containing tablespace */ RTEKind rtekind; /* 基表?子查询?还是函数等等?RELATION, SUBQUERY, FUNCTION, etc */ AttrNumber min_attr; /* 最小的属性编号 smallest attrno of rel (often <0) */ AttrNumber max_attr; /* 最大的属性编号 largest attrno of rel */ Relids *attr_needed; /* 数组 array indexed [min_attr .. max_attr] */ int32 *attr_widths; /* 属性宽度 array indexed [min_attr .. max_attr] */ List *lateral_vars; /* 关系依赖的Vars/PHVs LATERAL Vars and PHVs referenced by rel */ Relids lateral_referencers; /*依赖该关系的Relids rels that reference me laterally */ List *indexlist; /* 该关系的IndexOptInfo链表 list of IndexOptInfo */ List *statlist; /* 统计信息链表 list of StatisticExtInfo */ BlockNumber pages; /* 块数 size estimates derived from pg_class */ double tuples; /* 元组数 */ double allvisfrac; /* ? */ PlannerInfo *subroot; /* 如为子查询,存储子查询的root if subquery */ List *subplan_params; /* 如为子查询,存储子查询的参数 if subquery */ int rel_parallel_workers; /* 并行执行,需要多少个workers? wanted number of parallel workers */ /* Information about foreign tables and foreign joins */ //FWD相关信息 Oid serverid; /* identifies server for the table or join */ Oid userid; /* identifies user to check access as */ bool useridiscurrent; /* join is only valid for current user */ /* use "struct FdwRoutine" to avoid including fdwapi.h here */ struct FdwRoutine *fdwroutine; void *fdw_private; /* cache space for remembering if we have proven this relation unique */ //已知的,可保证唯一的Relids链表 List *unique_for_rels; /* known unique for these other relid * set(s) */ List *non_unique_for_rels; /* 已知的,不唯一的Relids链表 known not unique for these set(s) */ /* used by various scans and joins: */ List *baserestrictinfo; /* 如为基本关系,存储约束条件 RestrictInfo structures (if base rel) */ QualCost baserestrictcost; /* 解析约束表达式的成本? cost of evaluating the above */ Index baserestrict_min_security; /* 最低安全等级 min security_level found in * baserestrictinfo */ List *joininfo; /* 连接语句的约束条件信息 RestrictInfo structures for join clauses * involving this rel */ bool has_eclass_joins; /* 是否存在等价类连接? T means joininfo is incomplete */ /* used by partitionwise joins: */ bool consider_partitionwise_join; /* 分区? consider partitionwise * join paths? (if * partitioned rel) */ Relids top_parent_relids; /* Relids of topmost parents (if "other" * rel) */ /* used for partitioned relations */ //分区表使用 PartitionScheme part_scheme; /* 分区的schema Partitioning scheme. */ int nparts; /* 分区数 number of partitions */ struct PartitionBoundInfoData *boundinfo; /* 分区边界信息 Partition bounds */ List *partition_qual; /* 分区约束 partition constraint */ struct RelOptInfo **part_rels; /* 分区的RelOptInfo数组 Array of RelOptInfos of partitions, * stored in the same order of bounds */ List **partexprs; /* 非空分区键表达式 Non-nullable partition key expressions. */ List **nullable_partexprs; /* 可为空的分区键表达式 Nullable partition key expressions. */ List *partitioned_child_rels; /* RT Indexes链表 List of RT indexes. */ } RelOptInfo;
二、源码解读
make_one_rel
make_one_rel函数找出执行查询的所有可能访问路径,但不考虑上层的Non-SPJ操作,返回一个最上层的RelOptInfo.
make_one_rel函数分为两个阶段:生成扫描路径(set_base_rel_pathlists)和生成连接路径(make_rel_from_joinlist).
注:SPJ是指选择(Select)/投影(Project)/连接(Join),相对应的Non-SPJ操作是指Group分组/Sort排序等操作
/* * make_one_rel * Finds all possible access paths for executing a query, returning a * single rel that represents the join of all base rels in the query. */ RelOptInfo * make_one_rel(PlannerInfo *root, List *joinlist) { RelOptInfo *rel; Index rti; /* * Construct the all_baserels Relids set. */ root->all_baserels = NULL; for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍历RelOptInfo { RelOptInfo *brel = root->simple_rel_array[rti]; /* there may be empty slots corresponding to non-baserel RTEs */ if (brel == NULL) continue; Assert(brel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (brel->reloptkind != RELOPT_BASEREL) continue; root->all_baserels = bms_add_member(root->all_baserels, brel->relid);//添加到all_baserels遍历中 } /* Mark base rels as to whether we care about fast-start plans */ //设置RelOptInfo的consider_param_startup变量,是否考量fast-start plans set_base_rel_consider_startup(root); /* * Compute size estimates and consider_parallel flags for each base rel, * then generate access paths. */ set_base_rel_sizes(root);//估算Relation的Size并且设置consider_parallel标记 set_base_rel_pathlists(root);//生成Relation的扫描(访问)路径 /* * Generate access paths for the entire join tree. * 通过动态规划或遗传算法生成连接路径 */ rel = make_rel_from_joinlist(root, joinlist); /* * The result should join all and only the query's base rels. */ Assert(bms_equal(rel->relids, root->all_baserels)); //返回最上层的RelOptInfo return rel; }//-------------------------------------------------------- /* * set_base_rel_consider_startup * Set the consider_[param_]startup flags for each base-relation entry. * * For the moment, we only deal with consider_param_startup here; because the * logic for consider_startup is pretty trivial and is the same for every base * relation, we just let build_simple_rel() initialize that flag correctly to * start with. If that logic ever gets more complicated it would probably * be better to move it here. */ static void set_base_rel_consider_startup(PlannerInfo *root) { /* * Since parameterized paths can only be used on the inside of a nestloop * join plan, there is usually little value in considering fast-start * plans for them. However, for relations that are on the RHS of a SEMI * or ANTI join, a fast-start plan can be useful because we're only going * to care about fetching one tuple anyway. * * To minimize growth of planning time, we currently restrict this to * cases where the RHS is a single base relation, not a join; there is no * provision for consider_param_startup to get set at all on joinrels. * Also we don't worry about appendrels. costsize.c's costing rules for * nestloop semi/antijoins don't consider such cases either. */ ListCell *lc; foreach(lc, root->join_info_list) { SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(lc); int varno; if ((sjinfo->jointype == JOIN_SEMI || sjinfo->jointype == JOIN_ANTI) && bms_get_singleton_member(sjinfo->syn_righthand, &varno)) { RelOptInfo *rel = find_base_rel(root, varno); rel->consider_param_startup = true; } } }//-------------------------------------------------------- /* * set_base_rel_sizes * Set the size estimates (rows and widths) for each base-relation entry. * Also determine whether to consider parallel paths for base relations. * * We do this in a separate pass over the base rels so that rowcount * estimates are available for parameterized path generation, and also so * that each rel's consider_parallel flag is set correctly before we begin to * generate paths. */ static void set_base_rel_sizes(PlannerInfo *root) { Index rti; for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍历RelOptInfo数组 { RelOptInfo *rel = root->simple_rel_array[rti]; RangeTblEntry *rte; /* there may be empty slots corresponding to non-baserel RTEs */ if (rel == NULL) continue; Assert(rel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (rel->reloptkind != RELOPT_BASEREL) continue; rte = root->simple_rte_array[rti]; /* * If parallelism is allowable for this query in general, see whether * it's allowable for this rel in particular. We have to do this * before set_rel_size(), because (a) if this rel is an inheritance * parent, set_append_rel_size() will use and perhaps change the rel's * consider_parallel flag, and (b) for some RTE types, set_rel_size() * goes ahead and makes paths immediately. */ if (root->glob->parallelModeOK) set_rel_consider_parallel(root, rel, rte); set_rel_size(root, rel, rti, rte); } } /* * set_rel_size * Set size estimates for a base relation */ static void set_rel_size(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { if (rel->reloptkind == RELOPT_BASEREL && relation_excluded_by_constraints(root, rel, rte)) { /* * We proved we don't need to scan the rel via constraint exclusion, * so set up a single dummy path for it. Here we only check this for * regular baserels; if it's an otherrel, CE was already checked in * set_append_rel_size(). * * In this case, we go ahead and set up the relation's path right away * instead of leaving it for set_rel_pathlist to do. This is because * we don't have a convention for marking a rel as dummy except by * assigning a dummy path to it. */ set_dummy_rel_pathlist(rel);// } else if (rte->inh)//inherit table { /* It's an "append relation", process accordingly */ set_append_rel_size(root, rel, rti, rte); } else { switch (rel->rtekind) { case RTE_RELATION://数据表 if (rte->relkind == RELKIND_FOREIGN_TABLE)//FDW { /* Foreign table */ set_foreign_size(root, rel, rte); } else if (rte->relkind == RELKIND_PARTITIONED_TABLE)//分区表 { /* * A partitioned table without any partitions is marked as * a dummy rel. */ set_dummy_rel_pathlist(rel); } else if (rte->tablesample != NULL)//采样表 { /* Sampled relation */ set_tablesample_rel_size(root, rel, rte); } else { /* Plain relation */ set_plain_rel_size(root, rel, rte);//常规的数据表 } break; case RTE_SUBQUERY://子查询 /* * Subqueries don't support making a choice between * parameterized and unparameterized paths, so just go ahead * and build their paths immediately. */ set_subquery_pathlist(root, rel, rti, rte);//生成子查询访问路径 break; case RTE_FUNCTION://FUNCTION set_function_size_estimates(root, rel); break; case RTE_TABLEFUNC://TABLEFUNC set_tablefunc_size_estimates(root, rel); break; case RTE_VALUES://VALUES set_values_size_estimates(root, rel); break; case RTE_CTE://CTE /* * CTEs don't support making a choice between parameterized * and unparameterized paths, so just go ahead and build their * paths immediately. */ if (rte->self_reference) set_worktable_pathlist(root, rel, rte); else set_cte_pathlist(root, rel, rte); break; case RTE_NAMEDTUPLESTORE://NAMEDTUPLESTORE,命名元组存储 set_namedtuplestore_pathlist(root, rel, rte); break; default: elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); break; } } /* * We insist that all non-dummy rels have a nonzero rowcount estimate. */ Assert(rel->rows > 0 || IS_DUMMY_REL(rel)); } //-------------------------------------------------------- /* * set_base_rel_pathlists * Finds all paths available for scanning each base-relation entry. * Sequential scan and any available indices are considered. * Each useful path is attached to its relation's 'pathlist' field. * * 为每一个base rels找出所有可用的访问路径(顺序扫描和所有可用的索引都会考虑在内) * 每一个可用的路径都会添加到pathlist链表中 * */ static void set_base_rel_pathlists(PlannerInfo *root) { Index rti; for (rti = 1; rti < root->simple_rel_array_size; rti++)//遍历RelOptInfo数组 { RelOptInfo *rel = root->simple_rel_array[rti]; /* there may be empty slots corresponding to non-baserel RTEs */ if (rel == NULL) continue; Assert(rel->relid == rti); /* sanity check on array */ /* ignore RTEs that are "other rels" */ if (rel->reloptkind != RELOPT_BASEREL) continue; set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]); } } /* * set_rel_pathlist * Build access paths for a base relation */ static void set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { if (IS_DUMMY_REL(rel)) { /* We already proved the relation empty, so nothing more to do */ } else if (rte->inh)//inherit { /* It's an "append relation", process accordingly */ set_append_rel_pathlist(root, rel, rti, rte); } else//常规 { switch (rel->rtekind) { case RTE_RELATION://数据表 if (rte->relkind == RELKIND_FOREIGN_TABLE)//FDW { /* Foreign table */ set_foreign_pathlist(root, rel, rte); } else if (rte->tablesample != NULL)//采样表 { /* Sampled relation */ set_tablesample_rel_pathlist(root, rel, rte); } else//常规数据表 { /* Plain relation */ set_plain_rel_pathlist(root, rel, rte); } break; case RTE_SUBQUERY://子查询 /* Subquery --- 已在set_rel_size处理,fully handled during set_rel_size */ break; case RTE_FUNCTION: /* RangeFunction */ set_function_pathlist(root, rel, rte); break; case RTE_TABLEFUNC: /* Table Function */ set_tablefunc_pathlist(root, rel, rte); break; case RTE_VALUES: /* Values list */ set_values_pathlist(root, rel, rte); break; case RTE_CTE: /* CTE reference --- fully handled during set_rel_size */ break; case RTE_NAMEDTUPLESTORE: /* tuplestore reference --- fully handled during set_rel_size */ break; default: elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind); break; } } /* * If this is a baserel, we should normally consider gathering any partial * paths we may have created for it. * * However, if this is an inheritance child, skip it. Otherwise, we could * end up with a very large number of gather nodes, each trying to grab * its own pool of workers. Instead, we'll consider gathering partial * paths for the parent appendrel. * * Also, if this is the topmost scan/join rel (that is, the only baserel), * we postpone this until the final scan/join targelist is available (see * grouping_planner). */ if (rel->reloptkind == RELOPT_BASEREL && bms_membership(root->all_baserels) != BMS_SINGLETON) generate_gather_paths(root, rel, false); /* * Allow a plugin to editorialize on the set of Paths for this base * relation. It could add new paths (such as CustomPaths) by calling * add_path(), or delete or modify paths added by the core code. */ if (set_rel_pathlist_hook)//钩子函数 (*set_rel_pathlist_hook) (root, rel, rti, rte); /* Now find the cheapest of the paths for this rel */ set_cheapest(rel);//找出代价最低的访问路径 #ifdef OPTIMIZER_DEBUG debug_print_rel(root, rel); #endif }//------------------------------------------------------------ /* * make_rel_from_joinlist * Build access paths using a "joinlist" to guide the join path search. * * 根据joinlist构建连接访问路径,joinlist是函数deconstruct_jointree函数的返回 * * See comments for deconstruct_jointree() for definition of the joinlist * data structure. */ static RelOptInfo * make_rel_from_joinlist(PlannerInfo *root, List *joinlist) { int levels_needed; List *initial_rels; ListCell *jl; /* * Count the number of child joinlist nodes. This is the depth of the * dynamic-programming algorithm we must employ to consider all ways of * joining the child nodes. */ levels_needed = list_length(joinlist);//joinlist链表长度 if (levels_needed <= 0) return NULL; /* nothing to do? */ /* * Construct a list of rels corresponding to the child joinlist nodes. * This may contain both base rels and rels constructed according to * sub-joinlists. */ initial_rels = NIL; foreach(jl, joinlist)//遍历链表 { Node *jlnode = (Node *) lfirst(jl); RelOptInfo *thisrel; if (IsA(jlnode, RangeTblRef))//RTR { int varno = ((RangeTblRef *) jlnode)->rtindex; thisrel = find_base_rel(root, varno); } else if (IsA(jlnode, List)) { /* Recurse to handle subproblem */ thisrel = make_rel_from_joinlist(root, (List *) jlnode);//递归调用 } else { elog(ERROR, "unrecognized joinlist node type: %d", (int) nodeTag(jlnode)); thisrel = NULL; /* keep compiler quiet */ } initial_rels = lappend(initial_rels, thisrel);//加入初始化链表中 } if (levels_needed == 1) { /* * Single joinlist node, so we're done. */ return (RelOptInfo *) linitial(initial_rels); } else { /* * Consider the different orders in which we could join the rels, * using a plugin, GEQO, or the regular join search code. * * We put the initial_rels list into a PlannerInfo field because * has_legal_joinclause() needs to look at it (ugly :-(). */ root->initial_rels = initial_rels; if (join_search_hook)//钩子函数 return (*join_search_hook) (root, levels_needed, initial_rels); else if (enable_geqo && levels_needed >= geqo_threshold) return geqo(root, levels_needed, initial_rels);//通过遗传算法构建连接访问路径 else return standard_join_search(root, levels_needed, initial_rels);//通过动态规划算法构建连接路径 } }
到此,关于"PostgreSQL中query_planner中主计划函数make_one_rel的主实现逻辑是什么"的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注网站,小编会继续努力为大家带来更多实用的文章!
路径
函数
查询
最低
数据
代价
生成
逻辑
信息
参数
数据表
数组
存储
学习
上层
属性
常规
成本
算法
结果
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
导出数据库文件后怎么导入
河北叶烂网络技术有限公司
实名认证服务器错误什么意思
虎林软件开发文档在线咨询
网络安全从哪方面考虑
福州经联科技网络技术有限公司
网络安全技术员工作图片
软件开发技能证
国家网络安全宣传周部队新闻
济南瑞驿网络技术有限公司
计算机网络技术考试试题拓展题
北京质量软件开发设施参考价格
MSN聊天软件开发
小学网络安全自查总结博客
浪潮服务器怎么进管理卡
网络安全心得观后感
软件开发工程师的机会
多ip服务器适合做采集业务吗
网络安全检查意见书 深圳市
app服务器错误503
死亡跑酷的服务器
网络安全基础设施有哪些
arcgis插入数据库
金蝶数据库删除发票记录
金华直销软件开发公司电话
迪拜软件开发靠谱吗
macs数据库总控 密码
网络技术考试都考啥
全县开展网络安全培训会
数据数据库管理人员之间的关系