PostgreSQL 源码解读(194)- 查询#110(排序#3 - 实现)
发表于:2024-10-30 作者:千家信息网编辑
千家信息网最后更新 2024年10月30日,本节继续介绍排序的实现,上一节介绍了ExecSort,本节介绍该函数中调用的排序的具体实现函数,本节是第一部分,包括tuplesort_begin_heap/tuplesort_puttupleslo
千家信息网最后更新 2024年10月30日PostgreSQL 源码解读(194)- 查询#110(排序#3 - 实现)
本节继续介绍排序的实现,上一节介绍了ExecSort,本节介绍该函数中调用的排序的具体实现函数,本节是第一部分,包括tuplesort_begin_heap/tuplesort_puttupleslot.
一、数据结构
SortState
排序运行期状态信息
/* ---------------- * SortState information * 排序运行期状态信息 * ---------------- */typedef struct SortState{ //基类 ScanState ss; /* its first field is NodeTag */ //是否需要随机访问排序输出? bool randomAccess; /* need random access to sort output? */ //结果集是否存在边界? bool bounded; /* is the result set bounded? */ //如存在边界,需要多少个元组? int64 bound; /* if bounded, how many tuples are needed */ //是否已完成排序? bool sort_Done; /* sort completed yet? */ //是否使用有界值? bool bounded_Done; /* value of bounded we did the sort with */ //使用的有界值? int64 bound_Done; /* value of bound we did the sort with */ //tuplesort.c的私有状态 void *tuplesortstate; /* private state of tuplesort.c */ //是否worker? bool am_worker; /* are we a worker? */ //每个worker对应一个条目 SharedSortInfo *shared_info; /* one entry per worker */} SortState;/* ---------------- * Shared memory container for per-worker sort information * per-worker排序信息的共享内存容器 * ---------------- */typedef struct SharedSortInfo{ //worker个数? int num_workers; //排序机制 TuplesortInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];} SharedSortInfo;
TuplesortInstrumentation
报告排序统计的数据结构.
/* * Data structures for reporting sort statistics. Note that * TuplesortInstrumentation can't contain any pointers because we * sometimes put it in shared memory. * 报告排序统计的数据结构. * 注意TuplesortInstrumentation不能包含指针因为有时候会把该结构体放在共享内存中. */typedef enum{ SORT_TYPE_STILL_IN_PROGRESS = 0,//仍然在排序中 SORT_TYPE_TOP_N_HEAPSORT,//TOP N 堆排序 SORT_TYPE_QUICKSORT,//快速排序 SORT_TYPE_EXTERNAL_SORT,//外部排序 SORT_TYPE_EXTERNAL_MERGE//外部排序后的合并} TuplesortMethod;//排序方法typedef enum{ SORT_SPACE_TYPE_DISK,//需要用上磁盘 SORT_SPACE_TYPE_MEMORY//使用内存} TuplesortSpaceType;typedef struct TuplesortInstrumentation{ //使用的排序算法 TuplesortMethod sortMethod; /* sort algorithm used */ //排序使用空间类型 TuplesortSpaceType spaceType; /* type of space spaceUsed represents */ //空间消耗(以K为单位) long spaceUsed; /* space consumption, in kB */} TuplesortInstrumentation;
二、源码解读
tuplesort_begin_heap和tuplesort_puttupleslot都是准备工作,把tuple放到数组(堆)中,为后续的实际排序实现作准备.
tuplesort_begin_heap
Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,//元组描述符 int nkeys, //排序键个数 AttrNumber *attNums,//属性编号 Oid *sortOperators, //排序操作符 Oid *sortCollations,//排序规则 bool *nullsFirstFlags,//标记 int workMem, //内存大小 SortCoordinate coordinate,//协调器 bool randomAccess)//是否随机访问{ //获取排序状态 Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, randomAccess); MemoryContext oldcontext; int i; oldcontext = MemoryContextSwitchTo(state->sortcontext); AssertArg(nkeys > 0);#ifdef TRACE_SORT if (trace_sort) elog(LOG, "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", nkeys, workMem, randomAccess ? 't' : 'f');#endif state->nKeys = nkeys; TRACE_POSTGRESQL_SORT_START(HEAP_SORT, false, /* no unique check */ nkeys, workMem, randomAccess, PARALLEL_SORT(state)); //设置运行状态 state->comparetup = comparetup_heap; state->copytup = copytup_heap; state->writetup = writetup_heap; state->readtup = readtup_heap; //假定不需要拷贝元组描述符 state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ state->abbrevNext = 10; /* Prepare SortSupport data for each column */ //为每一列准备SortSupport数据(分配内存空间) state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); for (i = 0; i < nkeys; i++) { //------- 遍历排序键 //排序键 SortSupport sortKey = state->sortKeys + i; AssertArg(attNums[i] != 0); AssertArg(sortOperators[i] != 0); //设置SortSupport sortKey->ssup_cxt = CurrentMemoryContext; sortKey->ssup_collation = sortCollations[i]; sortKey->ssup_nulls_first = nullsFirstFlags[i]; sortKey->ssup_attno = attNums[i]; /* Convey if abbreviation optimization is applicable in principle */ //缩写优化是否原则上可用? sortKey->abbreviate = (i == 0); //设置 PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); } /* * The "onlyKey" optimization cannot be used with abbreviated keys, since * tie-breaker comparisons may be required. Typically, the optimization * is only of value to pass-by-value types anyway, whereas abbreviated * keys are typically only of value to pass-by-reference types. * 对于缩写优化"onlyKey"优化不能使用,因为可能需要tie-breaker比较. * 典型的,优化器只对按值传递类型有价值,而缩写建通常只对引用传递类型有价值. */ if (nkeys == 1 && !state->sortKeys->abbrev_converter) state->onlyKey = state->sortKeys; MemoryContextSwitchTo(oldcontext); return state;}
tuplesort_puttupleslot
接收一个元组(一行)
/* * Accept one tuple while collecting input data for sort. * 接收一个元组(一行) * * Note that the input data is always copied; the caller need not save it. * 注意输入数据通常会被拷贝,调用者不需要保存这些数据. */voidtuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot){ MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; /* * Copy the given tuple into memory we control, and decrease availMem. * Then call the common code. * 拷贝给定的元组在我们控制的内存中,同时减少可用内存. * 然后调用puttuple_common函数. */ //#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) COPYTUP(state, &stup, (void *) slot); puttuple_common(state, &stup); MemoryContextSwitchTo(oldcontext);}/* * Shared code for tuple and datum cases. * 元组和datum共享的代码. */static voidputtuple_common(Tuplesortstate *state, SortTuple *tuple){ Assert(!LEADER(state)); switch (state->status) { case TSS_INITIAL://初始化 /* * Save the tuple into the unsorted array. First, grow the array * as needed. Note that we try to grow the array when there is * still one free slot remaining --- if we fail, there'll still be * room to store the incoming tuple, and then we'll switch to * tape-based operation. * 在未排序的数组中存储元组. * 首先,如需要则扩展数组大小.注意在只有1个slot剩下来的时候才尝试扩展数组. * 假如扩展失败,仍有空间可用于存储输入的元组,然后将切换至tape-based(基于磁带)操作. */ if (state->memtupcount >= state->memtupsize - 1) { (void) grow_memtuples(state); Assert(state->memtupcount < state->memtupsize); } state->memtuples[state->memtupcount++] = *tuple;//在数组中保存元组 /* * Check if it's time to switch over to a bounded heapsort. We do * so if the input tuple count exceeds twice the desired tuple * count (this is a heuristic for where heapsort becomes cheaper * than a quicksort), or if we've just filled workMem and have * enough tuples to meet the bound. * 检查是否切换至有界heapsort. * 在输入元组个数超出期望元组个数两倍的情况下执行该检查 * (在堆排序比快速排序成本更低时所获得的洞见), * 或者如果我们已经填充了workMem并且有足够的元组满足bound时也执行该检查. * * Note that once we enter TSS_BOUNDED state we will always try to * complete the sort that way. In the worst case, if later input * tuples are larger than earlier ones, this might cause us to * exceed workMem significantly. * 注意我们一旦进入TSS_BOUNDED状态,将尝试按指定的方式完成排序. * 在最坏的情况下,如果后续的输入元组比早前的要大,这可能会导致内存大小会超出workMem. */ if (state->bounded && (state->memtupcount > state->bound * 2 || (state->memtupcount > state->bound && LACKMEM(state)))) {#ifdef TRACE_SORT if (trace_sort) elog(LOG, "switching to bounded heapsort at %d tuples: %s", state->memtupcount, pg_rusage_show(&state->ru_start));#endif //切换至堆排序 make_bounded_heap(state); return; } /* * Done if we still fit in available memory and have array slots. * 如果内存空间可用并且数组有位置存储,则已完成任务! */ if (state->memtupcount < state->memtupsize && !LACKMEM(state)) return; /* * Nope; time to switch to tape-based operation. * 切换至tape-base操作 */ inittapes(state, true); /* * Dump all tuples. * dump所有元组 */ dumptuples(state, false); break; case TSS_BOUNDED://有界堆排序 /* * We don't want to grow the array here, so check whether the new * tuple can be discarded before putting it in. This should be a * good speed optimization, too, since when there are many more * input tuples than the bound, most input tuples can be discarded * with just this one comparison. Note that because we currently * have the sort direction reversed, we must check for <= not >=. * 不希望在这里扩展数组,因此检查在放到数组前检查是否可用废弃某些新元组. * 这将是一个很好的性能优化,因为存在比边界要大得多的输入元组, * 大多数输入元组会通过对比被废弃. */ if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) { /* new tuple <= top of the heap, so we can discard it */ // 新元组 <= 堆顶,可以废弃 free_sort_tuple(state, tuple); CHECK_FOR_INTERRUPTS(); } else { /* discard top of heap, replacing it with the new tuple */ //废弃堆顶,使用新元组替换 free_sort_tuple(state, &state->memtuples[0]); tuplesort_heap_replace_top(state, tuple); } break; case TSS_BUILDRUNS://构建运行期信息 /* * Save the tuple into the unsorted array (there must be space) * 保存元组到未排序数组中(存在空闲空间) */ state->memtuples[state->memtupcount++] = *tuple; /* * If we are over the memory limit, dump all tuples. * 如果已超出内存限制,则dump所有元组 */ dumptuples(state, false); break; default: elog(ERROR, "invalid tuplesort state"); break; }}
三、跟踪分析
N/A
四、参考资料
N/A
排序
内存
数组
数据
状态
输入
空间
检查
个数
信息
结构
切换
函数
大小
拷贝
数据结构
类型
缩写
行期
边界
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
淄博诚信积分管理软件开发系统
密码编程学和网络安全第六章
国家电网网络安全宣传报道
佟年世界网络安全大赛排行
海鹚网络技术有限公司
高速公路服务器行程码检查
戴尔r710服务器远程管理
国家电网网络技术研究院
南昌县软件开发培训机构
网络安全公益广告动漫
intel 服务器内存
lol第二局服务器无法连接
元气骑士怎么进入不同服务器
查看苹果id服务器连接失败
操作系统里有哪些是数据库
机械软件开发调试
rj45服务器接口
绍兴专业从事财务管理软件开发
寻甸综合软件开发
团购bvd数据库
天津现代化软件开发模型
博罗民宿软件开发
数据库 复制表 一列数据
如何提升网络安全意识
敏捷软件开发时主要的因素
网络安全应急技术支撑队伍
qt开发的数据库管理系统
华为网络技术交换机配置
sql2008数据库
vs自带数据库如何操作