PostgreSQL绍聚合函数实现中怎么使用的simplehash
发表于:2024-12-12 作者:千家信息网编辑
千家信息网最后更新 2024年12月12日,这篇文章主要讲解了"PostgreSQL绍聚合函数实现中怎么使用的simplehash",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"PostgreSQ
千家信息网最后更新 2024年12月12日PostgreSQL绍聚合函数实现中怎么使用的simplehash
这篇文章主要讲解了"PostgreSQL绍聚合函数实现中怎么使用的simplehash",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"PostgreSQL绍聚合函数实现中怎么使用的simplehash"吧!
//src/backend/executor/execGrouping.c#define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key) //SH_HASH_KEY --> TupleHashTableHash#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 //SH_EQUAL --> TupleHashTableMatch
一、数据结构
TupleHashTable
哈希表定义
typedef struct TupleHashTableData *TupleHashTable;typedef struct TupleHashTableData{ //底层Hash表 tuplehash_hash *hashtab; /* underlying hash table */ //在检索键中的列数 int numCols; /* number of columns in lookup key */ //键列中的属性格式 AttrNumber *keyColIdx; /* attr numbers of key columns */ //数据类型的哈希函数 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ //数据类型比较器 ExprState *tab_eq_func; /* comparator for table datatype(s) */ //包含数据表的内存上下文 MemoryContext tablecxt; /* memory context containing table */ //函数解析上下文 MemoryContext tempcxt; /* context for function evaluations */ //构造每个哈希条目的实际大小 Size entrysize; /* actual size to make each hash entry */ //依赖数据表条目的slot TupleTableSlot *tableslot; /* slot for referencing table entries */ /* The following fields are set transiently for each table search: */ //下面字段为每一个表检索时临时设置 //当前输入tuple slot TupleTableSlot *inputslot; /* current input tuple's slot */ //输入数据类型的哈希函数 FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ //input vs table的比较器 ExprState *cur_eq_func; /* comparator for input vs. table */ //哈希函数IV uint32 hash_iv; /* hash-function IV */ //表达式上下文 ExprContext *exprcontext; /* expression context */} TupleHashTableData;typedef tuplehash_iterator TupleHashIterator;/* type definitions *///哈希表类型定义typedef struct SH_TYPE //tuplehash_hash{ /* * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash * tables. Note that the maximum number of elements is lower * (SH_MAX_FILLFACTOR) * 数据/桶数组大小,64 bit用于处理UINT32_MAX哈希表. * 注意元素最大格式小于(SH_MAX_FILLFACTOR) */ uint64 size; /* how many elements have valid contents */ //有多少个元素具有有效内容 uint32 members; /* mask for bucket and size calculations, based on size */ //基于大小,用于计算桶和大小的掩码 uint32 sizemask; /* boundary after which to grow hashtable */ //哈希表增长的阈值 uint32 grow_threshold; /* hash buckets */ //哈希桶 SH_ELEMENT_TYPE *data; /* memory context to use for allocations */ //用于分配的内存上下文 MemoryContext ctx; /* user defined data, useful for callbacks */ //用户自定义的数据,通常用于回调函数 void *private_data;} SH_TYPE;//实际是tuplehash_hash
TupleHashEntryData
哈希表条目
typedef struct TupleHashEntryData *TupleHashEntry;typedef struct TupleHashTableData *TupleHashTable;typedef struct TupleHashEntryData{ //该组第一个元组的拷贝 MinimalTuple firstTuple; /* copy of first tuple in this group */ //用户数据 void *additional; /* user data */ //状态(见SH_STATUS) uint32 status; /* hash status */ //哈希值(已缓存) uint32 hash; /* hash value (cached) */} TupleHashEntryData;typedef enum SH_STATUS{ SH_STATUS_EMPTY = 0x00, SH_STATUS_IN_USE = 0x01} SH_STATUS;
MinimalTuple
最小化的元组定义
/* * MinimalTuple is an alternative representation that is used for transient * tuples inside the executor, in places where transaction status information * is not required, the tuple rowtype is known, and shaving off a few bytes * is worthwhile because we need to store many tuples. The representation * is chosen so that tuple access routines can work with either full or * minimal tuples via a HeapTupleData pointer structure. The access routines * see no difference, except that they must not access the transaction status * or t_ctid fields because those aren't there. * * For the most part, MinimalTuples should be accessed via TupleTableSlot * routines. These routines will prevent access to the "system columns" * and thereby prevent accidental use of the nonexistent fields. * * MinimalTupleData contains a length word, some padding, and fields matching * HeapTupleHeaderData beginning with t_infomask2. The padding is chosen so * that offsetof(t_infomask2) is the same modulo MAXIMUM_ALIGNOF in both * structs. This makes data alignment rules equivalent in both cases. * * When a minimal tuple is accessed via a HeapTupleData pointer, t_data is * set to point MINIMAL_TUPLE_OFFSET bytes before the actual start of the * minimal tuple --- that is, where a full tuple matching the minimal tuple's * data would start. This trick is what makes the structs seem equivalent. * * Note that t_hoff is computed the same as in a full tuple, hence it includes * the MINIMAL_TUPLE_OFFSET distance. t_len does not include that, however. * * MINIMAL_TUPLE_DATA_OFFSET is the offset to the first useful (non-pad) data * other than the length word. tuplesort.c and tuplestore.c use this to avoid * writing the padding to disk. */#define MINIMAL_TUPLE_OFFSET \ ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) / MAXIMUM_ALIGNOF * MAXIMUM_ALIGNOF)#define MINIMAL_TUPLE_PADDING \ ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) % MAXIMUM_ALIGNOF)#define MINIMAL_TUPLE_DATA_OFFSET \ offsetof(MinimalTupleData, t_infomask2)struct MinimalTupleData{ uint32 t_len; /* actual length of minimal tuple */ char mt_padding[MINIMAL_TUPLE_PADDING]; /* Fields below here must match HeapTupleHeaderData! */ uint16 t_infomask2; /* number of attributes + various flags */ uint16 t_infomask; /* various flag bits, see below */ uint8 t_hoff; /* sizeof header incl. bitmap, padding */ /* ^ - 23 bytes - ^ */ bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */ /* MORE DATA FOLLOWS AT END OF STRUCT */};/* typedef appears in htup.h */#define SizeofMinimalTupleHeader offsetof(MinimalTupleData, t_bits)typedef struct MinimalTupleData MinimalTupleData;typedef MinimalTupleData *MinimalTuple;
二、源码解读
TupleHashTableHash
TupleHashTableHash用于计算tuple的哈希值(分组列值)
/* * Compute the hash value for a tuple * 计算tuple的哈希值 * * The passed-in key is a pointer to TupleHashEntryData. In an actual hash * table entry, the firstTuple field points to a tuple (in MinimalTuple * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a * NULL firstTuple field --- that cues us to look at the inputslot instead. * This convention avoids the need to materialize virtual input tuples unless * they actually need to get copied into the table. * 传入的key是指向TupleHashEntryData结构体的指针. * 在实际的哈希表条目中,firstTuple字段指向一个元组(以MinimalTuple格式保存). * LookupTupleHashEntry会使用NULL firstTuple字段设置一个虚拟的TupleHashEntryData. * --- 这可以提示我们转而查看inputslot. * 这个转换避免了物化虚拟输入元组,除非它们需要实际拷贝到数据表中. * * Also, the caller must select an appropriate memory context for running * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) * 同时,调用者必须选择合适的内存上下文用于运行哈希函数. * (dynahash.c不会改变CurrentMemoryContext) */static uint32TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple){ //Tuple 哈希表 TupleHashTable hashtable = (TupleHashTable) tb->private_data; //列数 int numCols = hashtable->numCols; //属性编号 AttrNumber *keyColIdx = hashtable->keyColIdx; //哈希key uint32 hashkey = hashtable->hash_iv; //元组slot TupleTableSlot *slot; //哈希函数指针 FmgrInfo *hashfunctions; int i; if (tuple == NULL)//元组为NULL { /* Process the current input tuple for the table */ //处理当前输入元组 slot = hashtable->inputslot; hashfunctions = hashtable->in_hash_funcs; } else { /* * Process a tuple already stored in the table. * 处理已存储在数据表中的元组. * * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) * (这种情况因为simplehash.h的使用,实际上不会发生,因为哈希值存储在条目中) */ slot = hashtable->tableslot; //存储MinimalTuple ExecStoreMinimalTuple(tuple, slot, false); hashfunctions = hashtable->tab_hash_funcs; } for (i = 0; i < numCols; i++) { //------- 循环遍历列数 //获取属性编号 AttrNumber att = keyColIdx[i]; Datum attr;//属性 bool isNull;//是否为NULL? /* rotate hashkey left 1 bit at each step */ //每一步向左移动一位 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); //获取属性值 attr = slot_getattr(slot, att, &isNull); //如为null,则哈希key设置为0 if (!isNull) /* treat nulls as having hash key 0 */ { //不为NULL uint32 hkey; //调用哈希函数 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], attr)); hashkey ^= hkey; } } /* * The way hashes are combined above, among each other and with the IV, * doesn't lead to good bit perturbation. As the IV's goal is to lead to * achieve that, perform a round of hashing of the combined hash - * resulting in near perfect perturbation. * 上面哈希的的组合方式,彼此之间以及与IV的组合方式,都不会导致位扰动. * 因为IV存在的目的是实现该目标,执行组合哈希的hashing取整 -- 结果是完美的扰动. */ return murmurhash42(hashkey);}
TupleHashTableMatch
TupleHashTableMatch用于判断两个tuples是否匹配(有相同的hash值)
/* * See whether two tuples (presumably of the same hash value) match * 检查两个tuples是否匹配(有相同的hash值) * * As above, the passed pointers are pointers to TupleHashEntryData. * 如上所述,传入的指针指向TupleHashEntryData */static intTupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2){ TupleTableSlot *slot1; TupleTableSlot *slot2; TupleHashTable hashtable = (TupleHashTable) tb->private_data; ExprContext *econtext = hashtable->exprcontext; /* * We assume that simplehash.h will only ever call us with the first * argument being an actual table entry, and the second argument being * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction * could be supported too, but is not currently required. */ Assert(tuple1 != NULL); slot1 = hashtable->tableslot; ExecStoreMinimalTuple(tuple1, slot1, false); Assert(tuple2 == NULL); slot2 = hashtable->inputslot; /* For crosstype comparisons, the inputslot must be first */ econtext->ecxt_innertuple = slot2; econtext->ecxt_outertuple = slot1; return !ExecQualAndReset(hashtable->cur_eq_func, econtext);}
三、跟踪分析
测试脚本
-- 禁用并行set max_parallel_workers_per_gather=0;select bh,avg(c1),min(c1),max(c2) from t_agg_simple group by bh;
跟踪分析
(gdb) b TupleHashTableHashBreakpoint 1 at 0x6d3b2b: file execGrouping.c, line 379.(gdb) b TupleHashTableMatchBreakpoint 2 at 0x6d3c79: file execGrouping.c, line 446.(gdb) (gdb) cContinuing.Breakpoint 1, TupleHashTableHash (tb=0x2dd2720, tuple=0x0) at execGrouping.c:379379 TupleHashTable hashtable = (TupleHashTable) tb->private_data;(gdb)
输入参数
(gdb) p *tb$1 = {size = 256, members = 0, sizemask = 255, grow_threshold = 230, data = 0x2ddca00, ctx = 0x2db5310, private_data = 0x2dd2890}(gdb) p *tb->data$2 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
获取分组列数
(gdb) n380 int numCols = hashtable->numCols;(gdb) p *hashtable$3 = {hashtab = 0x2dd2720, numCols = 1, keyColIdx = 0x2dd2680, tab_hash_funcs = 0x2db72d0, tab_eq_func = 0x2ddea18, tablecxt = 0x2dcc370, tempcxt = 0x2db7320, entrysize = 24, tableslot = 0x2dd2928, inputslot = 0x2db7238, in_hash_funcs = 0x2db72d0, cur_eq_func = 0x2ddea18, hash_iv = 0, exprcontext = 0x2ddf338}(gdb) p tb->private_data$4 = (void *) 0x2dd2890
获取分组列属性编号
(gdb) n381 AttrNumber *keyColIdx = hashtable->keyColIdx;(gdb) 382 uint32 hashkey = hashtable->hash_iv;(gdb) p *keyColIdx$5 = 1
如输入tuple为NULL,设置slot和哈希函数
(gdb) n387 if (tuple == NULL)(gdb) p hashkey$6 = 0(gdb) n390 slot = hashtable->inputslot;(gdb) 391 hashfunctions = hashtable->in_hash_funcs;
开始遍历分组列
获取hashkey
(gdb) n406 for (i = 0; i < numCols; i++)(gdb) p numCols$8 = 1(gdb) n408 AttrNumber att = keyColIdx[i];(gdb) 413 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);(gdb) p att$9 = 1(gdb) p hashkey$10 = 0
获取属性值
(gdb) n415 attr = slot_getattr(slot, att, &isNull);(gdb) p hashkey$11 = 0(gdb) n417 if (!isNull) /* treat nulls as having hash key 0 */(gdb) p attr$12 = 140535426168416(gdb) x\16c attrInvalid character '\' in expression.(gdb) x/16c attr0x7fd0f427b660: 11 '\v' 71 'G' 90 'Z' 48 '0' 49 '1' 0 '\000' 0 '\000' 0 '\000'0x7fd0f427b668: 1 '\001' 0 '\000' 0 '\000' 0 '\000' 1 '\001' 0 '\000' 0 '\000' 0 '\000'
计算哈希值
(gdb) n421 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],(gdb) p hashfunctions[i]$13 = {fn_addr = 0x4c8a31, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2db5310, fn_expr = 0x0}(gdb) p i$14 = 0(gdb) n423 hashkey ^= hkey;(gdb) p hkey$15 = 3431319292(gdb) n406 for (i = 0; i < numCols; i++)(gdb) p hashkey$16 = 3431319292
返回结果
(gdb) n433 return murmurhash42(hashkey);(gdb) p murmurhash42(hashkey)$17 = 443809650(gdb) n434 }(gdb) tuplehash_insert (tb=0x2dd2720, key=0x0, found=0x7fff585be487) at ../../../src/include/lib/simplehash.h:497497 insertdist = 0;(gdb)
TupleHashTableMatch
进入TupleHashTableMatch
(gdb) cContinuing.Breakpoint 2, TupleHashTableMatch (tb=0x2dd2720, tuple1=0x2dcc488, tuple2=0x0) at execGrouping.c:446446 TupleHashTable hashtable = (TupleHashTable) tb->private_data;(gdb)
输入参数
(gdb) p *tb$18 = {size = 256, members = 1, sizemask = 255, grow_threshold = 230, data = 0x2ddca00, ctx = 0x2db5310, private_data = 0x2dd2890}(gdb) p *tuple1$19 = {t_len = 21, mt_padding = "\000\000\000\000\000", t_infomask2 = 1, t_infomask = 2, t_hoff = 24 '\030', t_bits = 0x2dcc497 ""}
对比是否匹配
(gdb) n447 ExprContext *econtext = hashtable->exprcontext;(gdb) 455 Assert(tuple1 != NULL);(gdb) 456 slot1 = hashtable->tableslot;(gdb) 457 ExecStoreMinimalTuple(tuple1, slot1, false);(gdb) 458 Assert(tuple2 == NULL);(gdb) 459 slot2 = hashtable->inputslot;(gdb) 462 econtext->ecxt_innertuple = slot2;(gdb) 463 econtext->ecxt_outertuple = slot1;(gdb) 464 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);(gdb) p hashtable->cur_eq_func$20 = (ExprState *) 0x2ddea18(gdb) p *hashtable->cur_eq_func$21 = {tag = {type = T_ExprState}, flags = 7 '\a', resnull = false, resvalue = 0, resultslot = 0x0, steps = 0x2ddeab0, evalfunc = 0x6cd882, expr = 0x0, evalfunc_private = 0x6cb43e , steps_len = 7, steps_alloc = 16, parent = 0x0, ext_params = 0x0, innermost_caseval = 0x0, innermost_casenull = 0x0, innermost_domainval = 0x0, innermost_domainnull = 0x0}
返回值
$22 = true(gdb) n465 }(gdb) tuplehash_insert (tb=0x2dd2720, key=0x0, found=0x7fff585be487) at ../../../src/include/lib/simplehash.h:556556 Assert(entry->status == SH_STATUS_IN_USE);(gdb)
感谢各位的阅读,以上就是"PostgreSQL绍聚合函数实现中怎么使用的simplehash"的内容了,经过本文的学习后,相信大家对PostgreSQL绍聚合函数实现中怎么使用的simplehash这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是,小编将为大家推送更多相关知识点的文章,欢迎关注!
哈希
函数
数据
属性
输入
上下
上下文
实际
条目
大小
数据表
类型
分组
内存
内容
字段
指向
指针
格式
处理
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
林伟网络安全评论
广电网络技术部
wms软件开发公司排名
华为手机俩卡怎么开数据库
三河网络安全宣传周
计算机网络技术类与网络工程专业
安捷伦软件开发
甘肃医院时钟监控网关服务器
医疗管理软件开发 内容要求
dns服务器不可用
珠海嵌入式软件开发流程费用
服务器海底降温全球温度升高
如何通过服务器查信息
smtp服务器地址 qq
明日之后服务器炸了没有补偿
考勤管理系统为何无数据库
中国网络技术与应用就业前景
如何查找服务器删除文件的记录
yesapi数据库设计
在网络安全和认证
王牌战争断开服务器了怎么办
服务器显示图标不正常
大唐仙妖劫九游服务器
网易手机版怎样开服务器
网络安全宣传周2021
常州网络安全公司
织梦清理数据库
软件开发前景好找工作吗
互联网公司发展科技有限公司
科惠网络技术有限公司电话