PostgreSQL绍聚合函数实现中怎么使用的simplehash
发表于:2024-12-12 作者:千家信息网编辑
千家信息网最后更新 2024年12月12日,这篇文章主要讲解了"PostgreSQL绍聚合函数实现中怎么使用的simplehash",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"PostgreSQ
千家信息网最后更新 2024年12月12日PostgreSQL绍聚合函数实现中怎么使用的simplehash
这篇文章主要讲解了"PostgreSQL绍聚合函数实现中怎么使用的simplehash",文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习"PostgreSQL绍聚合函数实现中怎么使用的simplehash"吧!
//src/backend/executor/execGrouping.c#define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key) //SH_HASH_KEY --> TupleHashTableHash#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 //SH_EQUAL --> TupleHashTableMatch
一、数据结构
TupleHashTable
哈希表定义
typedef struct TupleHashTableData *TupleHashTable;typedef struct TupleHashTableData{ //底层Hash表 tuplehash_hash *hashtab; /* underlying hash table */ //在检索键中的列数 int numCols; /* number of columns in lookup key */ //键列中的属性格式 AttrNumber *keyColIdx; /* attr numbers of key columns */ //数据类型的哈希函数 FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ //数据类型比较器 ExprState *tab_eq_func; /* comparator for table datatype(s) */ //包含数据表的内存上下文 MemoryContext tablecxt; /* memory context containing table */ //函数解析上下文 MemoryContext tempcxt; /* context for function evaluations */ //构造每个哈希条目的实际大小 Size entrysize; /* actual size to make each hash entry */ //依赖数据表条目的slot TupleTableSlot *tableslot; /* slot for referencing table entries */ /* The following fields are set transiently for each table search: */ //下面字段为每一个表检索时临时设置 //当前输入tuple slot TupleTableSlot *inputslot; /* current input tuple's slot */ //输入数据类型的哈希函数 FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ //input vs table的比较器 ExprState *cur_eq_func; /* comparator for input vs. table */ //哈希函数IV uint32 hash_iv; /* hash-function IV */ //表达式上下文 ExprContext *exprcontext; /* expression context */} TupleHashTableData;typedef tuplehash_iterator TupleHashIterator;/* type definitions *///哈希表类型定义typedef struct SH_TYPE //tuplehash_hash{ /* * Size of data / bucket array, 64 bits to handle UINT32_MAX sized hash * tables. Note that the maximum number of elements is lower * (SH_MAX_FILLFACTOR) * 数据/桶数组大小,64 bit用于处理UINT32_MAX哈希表. * 注意元素最大格式小于(SH_MAX_FILLFACTOR) */ uint64 size; /* how many elements have valid contents */ //有多少个元素具有有效内容 uint32 members; /* mask for bucket and size calculations, based on size */ //基于大小,用于计算桶和大小的掩码 uint32 sizemask; /* boundary after which to grow hashtable */ //哈希表增长的阈值 uint32 grow_threshold; /* hash buckets */ //哈希桶 SH_ELEMENT_TYPE *data; /* memory context to use for allocations */ //用于分配的内存上下文 MemoryContext ctx; /* user defined data, useful for callbacks */ //用户自定义的数据,通常用于回调函数 void *private_data;} SH_TYPE;//实际是tuplehash_hash
TupleHashEntryData
哈希表条目
typedef struct TupleHashEntryData *TupleHashEntry;typedef struct TupleHashTableData *TupleHashTable;typedef struct TupleHashEntryData{ //该组第一个元组的拷贝 MinimalTuple firstTuple; /* copy of first tuple in this group */ //用户数据 void *additional; /* user data */ //状态(见SH_STATUS) uint32 status; /* hash status */ //哈希值(已缓存) uint32 hash; /* hash value (cached) */} TupleHashEntryData;typedef enum SH_STATUS{ SH_STATUS_EMPTY = 0x00, SH_STATUS_IN_USE = 0x01} SH_STATUS;
MinimalTuple
最小化的元组定义
/* * MinimalTuple is an alternative representation that is used for transient * tuples inside the executor, in places where transaction status information * is not required, the tuple rowtype is known, and shaving off a few bytes * is worthwhile because we need to store many tuples. The representation * is chosen so that tuple access routines can work with either full or * minimal tuples via a HeapTupleData pointer structure. The access routines * see no difference, except that they must not access the transaction status * or t_ctid fields because those aren't there. * * For the most part, MinimalTuples should be accessed via TupleTableSlot * routines. These routines will prevent access to the "system columns" * and thereby prevent accidental use of the nonexistent fields. * * MinimalTupleData contains a length word, some padding, and fields matching * HeapTupleHeaderData beginning with t_infomask2. The padding is chosen so * that offsetof(t_infomask2) is the same modulo MAXIMUM_ALIGNOF in both * structs. This makes data alignment rules equivalent in both cases. * * When a minimal tuple is accessed via a HeapTupleData pointer, t_data is * set to point MINIMAL_TUPLE_OFFSET bytes before the actual start of the * minimal tuple --- that is, where a full tuple matching the minimal tuple's * data would start. This trick is what makes the structs seem equivalent. * * Note that t_hoff is computed the same as in a full tuple, hence it includes * the MINIMAL_TUPLE_OFFSET distance. t_len does not include that, however. * * MINIMAL_TUPLE_DATA_OFFSET is the offset to the first useful (non-pad) data * other than the length word. tuplesort.c and tuplestore.c use this to avoid * writing the padding to disk. */#define MINIMAL_TUPLE_OFFSET \ ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) / MAXIMUM_ALIGNOF * MAXIMUM_ALIGNOF)#define MINIMAL_TUPLE_PADDING \ ((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) % MAXIMUM_ALIGNOF)#define MINIMAL_TUPLE_DATA_OFFSET \ offsetof(MinimalTupleData, t_infomask2)struct MinimalTupleData{ uint32 t_len; /* actual length of minimal tuple */ char mt_padding[MINIMAL_TUPLE_PADDING]; /* Fields below here must match HeapTupleHeaderData! */ uint16 t_infomask2; /* number of attributes + various flags */ uint16 t_infomask; /* various flag bits, see below */ uint8 t_hoff; /* sizeof header incl. bitmap, padding */ /* ^ - 23 bytes - ^ */ bits8 t_bits[FLEXIBLE_ARRAY_MEMBER]; /* bitmap of NULLs */ /* MORE DATA FOLLOWS AT END OF STRUCT */};/* typedef appears in htup.h */#define SizeofMinimalTupleHeader offsetof(MinimalTupleData, t_bits)typedef struct MinimalTupleData MinimalTupleData;typedef MinimalTupleData *MinimalTuple;
二、源码解读
TupleHashTableHash
TupleHashTableHash用于计算tuple的哈希值(分组列值)
/* * Compute the hash value for a tuple * 计算tuple的哈希值 * * The passed-in key is a pointer to TupleHashEntryData. In an actual hash * table entry, the firstTuple field points to a tuple (in MinimalTuple * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a * NULL firstTuple field --- that cues us to look at the inputslot instead. * This convention avoids the need to materialize virtual input tuples unless * they actually need to get copied into the table. * 传入的key是指向TupleHashEntryData结构体的指针. * 在实际的哈希表条目中,firstTuple字段指向一个元组(以MinimalTuple格式保存). * LookupTupleHashEntry会使用NULL firstTuple字段设置一个虚拟的TupleHashEntryData. * --- 这可以提示我们转而查看inputslot. * 这个转换避免了物化虚拟输入元组,除非它们需要实际拷贝到数据表中. * * Also, the caller must select an appropriate memory context for running * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) * 同时,调用者必须选择合适的内存上下文用于运行哈希函数. * (dynahash.c不会改变CurrentMemoryContext) */static uint32TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple){ //Tuple 哈希表 TupleHashTable hashtable = (TupleHashTable) tb->private_data; //列数 int numCols = hashtable->numCols; //属性编号 AttrNumber *keyColIdx = hashtable->keyColIdx; //哈希key uint32 hashkey = hashtable->hash_iv; //元组slot TupleTableSlot *slot; //哈希函数指针 FmgrInfo *hashfunctions; int i; if (tuple == NULL)//元组为NULL { /* Process the current input tuple for the table */ //处理当前输入元组 slot = hashtable->inputslot; hashfunctions = hashtable->in_hash_funcs; } else { /* * Process a tuple already stored in the table. * 处理已存储在数据表中的元组. * * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) * (这种情况因为simplehash.h的使用,实际上不会发生,因为哈希值存储在条目中) */ slot = hashtable->tableslot; //存储MinimalTuple ExecStoreMinimalTuple(tuple, slot, false); hashfunctions = hashtable->tab_hash_funcs; } for (i = 0; i < numCols; i++) { //------- 循环遍历列数 //获取属性编号 AttrNumber att = keyColIdx[i]; Datum attr;//属性 bool isNull;//是否为NULL? /* rotate hashkey left 1 bit at each step */ //每一步向左移动一位 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); //获取属性值 attr = slot_getattr(slot, att, &isNull); //如为null,则哈希key设置为0 if (!isNull) /* treat nulls as having hash key 0 */ { //不为NULL uint32 hkey; //调用哈希函数 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i], attr)); hashkey ^= hkey; } } /* * The way hashes are combined above, among each other and with the IV, * doesn't lead to good bit perturbation. As the IV's goal is to lead to * achieve that, perform a round of hashing of the combined hash - * resulting in near perfect perturbation. * 上面哈希的的组合方式,彼此之间以及与IV的组合方式,都不会导致位扰动. * 因为IV存在的目的是实现该目标,执行组合哈希的hashing取整 -- 结果是完美的扰动. */ return murmurhash42(hashkey);}
TupleHashTableMatch
TupleHashTableMatch用于判断两个tuples是否匹配(有相同的hash值)
/* * See whether two tuples (presumably of the same hash value) match * 检查两个tuples是否匹配(有相同的hash值) * * As above, the passed pointers are pointers to TupleHashEntryData. * 如上所述,传入的指针指向TupleHashEntryData */static intTupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2){ TupleTableSlot *slot1; TupleTableSlot *slot2; TupleHashTable hashtable = (TupleHashTable) tb->private_data; ExprContext *econtext = hashtable->exprcontext; /* * We assume that simplehash.h will only ever call us with the first * argument being an actual table entry, and the second argument being * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction * could be supported too, but is not currently required. */ Assert(tuple1 != NULL); slot1 = hashtable->tableslot; ExecStoreMinimalTuple(tuple1, slot1, false); Assert(tuple2 == NULL); slot2 = hashtable->inputslot; /* For crosstype comparisons, the inputslot must be first */ econtext->ecxt_innertuple = slot2; econtext->ecxt_outertuple = slot1; return !ExecQualAndReset(hashtable->cur_eq_func, econtext);}
三、跟踪分析
测试脚本
-- 禁用并行set max_parallel_workers_per_gather=0;select bh,avg(c1),min(c1),max(c2) from t_agg_simple group by bh;
跟踪分析
(gdb) b TupleHashTableHashBreakpoint 1 at 0x6d3b2b: file execGrouping.c, line 379.(gdb) b TupleHashTableMatchBreakpoint 2 at 0x6d3c79: file execGrouping.c, line 446.(gdb) (gdb) cContinuing.Breakpoint 1, TupleHashTableHash (tb=0x2dd2720, tuple=0x0) at execGrouping.c:379379 TupleHashTable hashtable = (TupleHashTable) tb->private_data;(gdb)
输入参数
(gdb) p *tb$1 = {size = 256, members = 0, sizemask = 255, grow_threshold = 230, data = 0x2ddca00, ctx = 0x2db5310, private_data = 0x2dd2890}(gdb) p *tb->data$2 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
获取分组列数
(gdb) n380 int numCols = hashtable->numCols;(gdb) p *hashtable$3 = {hashtab = 0x2dd2720, numCols = 1, keyColIdx = 0x2dd2680, tab_hash_funcs = 0x2db72d0, tab_eq_func = 0x2ddea18, tablecxt = 0x2dcc370, tempcxt = 0x2db7320, entrysize = 24, tableslot = 0x2dd2928, inputslot = 0x2db7238, in_hash_funcs = 0x2db72d0, cur_eq_func = 0x2ddea18, hash_iv = 0, exprcontext = 0x2ddf338}(gdb) p tb->private_data$4 = (void *) 0x2dd2890
获取分组列属性编号
(gdb) n381 AttrNumber *keyColIdx = hashtable->keyColIdx;(gdb) 382 uint32 hashkey = hashtable->hash_iv;(gdb) p *keyColIdx$5 = 1
如输入tuple为NULL,设置slot和哈希函数
(gdb) n387 if (tuple == NULL)(gdb) p hashkey$6 = 0(gdb) n390 slot = hashtable->inputslot;(gdb) 391 hashfunctions = hashtable->in_hash_funcs;
开始遍历分组列
获取hashkey
(gdb) n406 for (i = 0; i < numCols; i++)(gdb) p numCols$8 = 1(gdb) n408 AttrNumber att = keyColIdx[i];(gdb) 413 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);(gdb) p att$9 = 1(gdb) p hashkey$10 = 0
获取属性值
(gdb) n415 attr = slot_getattr(slot, att, &isNull);(gdb) p hashkey$11 = 0(gdb) n417 if (!isNull) /* treat nulls as having hash key 0 */(gdb) p attr$12 = 140535426168416(gdb) x\16c attrInvalid character '\' in expression.(gdb) x/16c attr0x7fd0f427b660: 11 '\v' 71 'G' 90 'Z' 48 '0' 49 '1' 0 '\000' 0 '\000' 0 '\000'0x7fd0f427b668: 1 '\001' 0 '\000' 0 '\000' 0 '\000' 1 '\001' 0 '\000' 0 '\000' 0 '\000'
计算哈希值
(gdb) n421 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],(gdb) p hashfunctions[i]$13 = {fn_addr = 0x4c8a31, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2db5310, fn_expr = 0x0}(gdb) p i$14 = 0(gdb) n423 hashkey ^= hkey;(gdb) p hkey$15 = 3431319292(gdb) n406 for (i = 0; i < numCols; i++)(gdb) p hashkey$16 = 3431319292
返回结果
(gdb) n433 return murmurhash42(hashkey);(gdb) p murmurhash42(hashkey)$17 = 443809650(gdb) n434 }(gdb) tuplehash_insert (tb=0x2dd2720, key=0x0, found=0x7fff585be487) at ../../../src/include/lib/simplehash.h:497497 insertdist = 0;(gdb)
TupleHashTableMatch
进入TupleHashTableMatch
(gdb) cContinuing.Breakpoint 2, TupleHashTableMatch (tb=0x2dd2720, tuple1=0x2dcc488, tuple2=0x0) at execGrouping.c:446446 TupleHashTable hashtable = (TupleHashTable) tb->private_data;(gdb)
输入参数
(gdb) p *tb$18 = {size = 256, members = 1, sizemask = 255, grow_threshold = 230, data = 0x2ddca00, ctx = 0x2db5310, private_data = 0x2dd2890}(gdb) p *tuple1$19 = {t_len = 21, mt_padding = "\000\000\000\000\000", t_infomask2 = 1, t_infomask = 2, t_hoff = 24 '\030', t_bits = 0x2dcc497 ""}
对比是否匹配
(gdb) n447 ExprContext *econtext = hashtable->exprcontext;(gdb) 455 Assert(tuple1 != NULL);(gdb) 456 slot1 = hashtable->tableslot;(gdb) 457 ExecStoreMinimalTuple(tuple1, slot1, false);(gdb) 458 Assert(tuple2 == NULL);(gdb) 459 slot2 = hashtable->inputslot;(gdb) 462 econtext->ecxt_innertuple = slot2;(gdb) 463 econtext->ecxt_outertuple = slot1;(gdb) 464 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);(gdb) p hashtable->cur_eq_func$20 = (ExprState *) 0x2ddea18(gdb) p *hashtable->cur_eq_func$21 = {tag = {type = T_ExprState}, flags = 7 '\a', resnull = false, resvalue = 0, resultslot = 0x0, steps = 0x2ddeab0, evalfunc = 0x6cd882, expr = 0x0, evalfunc_private = 0x6cb43e , steps_len = 7, steps_alloc = 16, parent = 0x0, ext_params = 0x0, innermost_caseval = 0x0, innermost_casenull = 0x0, innermost_domainval = 0x0, innermost_domainnull = 0x0}
返回值
$22 = true(gdb) n465 }(gdb) tuplehash_insert (tb=0x2dd2720, key=0x0, found=0x7fff585be487) at ../../../src/include/lib/simplehash.h:556556 Assert(entry->status == SH_STATUS_IN_USE);(gdb)
感谢各位的阅读,以上就是"PostgreSQL绍聚合函数实现中怎么使用的simplehash"的内容了,经过本文的学习后,相信大家对PostgreSQL绍聚合函数实现中怎么使用的simplehash这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是,小编将为大家推送更多相关知识点的文章,欢迎关注!
哈希
函数
数据
属性
输入
上下
上下文
实际
条目
大小
数据表
类型
分组
内存
内容
字段
指向
指针
格式
处理
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
体验服哪个服务器最好
旅馆业网络安全内部管理制度
操作数据库数据积压解决办法
软件开发五大核心原则
网络安全方面好就业嘛
服务器多线程安全
极光蓝网络技术有限公司招聘
税控服务器管理系统登录口令
盛世芳华怎么选择服务器
湖北pdu服务器电源价格表
支付宝北京网络技术有限公司
中学网络安全宣传栏图片
数据库原理与应用案例题
长春龙旭网络技术
网络安全运营架构
强国网络技术有限公司
网络安全教育主题班会总结稿子
linux图片服务器搭建
网络安全时事
如何建立医院应急专家数据库
qt连接mysql数据库有驱动
本机无法链接服务器数据库
十大常见网络安全问题
保密与网络安全综合大检查
软件开发等级t6
文件服务器拷贝慢
50人同时用的服务器硬件
软件开发需求文案
navicat和数据库编程
福建智远时空网络技术