千家信息网

PostgreSQL 源码解读(229)- Linux Kernel(进程虚拟内存#3)

发表于:2025-02-02 作者:千家信息网编辑
千家信息网最后更新 2025年02月02日,PostgreSQL使用进程架构,每个连接对应一个后台进程,为了更好的理解这种架构,有必要深入理解进程的相关知识.本节主要介绍了Linux下的进程虚拟内存结构,并以使用C语言样例程序进行说明.一、ma
千家信息网最后更新 2025年02月02日PostgreSQL 源码解读(229)- Linux Kernel(进程虚拟内存#3)

PostgreSQL使用进程架构,每个连接对应一个后台进程。为了更好地理解这种架构,有必要深入理解进程的相关知识。本节主要介绍Linux下的进程虚拟内存结构,并使用C语言样例程序进行说明。

一、malloc

先前几节的样例代码通过malloc分配内存,进程虚拟内存中存在heap,如果不使用malloc,虚拟内存是否有heap呢?

[root@localhost linux]# cat 0-main.c
#include <stdio.h>
#include <stdlib.h>

/**
 * main - do nothing
 *
 * Return: EXIT_FAILURE if something failed. Otherwise EXIT_SUCCESS
 */
int main(void)
{
    getchar();
    return (EXIT_SUCCESS);
}

编译并执行,查看进程的maps

[root@localhost ~]# ps -ef|grep \ \./0root     21802 18855  0 16:45 pts/7    00:00:00 ./0root     21832 21806  0 16:45 pts/0    00:00:00 grep --color=auto  ./0[root@localhost ~]# cat /proc/21802/maps00400000-00401000 r-xp 00000000 fd:00 252008457                          /data/source/linux/000600000-00601000 r--p 00000000 fd:00 252008457                          /data/source/linux/000601000-00602000 rw-p 00001000 fd:00 252008457                          /data/source/linux/07fc6e03c5000-7fc6e057d000 r-xp 00000000 fd:00 153635                     /usr/lib64/libc-2.17.so7fc6e057d000-7fc6e077d000 ---p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so7fc6e077d000-7fc6e0781000 r--p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so7fc6e0781000-7fc6e0783000 rw-p 001bc000 fd:00 153635                     /usr/lib64/libc-2.17.so7fc6e0783000-7fc6e0788000 rw-p 00000000 00:00 0 7fc6e0788000-7fc6e07a9000 r-xp 00000000 fd:00 153628                     /usr/lib64/ld-2.17.so7fc6e099c000-7fc6e099f000 rw-p 00000000 00:00 0 7fc6e09a7000-7fc6e09a9000 rw-p 00000000 00:00 0 7fc6e09a9000-7fc6e09aa000 r--p 00021000 fd:00 153628                     /usr/lib64/ld-2.17.so7fc6e09aa000-7fc6e09ab000 rw-p 00022000 fd:00 153628                     /usr/lib64/ld-2.17.so7fc6e09ab000-7fc6e09ac000 rw-p 00000000 00:00 0 7ffe3c606000-7ffe3c627000 rw-p 00000000 00:00 0                          [stack]7ffe3c6b3000-7ffe3c6b5000 r-xp 00000000 00:00 0                          [vdso]ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall][root@localhost ~]#

没有[heap]的存在。

malloc不是系统调用,man malloc解释如下

[…] allocate dynamic memory[…]
void *malloc(size_t size);
[…]
The malloc() function allocates size bytes and returns a pointer to the allocated memory.

malloc调用了什么系统函数?可以通过strace来分析

[root@localhost linux]# cat 3-main.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/**
 * main - let's find out which syscall malloc is using
 *
 * Return: EXIT_FAILURE if something failed. Otherwise EXIT_SUCCESS
 */
int main(void)
{
    void *p;

    write(1, "BEFORE MALLOC\n", 14);
    p = malloc(1);
    write(1, "AFTER MALLOC\n", 13);
    printf("%p\n", p);
    getchar();
    return (EXIT_SUCCESS);
}

编译执行,strace输出如下

[root@localhost ~]# strace ./3execve("./3", ["./3"], [/* 25 vars */]) = 0brk(NULL)                               = 0x1abe000mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe132467000access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3fstat(3, {st_mode=S_IFREG|0644, st_size=34897, ...}) = 0mmap(NULL, 34897, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fe13245e000close(3)                                = 0open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fe131e84000mprotect(0x7fe13203c000, 2097152, PROT_NONE) = 0mmap(0x7fe13223c000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x7fe13223c000mmap(0x7fe132242000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fe132242000close(3)                                = 0mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe13245d000mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe13245b000arch_prctl(ARCH_SET_FS, 0x7fe13245b740) = 0mprotect(0x7fe13223c000, 16384, PROT_READ) = 0mprotect(0x600000, 4096, PROT_READ)     = 0mprotect(0x7fe132468000, 4096, PROT_READ) = 0munmap(0x7fe13245e000, 34897)           = 0write(1, "BEFORE MALLOC\n", 14BEFORE MALLOC)         = 14brk(NULL)                               = 0x1abe000brk(0x1adf000)                          = 0x1adf000brk(NULL)                               = 0x1adf000write(1, "AFTER MALLOC\n", 13AFTER MALLOC)          = 13fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 5), ...}) = 0mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe132466000write(1, "0x1abe010\n", 100x1abe010)             = 10fstat(0, 
{st_mode=S_IFCHR|0620, st_rdev=makedev(136, 5), ...}) = 0mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe132465000read(0,

可以看到,malloc调用了brk分配堆内存,大小为0x21000,查看进程的maps

[root@localhost linux]# cat /proc/14502/maps00400000-00401000 r-xp 00000000 fd:00 36596343                           /root/300600000-00601000 r--p 00000000 fd:00 36596343                           /root/300601000-00602000 rw-p 00001000 fd:00 36596343                           /root/301abe000-01adf000 rw-p 00000000 00:00 0                                  [heap]7fe131e84000-7fe13203c000 r-xp 00000000 fd:00 153635                     /usr/lib64/libc-2.17.so7fe13203c000-7fe13223c000 ---p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so7fe13223c000-7fe132240000 r--p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so7fe132240000-7fe132242000 rw-p 001bc000 fd:00 153635                     /usr/lib64/libc-2.17.so7fe132242000-7fe132247000 rw-p 00000000 00:00 0 7fe132247000-7fe132268000 r-xp 00000000 fd:00 153628                     /usr/lib64/ld-2.17.so7fe13245b000-7fe13245e000 rw-p 00000000 00:00 0 7fe132465000-7fe132468000 rw-p 00000000 00:00 0 7fe132468000-7fe132469000 r--p 00021000 fd:00 153628                     /usr/lib64/ld-2.17.so7fe132469000-7fe13246a000 rw-p 00022000 fd:00 153628                     /usr/lib64/ld-2.17.so7fe13246a000-7fe13246b000 rw-p 00000000 00:00 0 7ffdfb7b5000-7ffdfb7d6000 rw-p 00000000 00:00 0                          [stack]7ffdfb7ef000-7ffdfb7f1000 r-xp 00000000 00:00 0                          [vdso]ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall][root@localhost linux]#

01abe000-01adf000 rw-p 00000000 00:00 0 [heap]
与strace跟踪输出相符。

执行上面的样例代码

[root@localhost linux]# ./3
BEFORE MALLOC
AFTER MALLOC
0x1123010

输出为0x1123010(这是另一次运行,因此地址与前面strace跟踪时的0x1abe010不同),但堆实际的开始地址为0x1123000,多出来的0x10一共16个字节是什么呢?实际上,这16个字节是malloc为该chunk保留的头部信息:前8个字节为上一个chunk的大小(仅当上一个chunk空闲时才有意义,此例中为0x0),后8个字节为当前chunk的大小(其最低几位用作标记位)。

[root@localhost linux]# cat 5-main.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/**
 * pmem - print mem
 * @p: memory address to start printing from
 * @bytes: number of bytes to print
 *
 * Return: nothing
 */
void pmem(void *p, unsigned int bytes)
{
    unsigned char *ptr;
    unsigned int i;

    ptr = (unsigned char *)p;
    for (i = 0; i < bytes; i++)
    {
        if (i != 0)
        {
            printf(" ");
        }
        printf("%02x", *(ptr + i));
    }
    printf("\n");
}

/**
 * main - the 0x10 lost bytes
 *
 * Return: EXIT_FAILURE if something failed. Otherwise EXIT_SUCCESS
 */
int main(void)
{
    void *p;
    int i;

    for (i = 0; i < 10; i++)
    {
        p = malloc(1024 * (i + 1));
        printf("%p\n", p);
        printf("bytes at %p:\n", (void *)((char *)p - 0x10));
        pmem((char *)p - 0x10, 0x10);
    }
    return (EXIT_SUCCESS);
}
[root@localhost linux]#

编译执行

[root@localhost linux]# ./5
0x2416010
bytes at 0x2416000:
00 00 00 00 00 00 00 00 11 04 00 00 00 00 00 00
...

这是p指向的内存地址之前的16个字节中的内容。其中后8个字节(11 04 00 00 00 00 00 00)按小端字节序读取为0x0411:0x0410是chunk的大小(1024个字节+16个字节=1040个字节),最低位的0x1是标记位,用于标记上一个chunk是否正在使用。

二、参考资料

Virtual memory
Hack the Virtual Memory: malloc, the heap & the program break

0