[安全] pickle-exe 题解
2025 “网安卫士” GDUT计算机学院院赛? pickle-exe WP
前期观察
首先反编译发现是 pyinstaller 打包的软件,通过 pyinstxtractor-ng <filename> 进行解包
解压后找到 chal.pyc,放到 PyLingual 进行反编译,获得源代码
# Decompiled with PyLingual (https://pylingual.io)
# Internal filename: chal.py
# Bytecode version: 3.11a7e (3495)
# Source timestamp: 1970-01-01 00:00:00 UTC (0)
import base64
import pickle
def dummy_function(*args):
    """Accept any positional arguments and do nothing.

    The pickle payload references this function via GLOBAL and calls it
    through REDUCE; it ignores whatever it is given and returns None.
    """
    return
if __name__ == '__main__':
bytecode = base64.b64decode('Y19fbWFpbl9fCmR1bW15X2Z1bmN0aW9uCnAwCihnMAooY19fYnVpbHRpbl9fCmdldGF0dHIKcDEKKGNyYW5kb20KUmFuZG9tClZzZWVkCnAyCnRScDMKKGNyYW5kb20KUmFuZG9tCnA0Cih0UnA1CkkyMTgxMDU2MzMKdHA2ClJwNwp0cDgKUnA5CmcwCihjX19idWlsdGluX18KcHJpbnQKcDEwCihWaXMgY29ycmVjdDoKcDExCmcxCihjX19idWlsdGluX18KdW5pY29kZQpwMTIKVl9fZXFfXwpwMTMKdHAxNApScDE1CihnMQooZzEyClZzdHJpcApwMTYKdHAxNwpScDE4CihjX19idWlsdGluX18KaW5wdXQKcDE5CihWYW5zd2VyOiAKcDIwCnRwMjEKUnAyMgp0cDIzClJwMjQKZzEKKGcxMgpWX19hZGRfXwpwMjUKdHAyNgpScDI3CihnMjcKKFZmbGFnewpwMjgKZzI3CihnMQooY19fYnVpbHRpbl9fCmJ5dGVzCnAyOQpWZGVjb2RlCnAzMAp0cDMxClJwMzIKKGcyOQooKGxwMzMKZzEKKGNfX2J1aWx0aW5fXwpsb25nCnAzNApWX194b3JfXwpwMzUKdHAzNgpScDM3CihJNjAKSTc2CnRwMzgKUnAzOQphZzM3CihJMTI1Ckk3Ngp0cDQwClJwNDEKYWczNwooSTQ3Ckk3Ngp0cDQyClJwNDMKYWczNwooSTcKSTc2CnRwNDQKUnA0NQphZzM3CihJMzIKSTc2CnRwNDYKUnA0NwphZzM3CihJMTI3Ckk3Ngp0cDQ4ClJwNDkKYWczNwooSTE5Ckk3Ngp0cDUwClJwNTEKYWczNwooSTM3Ckk3Ngp0cDUyClJwNTMKYWczNwooSTEyMQpJNzYKdHA1NApScDU1CmFnMzcKKEkxOQpJNzYKdHA1NgpScDU3CmFnMzcKKEk2MwpJNzYKdHA1OApScDU5CmFnMzcKKEkxMjQKSTc2CnRwNjAKUnA2MQphZzM3CihJMzUKSTc2CnRwNjIKUnA2MwphZzM3CihJMTI0Ckk3Ngp0cDY0ClJwNjUKYWczNwooSTM1Ckk3Ngp0cDY2ClJwNjcKYWczNwooSTE5Ckk3Ngp0cDY4ClJwNjkKYWczNwooSTYwCkk3Ngp0cDcwClJwNzEKYWczNwooSTEyNApJNzYKdHA3MgpScDczCmFnMzcKKEk1OQpJNzYKdHA3NApScDc1CmFnMzcKKEkxMjcKSTc2CnRwNzYKUnA3NwphZzM3CihJNjIKSTc2CnRwNzgKUnA3OQphZzM3CihJNjIKSTc2CnRwODAKUnA4MQphZzM3CihJNjIKSTc2CnRwODIKUnA4MwphZzM3CihJNDIKSTc2CnRwODQKUnA4NQphZzM3CihJMjUKSTc2CnRwODYKUnA4NwphZzM3CihJMTI1Ckk3Ngp0cDg4ClJwODkKYWczNwooSTEyNQpJNzYKdHA5MApScDkxCmFnMzcKKEkxMjUKSTc2CnRwOTIKUnA5MwphZzM3CihJMTkKSTc2CnRwOTQKUnA5NQphdHA5NgpScDk3ClZhc2NpaQpwOTgKdHA5OQpScDEwMApnMQooZzI5ClZoZXgKcDEwMQp0cDEwMgpScDEwMwooZzI5CigobHAxMDQKZzEKKGNyYW5kb20KUmFuZG9tClZyYW5kaW50CnAxMDUKdFJwMTA2CihnNQpJMApJMjU1CnRwMTA3ClJwMTA4CmFnMTA2CihnNQpJMApJMjU1CnRwMTA5ClJwMTEwCmFnMTA2CihnNQpJMApJMjU1CnRwMTExClJwMTEyCmFnMTA2CihnNQpJMApJMjU1CnRwMTEzClJwMTE0CmFnMTA2CihnNQpJMApJMjU1CnRwMTE1ClJwMTE2CmFnMTA2CihnNQpJMApJMjU1CnRwMTE3ClJwMTE4CmF
nMTA2CihnNQpJMApJMjU1CnRwMTE5ClJwMTIwCmFnMTA2CihnNQpJMApJMjU1CnRwMTIxClJwMTIyCmF0cDEyMwpScDEyNAp0cDEyNQpScDEyNgp0cDEyNwpScDEyOAp0cDEyOQpScDEzMApWfQpwMTMxCnRwMTMyClJwMTMzCnRwMTM0ClJwMTM1CnRwMTM2ClJwMTM3CnRwMTM4ClJwMTM5CnRwMTQwClJwMTQxCi4=')
pickle.loads(bytecode)
知识补充喵!
对于被 pickle 序列化的对象,可以使用 pickletools 来确定它在被 pickle.loads() 的时候会发生什么
其实就是将 pickle 字节码翻译成人类能看懂的样子,然后输出而并不执行
pickle 字节码由 pickle 虚拟机运行,pickle 虚拟机中有三部分,分别是字节码、栈和备忘录
栈是一个 list,而备忘录是一个 dict
使用 pickletools.dis(bytecode) 可以翻译字节码
使用 pickletools.genops(pickle: bytes | bytearray | IO[bytes]) -> Iterator[tuple[OpcodeInfo, Any | None, int | None]] 可以获取一个字节码的迭代器
由于 pickle 字节码没有判断和跳转,所以实际上合法的 pickle 字节码就是一些完全线性的操作
genops 返回的 tuple 可以解包成三个值 (opcode, arg, pos) 即 指令、参数、指令位置
指令位置就是 dis 函数的输出的每一行开头的那个数字
可以通过 opcode.name 获取该 opcode 的一个方便阅读的名字
然后归纳一下这道题目出现的一些指令
# Collect the distinct opcode names that occur in a pickletools.dis() dump.
with open('./test.log', "r") as f:  # test.log holds the output of dis
    data = f.read()
# Whitespace-split the dump into tokens.
data = data.split()
# Keep only all-uppercase, purely alphabetic tokens longer than one character.
# Single uppercase letters are skipped; presumably those are the raw opcode
# characters (e.g. R / I / V) that dis prints next to each opcode name.
wordset = {
    word
    for word in data
    if word.isalpha() and word.isupper() and len(word) != 1
}
print(wordset)
输出
{'REDUCE', 'PUT', 'STOP', 'APPEND', 'LIST', 'UNICODE', 'MARK', 'TUPLE', 'GLOBAL', 'GET', 'INT'}
- PUT / GET:
PUT 可以将栈顶的元素复制一份到 arg 指定的备忘录空间上;GET 则相反,把备忘录中 arg 指定位置的对象压入栈顶
例如 PUT 1 表示将栈顶元素复制到 memo[1] 里,GET 1 表示把 memo[1] 压入栈中
- UNICODE / INT:
基本类型,将这个类型的参数压入栈中
- MARK:
往栈中压入一个 MARK 标记
- LIST / TUPLE:
从栈顶往下直到遇到第一个标记为止,将这些栈元素按照栈中先底后顶的顺序压成一个 list/tuple
会将标记弹出
- APPEND:
弹出一个元素,再弹出一个 list,将元素 append 到 list 里面,再把新 list 压到栈中
- GLOBAL:
导入 arg 指定的全局对象,压入栈中
- REDUCE:
弹出栈顶的一个元组作为参数,再弹出栈顶的一个可调用对象,并使用参数调用它
即执行 可调用对象(*参数元组),并把返回值压入栈中
- STOP:
停机
此时栈顶剩下的一个对象会由 pickle.loads() 返回给调用者
既然是虚拟机,dis 的输出又太难看,而且这还是一个简单的线性操作合集
那么我们就有了一个想法,把每次 REDUCE 调用的函数输出,我们就能知道它在干嘛,而且会比 dis 清晰易懂
翻译和分析
import pickletools
import base64
# Opcodes that collapse everything above the topmost MARK into one container.
ContainerSet = {"TUPLE", "LIST"}
# Checklist of the other opcodes interpret() handles. The throwaway `_`
# bindings are never read; they only document coverage.
_ = "MARK", "APPEND", "REDUCE", "GLOBAL"
_ = "PUT", "GET",
_ = "STOP",


def interpret(pickle_stream):
    """Symbolically run a pickle and print every REDUCE as an assignment.

    The stream is only parsed with ``pickletools.genops``; nothing from the
    pickle is imported or called.  Every value on the simulated stack is a
    *string* holding Python-like source text, so a REDUCE can be printed as
    ``func_N = callable(arg, ...)`` and later opcodes can refer to the result
    by the name ``func_N``.

    Handled opcodes: TUPLE, LIST, INT, UNICODE, MARK, GLOBAL, APPEND, REDUCE,
    PUT, GET and STOP.  Any other opcode is silently ignored.

    Output (stdout): the byte position of every opcode followed by a space,
    and one ``func_N = ...`` line per REDUCE.

    Args:
        pickle_stream: Anything ``pickletools.genops`` accepts (bytes or a
            binary file-like object).

    Returns:
        None.

    Raises:
        IndexError: An opcode needs more operands than the simulated stack
            holds.
        KeyError: GET refers to a memo slot that was never PUT.
    """
    stack = []
    memo = {}
    call_index = 0  # number of REDUCE calls seen so far; names the next result
    for opcode, arg, pos in pickletools.genops(pickle_stream):
        name = opcode.name
        print(pos, end=" ")
        if name in ContainerSet:
            # Pop back to (and including) the nearest MARK, then restore the
            # bottom-to-top order the items had on the stack.
            items = []
            while stack and stack[-1] != "MARK":
                items.append(stack.pop())
            if stack and stack[-1] == "MARK":
                stack.pop()
            items.reverse()
            # Join outside the f-string: reusing the same quote character
            # inside an f-string expression is a SyntaxError before 3.12.
            joined = ", ".join(items)
            if name == "TUPLE":
                # Rendered without a trailing comma because tuples are used
                # as REDUCE argument lists: "f" + "(a)" -> "f(a)".
                stack.append(f"({joined})")
            elif name == "LIST":
                stack.append(f"[{joined}]")
        elif name == "INT":
            stack.append(str(arg))
        elif name == "UNICODE":
            # repr() yields a valid literal even when the text contains
            # quotes, backslashes or newlines.
            stack.append(repr(arg))
        elif name == "MARK":
            stack.append(name)
        elif name == "GLOBAL":
            # genops reports GLOBAL as "module name"; show it as module.name.
            assert isinstance(arg, str)
            stack.append(arg.replace(" ", "."))
        elif name == "APPEND":
            item = stack.pop()
            target = stack.pop()
            if target == "[]":
                stack.append(f"[{item}]")
            else:
                # Re-open the rendered list: drop its "]" and add the item.
                stack.append(target[:-1] + f", {item}]")
        elif name == "REDUCE":
            args = stack.pop()
            func = stack.pop()
            result_name = f"func_{call_index}"
            print(f"{result_name} = {func}{args}")
            # Later opcodes see the call result under its generated name.
            stack.append(result_name)
            call_index += 1
        elif name == "PUT":
            memo[arg] = stack[-1]
        elif name == "GET":
            stack.append(memo[arg])
        elif name == "STOP":
            break
if __name__ == "__main__":
bytecode = base64.b64decode("====")
print("import random")
# print("\nfunc = {}\n")
print("def dummy_function(*args):")
print(" return\n\n\n")
interpret(bytecode)
其实一开始的 log 打印部分不是这样的,是类似于下面的形式
[00] __builtin__.getattr(random.Random, 'seed')
[01] random.Random()
[02] RESULT(00)(RESULT(01), 218105633)
[03] __main__.dummy_function(RESULT(02))
[04] __builtin__.getattr(__builtin__.unicode, '__eq__')
[05] __builtin__.getattr(__builtin__.unicode, 'strip')
[06] __builtin__.input('answer: ')
[07] RESULT(05)(RESULT(06))
[08] __builtin__.getattr(__builtin__.unicode, '__add__')
[09] __builtin__.getattr(__builtin__.bytes, 'decode')
[10] __builtin__.getattr(__builtin__.long, '__xor__')
[11] RESULT(10)(60, 76)
[12] RESULT(10)(125, 76)
[13] RESULT(10)(47, 76)
[14] RESULT(10)(7, 76)
[15] RESULT(10)(32, 76)
[16] RESULT(10)(127, 76)
[17] RESULT(10)(19, 76)
[18] RESULT(10)(37, 76)
[19] RESULT(10)(121, 76)
[20] RESULT(10)(19, 76)
[21] RESULT(10)(63, 76)
[22] RESULT(10)(124, 76)
[23] RESULT(10)(35, 76)
[24] RESULT(10)(124, 76)
[25] RESULT(10)(35, 76)
[26] RESULT(10)(19, 76)
[27] RESULT(10)(60, 76)
[28] RESULT(10)(124, 76)
[29] RESULT(10)(59, 76)
[30] RESULT(10)(127, 76)
[31] RESULT(10)(62, 76)
[32] RESULT(10)(62, 76)
[33] RESULT(10)(62, 76)
[34] RESULT(10)(42, 76)
[35] RESULT(10)(25, 76)
[36] RESULT(10)(125, 76)
[37] RESULT(10)(125, 76)
[38] RESULT(10)(125, 76)
[39] RESULT(10)(19, 76)
[40] __builtin__.bytes([RESULT(11), RESULT(12), RESULT(13), RESULT(14), RESULT(15), RESULT(16), RESULT(17), RESULT(18), RESULT(19), RESULT(20), RESULT(21), RESULT(22), RESULT(23), RESULT(24), RESULT(25), RESULT(26), RESULT(27), RESULT(28), RESULT(29), RESULT(30), RESULT(31), RESULT(32), RESULT(33), RESULT(34), RESULT(35), RESULT(36), RESULT(37), RESULT(38), RESULT(39)])
[41] RESULT(09)(RESULT(40), 'ascii')
[42] __builtin__.getattr(__builtin__.bytes, 'hex')
[43] __builtin__.getattr(random.Random, 'randint')
[44] RESULT(43)(RESULT(01), 0, 255)
[45] RESULT(43)(RESULT(01), 0, 255)
[46] RESULT(43)(RESULT(01), 0, 255)
[47] RESULT(43)(RESULT(01), 0, 255)
[48] RESULT(43)(RESULT(01), 0, 255)
[49] RESULT(43)(RESULT(01), 0, 255)
[50] RESULT(43)(RESULT(01), 0, 255)
[51] RESULT(43)(RESULT(01), 0, 255)
[52] __builtin__.bytes([RESULT(44), RESULT(45), RESULT(46), RESULT(47), RESULT(48), RESULT(49), RESULT(50), RESULT(51)])
[53] RESULT(42)(RESULT(52))
[54] RESULT(08)(RESULT(41), RESULT(53))
[55] RESULT(08)('flag{', RESULT(54))
[56] RESULT(08)(RESULT(55), '}')
[57] RESULT(04)(RESULT(07), RESULT(56))
[58] __builtin__.print('is correct:', RESULT(57))
[59] __main__.dummy_function(RESULT(58))
[60] __main__.dummy_function(RESULT(03), RESULT(59))
把它改成这样的输出,大概是因为我连 log 也懒得看,打算打印成 py 看一下,结果发现好像稍作修改就可以使报错全部消失
而且还可以使用 F2 来重命名变量,差不多相当于自己写了一个反编译器ww
经过了重命名之后的输出是这样的
import random
def dummy_function(*args):
    """Accept any positional arguments, ignore them and return None."""
    return
# --- RNG setup: create a Random instance and seed it with a fixed constant ---
Random_set_seed = __builtins__.getattr(random.Random, 'seed')
Kls_Random = random.Random()
None_2 = Random_set_seed(Kls_Random, 218105633)
None_3 = dummy_function(None_2)
# --- Read the user's answer and strip surrounding whitespace ---
str_eq = __builtins__.getattr(__builtins__.str, '__eq__')
str_strip = __builtins__.getattr(__builtins__.str, 'strip')
input_str_not_strip = __builtins__.input('answer: ')
input_str = str_strip(input_str_not_strip)
# --- Helpers fetched as unbound methods so they can be called via REDUCE ---
str_add = __builtins__.getattr(__builtins__.str, '__add__')
bytes_decode = __builtins__.getattr(__builtins__.bytes, 'decode')
int_xor = __builtins__.getattr(__builtins__.int, '__xor__')
# --- Fixed part of the flag: 29 constants, each XOR-ed with 76 ---
func_11 = int_xor(60, 76)
func_12 = int_xor(125, 76)
func_13 = int_xor(47, 76)
func_14 = int_xor(7, 76)
func_15 = int_xor(32, 76)
func_16 = int_xor(127, 76)
func_17 = int_xor(19, 76)
func_18 = int_xor(37, 76)
func_19 = int_xor(121, 76)
func_20 = int_xor(19, 76)
func_21 = int_xor(63, 76)
func_22 = int_xor(124, 76)
func_23 = int_xor(35, 76)
func_24 = int_xor(124, 76)
func_25 = int_xor(35, 76)
func_26 = int_xor(19, 76)
func_27 = int_xor(60, 76)
func_28 = int_xor(124, 76)
func_29 = int_xor(59, 76)
func_30 = int_xor(127, 76)
func_31 = int_xor(62, 76)
func_32 = int_xor(62, 76)
func_33 = int_xor(62, 76)
func_34 = int_xor(42, 76)
func_35 = int_xor(25, 76)
func_36 = int_xor(125, 76)
func_37 = int_xor(125, 76)
func_38 = int_xor(125, 76)
func_39 = int_xor(19, 76)
# Pack the XOR results into bytes and decode them as ASCII text.
bytes_part_of_flag = __builtins__.bytes([func_11, func_12, func_13, func_14, func_15, func_16, func_17, func_18, func_19, func_20, func_21, func_22, func_23, func_24, func_25, func_26, func_27, func_28, func_29, func_30, func_31, func_32, func_33, func_34, func_35, func_36, func_37, func_38, func_39])
part_of_flag = bytes_decode(bytes_part_of_flag, 'ascii')
# --- Random part of the flag: 8 bytes drawn from the seeded RNG, hex-encoded ---
bytes_hex = __builtins__.getattr(__builtins__.bytes, 'hex')
randint = __builtins__.getattr(random.Random, 'randint')
func_44 = randint(Kls_Random, 0, 255)
func_45 = randint(Kls_Random, 0, 255)
func_46 = randint(Kls_Random, 0, 255)
func_47 = randint(Kls_Random, 0, 255)
func_48 = randint(Kls_Random, 0, 255)
func_49 = randint(Kls_Random, 0, 255)
func_50 = randint(Kls_Random, 0, 255)
func_51 = randint(Kls_Random, 0, 255)
bytes_suff_flag = __builtins__.bytes([func_44, func_45, func_46, func_47, func_48, func_49, func_50, func_51])
suff_flag = bytes_hex(bytes_suff_flag)
# --- Assemble "flag{" + fixed part + hex suffix + "}" ---
flag = str_add(part_of_flag, suff_flag)
flagg = str_add('flag{', flag)
flaggg = str_add(flagg, '}')
# --- Compare the stripped input with the assembled flag and report the result ---
ok = str_eq(input_str, flaggg)
None_58 = __builtins__.print('is correct:', ok)
None_59 = dummy_function(None_58)
None_60 = dummy_function(None_3, None_59)
实际上这并不是加密算法,而是在构造 flag,所以模仿它的逻辑写一个脚本就可以得到 flag 了
import random
# Fixed part of the flag: the constants from the pickle, each stored XOR 76.
xor_pref = [
    60, 125, 47, 7, 32, 127, 19, 37, 121, 19,
    63, 124, 35, 124, 35, 19, 60, 124, 59, 127,
    62, 62, 62, 42, 25, 125, 125, 125, 19,
]
# Undo the XOR with int.__xor__, the same operation the pickle uses.
list_pref = list(map((76).__xor__, xor_pref))
bytes_pref = bytes(list_pref)
pref = str(bytes_pref, "utf-8")

# Random part of the flag: 8 bytes from a Random seeded like the pickle's.
Kls_Random = random.Random()
Kls_Random.seed(218105633)
list_suff = [Kls_Random.randint(0, 255) for _ in range(8)]
bytes_suff = bytes(list_suff)
# Two lowercase hex digits per byte.
suff = "".join(f"{byte:02x}" for byte in bytes_suff)
print(f"flag{{{pref + suff}}}")
flag 是 flag{p1cKl3_i5_s0o0o_p0w3rrrfU111_22584bab1d01285c}
结束了吗?
经过出题人的提醒,我如梦方醒,发现可以直接在得到 flag 之后打印。。。
...
flag = str_add(part_of_flag, suff_flag)
flagg = str_add('flag{', flag)
flaggg = str_add(flagg, '}')
print(flaggg)
...
或者直接修改 pickle 字节码也行(出题人是聪明猫猫<3)