[安全] miniLCTF 2023 magical syscall 题解

miniLCTF2023 magical syscall WP

前期探查 和 Patch

尝试运行,发现输入太久直接退出了,输入错误也退出,调试器调试也退出

 ./magical_syscall
input your flag:
Never Give Up !!!!
 ./magical_syscall
input your flag:
hajimi
try again
 pwndbg ./magical_syscall
pwndbg: loaded 212 pwndbg commands. Type pwndbg [filter] for a list.
pwndbg: created 9 GDB functions (can be used with print/break). Type help function to see them.
Reading symbols from ./magical_syscall...
(No debugging symbols found in ./magical_syscall)
------- tip of the day (disable with set show-tips off) -------
GDB and Pwndbg parameters can be shown or set with show <param> and set <param> <value> GDB commands
pwndbg> r
Starting program: /home/featherXI/Documents/yuXIwei/CTF/A&D/d_process_vm/magical_syscall 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
debugger detected, exit...
[Inferior 1 (process 4973) exited with code 01]
pwndbg> exit

Strings view 查找这些字符串,然后 xrefs 之类的,反正你会查到 .init_array 里面的三个函数上

int64_t _INIT_1()
    signal(sig: 0xe, handler: sub_401219)
    signal(sig: 5, handler: sub_401236)
    return alarm(0xa)
 
void sub_401219() __noreturn
    puts(str: "Never Give Up !!!!")
    exit(status: 1)
    noreturn
 
void sub_401236() __noreturn
    puts(str: "debugger detected, exit...")
    exit(status: 1)
    noreturn

5 即 SIGTRAP,通常由调试器(断点/单步)触发,这是一个反调试的点
0xe(14)即 SIGALRM,由 alarm 函数在计时结束后发送

signal 是注册函数,注册收到相应信号后的处理函数

这就是尝试运行和调试时候出现的限时和反调试的实现


不妨把其他 .init_array 里面的函数看了

int64_t _INIT_2()
 
    void* fsbase
    int64_t rax = *(fsbase + 0x28)
    FILE* fp = fopen(filename: "/proc/self/status", mode: "r")
    void var_118
    char* buf = &var_118
 
    while (fgets(buf, n: 0x100, fp) != 0)
        if (strstr(buf, "TracerPid") != 0)
            int32_t var_12c_1 = 0
            
            if (atoi(nptr: &buf[strlen(buf) - 3]) != 0)
                puts(str: "debugger detected, exit...")
                exit(status: 1)
                noreturn
 
    if (rax == *(fsbase + 0x28))
        return rax - *(fsbase + 0x28)
 
    __stack_chk_fail()
    noreturn

该函数通过读取 /proc/self/status 中的 TracerPid 字段(非零表示被调试),并利用 &buf[strlen(buf) - 3] 从行末提取PID值来判断是否存在调试器,从而在检测到时主动退出程序实现反调试

为了解决这两个反调试的点,我直接把 _INIT_1 整个 nop 掉了,然后 _INIT_2 改了分支(话说 bn 改分支确实方便啊右键一点就有)


int64_t _INIT_3()
    pid_t rax = fork()
    
    if (rax s< 0)
        puts(str: "failed to creat subprocess")
        exit(status: 1)
        noreturn
    
    if (rax == 0)
        return sub_401386()
    
    sub_4013b5(rax)
    noreturn

很奇怪,这里 fork 了一下,创建了一个子进程,fork 的返回值对于父进程来说是子进程的 pid,对于子进程来说是 0
也就是说这里父进程和子进程进入了完全不一样的路径,我们分别来看看

  1. 子进程

    int64_t sub_401386()
        ptrace(request: PTRACE_TRACEME, 0, 0, 0)
        return raise(sig: 0x12)

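    ptrace(request: PTRACE_TRACEME, 0, 0, 0) 声明本进程由父进程跟踪(父进程成为 tracer)
    raise(sig: 0x12)(0x12 即 SIGCONT)给自己发送一个信号:被跟踪的进程在信号递达时会停下并通知父进程,父进程的 waitpid 因此返回;之后父进程用 PTRACE_SYSCALL 恢复子进程,子进程才会继续执行并进入 main 函数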

    int32_t main(int32_t argc, char** argv, char** envp)
        puts(str: "input your flag:")
        
        while (true)
            int32_t rax_1 = data_4040a0
            syscall(zx.q((&data_4040ac)[zx.q(rax_1) + 0x1d4]), 
                zx.q((&data_4040ac)[zx.q(rax_1 + 1) + 0x1d4]), 
                zx.q((&data_4040ac)[zx.q(rax_1 + 2) + 0x1d4]), 
                zx.q((&data_4040ac)[zx.q(rax_1 + 3) + 0x1d4]))

    其实谁能第一眼不看 main 函数呢?不过看了也看不太懂,它仅仅是在无限的循环调用 syscall 而且还是传入相同的参数,十分可疑。
    好的,看不懂就先看别的吧~

  2. 父进程 父进程调用 sub_4013b5(rax) 函数
    我们能看到这里还有一个反调试的点,改跳转即可
    同时是一个大的循环加上很多分支的判断,由此我们可以猜测这实际上是一道 VM 题目,这里正是 VM 的主要代码

运行机制

能看到整个程序都有很多 ptrace 调用,先查询了解一下相关信息

  • PTRACE_SETOPTIONS:设置跟踪选项,第二个参数为被跟踪子进程的 PID,第三个参数为 0,第四个参数为选项标志 PTRACE_O_EXITKILL,在父进程意外退出的情况下也杀死子进程,防止其变成孤儿进程
  • PTRACE_SYSCALL:使子进程在每次 syscall 进入或退出时停止,以便跟踪器检查,第三、四个参数为 0 表示无附加数据
  • PTRACE_GETREGS:获取子进程的通用寄存器值,第三个参数为 0,第四个参数指向存储寄存器值的结构体
  • PTRACE_SETREGS:通过结构体设置子进程的寄存器值
  • PTRACE_POKEDATA:向子进程的内存地址写入一个 word,数据为最后一个参数
  • PTRACE_PEEKDATA:从子进程的内存地址读取一个 word,返回
  • waitpid:阻塞等待子进程的状态变化,并将状态信息存入 stat_loc,第三个参数为 0 表示默认选项

联想一下 PTRACE_SYSCALL 和 子进程循环调用的 syscall,我们就可以大致猜到题目这个 VM 是怎样运行的
子进程通过调用 syscall 将 VM 的参数通过寄存器传给父进程,父进程根据这些参数执行不同的指令
每一个 syscall 调用实际上就是字节码中一条指令的运行

这里有必要补充一下寄存器结构体,不然看起来怪怪的

/*
 * x86-64 register snapshot.  In this write-up it is the type given to the
 * buffer (var_e8 / sonREG) that the tracer fills with PTRACE_GETREGS and
 * writes back with PTRACE_SETREGS.  The member order must not be changed:
 * the decompiler maps stack offsets onto these members.
 */
struct user_regs_struct
{
    uint64_t r15;
    uint64_t r14;
    uint64_t r13;
    uint64_t r12;
    uint64_t rbp;
    uint64_t rbx;
    uint64_t r11;
    uint64_t r10;
    uint64_t r9;
    uint64_t r8;
    uint64_t rax;
    uint64_t rcx;
    uint64_t rdx;       /* read by the VM handlers as the third operand */
    uint64_t rsi;       /* read by the VM handlers as the second operand */
    uint64_t rdi;       /* read by the VM handlers as the first operand */
    uint64_t orig_rax;  /* syscall number requested by the child; the VM handlers treat it as the opcode */
    uint64_t rip;
    uint64_t cs;
    uint64_t eflags;
    uint64_t rsp;
    uint64_t ss;
    uint64_t fs_base;
    uint64_t gs_base;
    uint64_t ds;
    uint64_t es;
    uint64_t fs;
    uint64_t gs;
};

var_e8 设置成这个类型之后 VM 的代码会正常一点,方便接下来的指令解析

附 VM 反编译代码

void sub_4013b5(pid_t arg1) __no_return
{
    void* fsbase;
    int64_t var_10 = *(uint64_t*)((char*)fsbase + 0x28);
    int32_t stat_loc;
    waitpid(arg1, &stat_loc, 0);
    
    if ((uint32_t)(uint8_t)stat_loc != 0x7f)
    {
        puts("debugger detected, exit...");
        exit(1);
        /* no return */
    }
    
    ptrace(PTRACE_SETOPTIONS, (uint64_t)arg1, 0, 0x100000);
    
    while (true)
    {
        ptrace(PTRACE_SYSCALL, (uint64_t)arg1, 0, 0);
        waitpid(arg1, &stat_loc, 0);
        struct user_regs_struct var_e8;
        ptrace(PTRACE_GETREGS, (uint64_t)arg1, 0, &var_e8);
        
        if (var_e8.orig_rax == 0x22b8)
        {
            puts("try again");
            exit(1);
            /* no return */
        }
        
        if (var_e8.orig_rax == 0x270f)
            break;
        
        if (var_e8.orig_rax == 0xf3f)
        {
            var_e8.orig_rax = 0;
            var_e8.rdi = 0;
            var_e8.rsi = &data_4040b4;
            var_e8.rdx = 1;
            ptrace(PTRACE_SETREGS, (uint64_t)arg1, 0, &var_e8);
            data_4040a0 += 1;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf3d)
        {
            int32_t* var_118_1;
            __builtin_memset(&var_118_1, 0, 0x18);
            
            if (!var_e8.rdi)
                var_118_1 = &data_4040a8;
            else if (var_e8.rdi == 1)
                var_118_1 = &data_4040ac;
            
            wchar32* var_110_1;
            
            if (!var_e8.rsi)
                var_110_1 = &data_4041bc;
            else if (var_e8.rsi == 1)
                var_110_1 = U"MiniLCTF2023";
            else if (var_e8.rsi == 2)
                var_110_1 = &data_4045ec;
            
            int32_t* var_108_1;
            
            if (!var_e8.rdx)
                var_108_1 = &data_4040a8;
            else if (var_e8.rdx == 1)
                var_108_1 = &data_4040ac;
            else if (var_e8.rdx == 2)
                var_108_1 = &data_4040b0;
            
            *(uint32_t*)var_118_1 += var_110_1[(uint64_t)*(uint32_t*)var_108_1];
            data_4040a0 += 4;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf3e)
        {
            int32_t* var_100_1 = nullptr;
            
            if (!var_e8.rdi)
                var_100_1 = &data_4040a8;
            else if (var_e8.rdi == 1)
                var_100_1 = &data_4040ac;
            
            *(uint32_t*)var_100_1 =
                (int32_t)(COMBINE(0, (uint64_t)*(uint32_t*)var_100_1) % var_e8.rsi);
            data_4040a0 += 3;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf40)
        {
            data_4040b4 = ptrace(PTRACE_PEEKDATA, (uint64_t)arg1, &data_4040b4, 0);
            data_4040a4 += 1;
            (&data_4040ac)[(uint64_t)data_4040a4 + 4] = data_4040b4;
            data_4040a0 += 1;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf41)
        {
            data_4040b4 = (&data_4040ac)[(uint64_t)data_4040a4 + 4];
            data_4040a4 -= 1;
            data_4040a0 += 1;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf42)
        {
            if (!var_e8.rdi)
            {
                uint64_t rax_78;
                (uint8_t)rax_78 = (uint64_t)data_4040b0 == var_e8.rsi;
                data_4040b8 = (uint32_t)(uint8_t)rax_78;
            }
            else if (var_e8.rdi == 1)
            {
                int32_t rax_81;
                (uint8_t)rax_81 = data_4040a8 == data_4040b4;
                data_4040b8 = (uint32_t)(uint8_t)rax_81;
            }
            
            data_4040a0 += 3;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf43)
        {
            if (!data_4040b8)
            {
                data_4040a0 += 2;
                ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                    (uint64_t)data_4040a0);
            }
            else
            {
                data_4040a0 = (int32_t)var_e8.rdi;
                ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                    (uint64_t)data_4040a0);
            }
        }
        
        if (var_e8.orig_rax == 0xf44)
        {
            if (data_4040b8)
            {
                data_4040a0 += 2;
                ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                    (uint64_t)data_4040a0);
            }
            else
            {
                data_4040a0 = (int32_t)var_e8.rdi;
                ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                    (uint64_t)data_4040a0);
            }
        }
        
        if (var_e8.orig_rax == 0xf45)
        {
            if (!var_e8.rdi)
                data_4040a8 = data_4040b0;
            else if (var_e8.rdi == 1)
                *(uint32_t*)(((uint64_t)data_4040b0 << 2) + &data_4041bc) = data_4040b0;
            else if (var_e8.rdi == 2)
                data_4040a8 = *(uint32_t*)(&data_4041bc + ((uint64_t)data_4040b0 << 2));
            else if (var_e8.rdi == 3)
                data_4040a8 =
                    *(uint32_t*)((((uint64_t)data_4040b0 + 0x10c) << 2) + 0x4041b8);
            else if (var_e8.rdi == 4)
                data_4040a8 = *(uint32_t*)(&data_4041bc + ((uint64_t)data_4040a8 << 2));
            
            data_4040a0 += 2;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf46)
        {
            data_4040b4 ^= data_4040a8;
            data_4040a0 += 1;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf47)
        {
            int32_t* var_f8;
            
            if (!var_e8.rdi)
                var_f8 = &data_4040b0;
            else if (var_e8.rdi == 1)
                var_f8 = &data_4040ac;
            
            *(uint32_t*)var_f8 += 1;
            data_4040a0 += 2;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf48)
        {
            *(uint32_t*)(&data_4041bc + ((uint64_t)data_4040b0 << 2)) =
                *(uint32_t*)(&data_4041bc + ((uint64_t)data_4040ac << 2));
            *(uint32_t*)(&data_4041bc + ((uint64_t)data_4040ac << 2)) =
                *(uint32_t*)(&data_4041bc + ((uint64_t)data_4040b0 << 2));
            data_4040a0 += 1;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        if (var_e8.orig_rax == 0xf49)
        {
            int32_t* var_f0_1 = nullptr;
            
            if (!var_e8.rdi)
                var_f0_1 = &data_4040b0;
            else if (var_e8.rdi == 1)
                var_f0_1 = &data_4040ac;
            
            *(uint32_t*)var_f0_1 = 0;
            data_4040a0 += 2;
            ptrace(PTRACE_POKEDATA, (uint64_t)arg1, &data_4040a0, 
                (uint64_t)data_4040a0);
        }
        
        ptrace(PTRACE_SYSCALL, (uint64_t)arg1, 0, 0);
        waitpid(arg1, &stat_loc, 0);
    }
    
    puts("congratulations");
    exit(0);
    /* no return */
}

指令解析

解析的这一步是伴随着重命名的,因为在明白每个分支代表的含义的同时也能加深我们对程序结构和变量的理解,而且这个过程十分散,我不能完全地展示出来,因此下面的代码统一使用重命名后的
虽说如此,仍然有一些诀窍,在逐个分析的过程中讲解

  1. FAILED 和 SUCCESS

    if (sonREG.orig_rax == 8888)  // failed
    {
        puts("try again");
        exit(1);
        /* no return */
    }
     
    if (sonREG.orig_rax == 9999)  // success
        break;

    break 到外面之后就是 Congratulations 了,明显是成功,try again 明显是失败

  2. 输入指令 READ

    // READ (opcode 0xf3f): rewrite the child's pending syscall into
    // read(stdin, &TMP, 1), so the child itself reads one input byte into TMP.
    if (sonREG.orig_rax == 0xf3f)
    {
        sonREG.orig_rax = 0;  // syscall number 0 = read
        sonREG.rdi = 0;  // fd 0 = stdin
        sonREG.rsi = &TMP;  // destination buffer: TMP
        sonREG.rdx = 1;  // read 1 byte
        ptrace(PTRACE_SETREGS, (uint64_t)sonPID, 0, &sonREG);
        IP += 1;  // opcode only, no operands
        // write the advanced IP into the child's copy of IP
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    syscall 的第一个参数改成了 0,上网查询,这代表了 sysread,因此这个分支的含义是 往子进程的TMP里读入一个字节
    由于这里没有使用其他参数,因此 IP + 1
    怎么能判断出这是 IP 呢?因为每一个分支它都会增长,而且增长的数量由指令(1)和所需数据(使用的参数数量)的和决定
    从这里我们也可以看出这个指令集是不定长的,不同类型的指令 长度不一样
    TMP 暂时理解成一块有输入的地址好了

  3. 加法

    // ADD (opcode 0xf3d): Reg[rdi] += Table[rsi][Reg[rdx]]
    //   rdi selects the destination register (0 = R0, 1 = R1)
    //   rsi selects the table (0 = Table_0, 1 = Key, 2 = Table_2)
    //   rdx selects the index register (0 = R0, 1 = R1, 2 = R2)
    if (sonREG.orig_rax == 0xf3d)
    {
        uint32_t* ptr[0x3];
        ptr[0] = 0;
        ptr[1] = 0;
        ptr[2] = 0;
        
        if (!sonREG.rdi)
            ptr[0] = &R0;
        else if (sonREG.rdi == 1)
            ptr[0] = &R1;
        
        if (!sonREG.rsi)
            ptr[1] = &Table_0;
        else if (sonREG.rsi == 1)
            ptr[1] = &Key;
        else if (sonREG.rsi == 2)
            ptr[1] = &Table_2;
        
        if (!sonREG.rdx)
            ptr[2] = &R0;
        else if (sonREG.rdx == 1)
            ptr[2] = &R1;
        else if (sonREG.rdx == 2)
            ptr[2] = &R2;
        
        // destination register (R0 or R1)
        //   +=
        // table (Table_0, Key or Table_2)
        //   [ index register (R0, R1 or R2) ]
        *(uint32_t*)ptr[0] += ptr[1][(uint64_t)*(uint32_t*)ptr[2]];
        IP += 4;  // opcode + 3 operands
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    没什么特别的,根据参数来判断加哪个寄存器,以及加哪个表的哪个值

  4. 取模

    if (sonREG.orig_rax == 0xf3e)  // MOD Reg[rdi], rsi
    {
        uint32_t* VMreg_tmp = nullptr;
        
        if (!sonREG.rdi)
            VMreg_tmp = &R0;
        else if (sonREG.rdi == 1)
            VMreg_tmp = &R1;
        
        *(uint32_t*)VMreg_tmp =
            (int32_t)(COMBINE(0, (uint64_t)*(uint32_t*)VMreg_tmp) % sonREG.rsi);
        IP += 3;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    对不同的寄存器取模,依旧易懂

  5. 入栈和出栈

    // PUSH TMP
    // sync with son
    if (sonREG.orig_rax == 0xf40)
    {
        // sync with son
        TMP = ptrace(PTRACE_PEEKDATA, (uint64_t)sonPID, &TMP, 0);
        SP += 1;
        (&R1)[(uint64_t)SP + 4] = TMP;
        IP += 1;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }
     
    if (sonREG.orig_rax == 0xf41)  // POP -> TMP
    {
        TMP = (&R1)[(uint64_t)SP + 4];
        SP -= 1;
        IP += 1;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    结合上面在子进程将 1 个字节读入 子进程的 TMP 中,这里的 0xf40 将子进程的 TMP 同步到父进程的 TMP
    然后某个类似指针的东西 +1,同时将 TMP 存入了一块内存中
    再结合下面的函数,TMP读取同一块内存的位置,指针 -1,联想一下,很容易明白这是栈的特征
    而且这个指针初始是 -1,很容易联想到是栈指针
    这两个指令分别是 push 和 pop

    5555555580a0  int32_t IP = 0x0
    5555555580a4  int32_t SP = -0x1
    5555555580a8  int32_t R0 = 0x0
    5555555580ac  int32_t R1 = 0x0
    5555555580b0  int32_t R2 = 0x0
    5555555580b4  int32_t TMP = 0x0
    5555555580b8  int32_t FLAG = 0x0
    
    5555555580bc                                      00 00 00 00              ....
    
    

    结合内存视图可以看出来,在一连串的寄存器后面紧贴着的就是栈内存

  6. 比较指令

    // CMP ;
    // rdi == 0 => CMP R2, rsi;
    // rdi == 1 => CMP R0, TMP;
    if (sonREG.orig_rax == 0xf42)
    {
        if (!sonREG.rdi)
        {
            uint64_t rax_76;
            (uint8_t)rax_76 = (uint64_t)R2 == sonREG.rsi;
            FLAG = (uint32_t)(uint8_t)rax_76;
        }
        else if (sonREG.rdi == 1)
        {
            int32_t rax_79;
            (uint8_t)rax_79 = R0 == TMP;
            FLAG = (uint32_t)(uint8_t)rax_79;
        }
        
        IP += 3;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    将某个比较的结果存入变量中,很明显能看出来就是比较(
    存储结果的寄存器明显是标志位

  7. 跳转

    if (sonREG.orig_rax == 3907)  // JZ rdi
    {
        if (!FLAG)
        {
            IP += 2;
            ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
        }
        else
        {
            IP = (int32_t)sonREG.rdi;
            ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
        }
    }
     
    if (sonREG.orig_rax == 0xf44)  // JNZ rdi
    {
        if (FLAG)
        {
            IP += 2;
            ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
        }
        else
        {
            IP = (int32_t)sonREG.rdi;
            ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
        }
    }

    根据标志位来决定 IP 的值,明显是跳转的特征
    分析到这里基本上已经能一气呵成了

  8. 分情况赋值

    if (sonREG.orig_rax == 0xf45)
    {
        if (!sonREG.rdi)
            R0 = R2;
        else if (sonREG.rdi == 1)
            *(uint32_t*)(((uint64_t)R2 << 2) + &Table_0) = R2;
        else if (sonREG.rdi == 2)
            R0 = *(uint32_t*)(&Table_0 + ((uint64_t)R2 << 2));
        else if (sonREG.rdi == 3)
            R0 = *(uint32_t*)((((uint64_t)R2 + 0x10c) << 2) + 0x5555555581b8);
        else if (sonREG.rdi == 4)
            R0 = *(uint32_t*)(&Table_0 + ((uint64_t)R0 << 2));
        
        IP += 2;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    分开五种情况进行赋值,其中 0x5555555581b8 的地址经过 0x10c * 4 的偏移后正好是 Table_2 - 1 的位置,因此那个表示 Table_2[R2 - 1]

  9. 异或

    if (sonREG.orig_rax == 0xf46)  // XOR TMP, R0
    {
        TMP ^= R0;
        IP += 1;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }
  10. 自增

    if (sonREG.orig_rax == 0xf47)  // SUCC Address
    {
        int32_t* var_f8;
        
        if (!sonREG.rdi)
            var_f8 = &R2;
        else if (sonREG.rdi == 1)
            var_f8 = &R1;
        
        *(uint32_t*)var_f8 += 1;
        IP += 2;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }
  11. 一个比较迷惑的赋值,也不太用管(

    if (sonREG.orig_rax == 0xf48)
    {
        // Table_0[R2] = Table_0[R1]
        *(uint32_t*)(&Table_0 + ((uint64_t)R2 << 2)) =
            *(uint32_t*)(&Table_0 + ((uint64_t)R1 << 2));
        // Table_0[R1] = Table_0[R2]
        *(uint32_t*)(&Table_0 + ((uint64_t)R1 << 2)) =
            *(uint32_t*)(&Table_0 + ((uint64_t)R2 << 2));
        IP += 1;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

    注意不要看成交换

  12. 寄存器置零

    if (sonREG.orig_rax == 0xf49)  // Mov 0, Rx
    {
        int32_t* var_f0_1 = nullptr;
        
        if (!sonREG.rdi)
            var_f0_1 = &R2;
        else if (sonREG.rdi == 1)
            var_f0_1 = &R1;
        
        *(uint32_t*)var_f0_1 = 0;
        IP += 2;
        ptrace(PTRACE_POKEDATA, (uint64_t)sonPID, &IP, (uint64_t)IP);
    }

至此每个操作指令分析完毕

字节码分析

看到仅 子进程 运行的 main() 函数,调用 syscall 的时候传入了四个变量,而这四个变量在 call syscall 的时候正是在寄存器上的
它们分别是 orig_rax(指令),rdi, rsi, rdx(后三个参数),因此并不需要使用动态调试来获取从子进程复制的寄存器值,因为它们(至少我们需要的部分)就是从字节码上面读出来的(我之前这里没想到还卡了一会,甚至使用 frida 读了一次想着写个很复杂的模拟执行得到容易理解的代码,现在看来是由于知识不牢固多虑了)

int32_t main(int32_t argc, char** argv, char** envp)
{
    puts("input your flag:");
    
    while (true)
    {
        int32_t IP_1 = IP;
        syscall((uint64_t)(&R1)[(uint64_t)IP_1 + 0x1d4], 
            (uint64_t)(&R1)[(uint64_t)(IP_1 + 1) + 0x1d4], 
            (uint64_t)(&R1)[(uint64_t)(IP_1 + 2) + 0x1d4], 
            (uint64_t)(&R1)[(uint64_t)(IP_1 + 3) + 0x1d4]);
    }
}
 

可以看到字节码的地址是 R1地址 + sizeof(typeof(R1)) * 0x1d4,R1 是 32 bits 的,因此可以计算出来地址,在这块地址上刚好有一段连续的看起来比较杂乱的数据,应该就是字节码

int32_t IP = 0x0
int32_t SP = -0x1
int32_t R0 = 0x0
int32_t R1 = 0x0
int32_t R2 = 0x0
int32_t TMP = 0x0
int32_t FLAG = 0x0

将 字节码 dump 成 base64,再根据类型转化为 i32 数组(请注意传入的时候强制转换是后来的事情,从 &R1 + delta 处用数组索引取出来的数自然和 R1 类型一致,为 i32)

import base64
import struct

# VM bytecode dumped from the binary (located at &R1 + 4 * 0x1d4), base64-encoded.
BYTECODE_BYTES = base64.b64decode(
    "SQ8AAAAAAAA/DwAAQA8AAEcPAAAAAAAAQg8AAAAAAAAmAAAARA8AAAIAAABJDwAAAAAAAEUPAAAB"
    "AAAARw8AAAAAAABCDwAAAAAAAAABAABEDwAADQAAAEkPAAAAAAAASQ8AAAEAAAA9DwAAAQAAAAAA"
    "AAACAAAARQ8AAAAAAAA+DwAAAAAAAAwAAAA9DwAAAQAAAAEAAAAAAAAAPg8AAAEAAAAAAQAASA8A"
    "AEcPAAAAAAAAQg8AAAAAAAAAAQAARA8AABoAAABJDwAAAAAAAEkPAAABAAAARw8AAAAAAAA9DwAA"
    "AQAAAAAAAAACAAAAPg8AAAEAAAAAAQAASA8AAEUPAAACAAAAPQ8AAAAAAAAAAAAAAQAAAD4PAAAA"
    "AAAAAAEAAEUPAAAEAAAAQQ8AAEYPAABFDwAAAwAAAEIPAAABAAAAAAAAAEQPAABaAAAAQg8AAAAA"
    "AAAmAAAARA8AADYAAAAPJwAAuCIAAAAAAAA="
)

# The VM indexes the bytecode as 32-bit words of an x86-64 (little-endian)
# binary.  "<I" pins both the word size and the byte order, so decoding does
# not depend on the layout of the machine running this script.
BYTECODE = [word for (word,) in struct.iter_unpack('<I', BYTECODE_BYTES)]

# Number of real bytecode words, excluding the padding appended below.
LEN_BC = len(BYTECODE)

# An instruction fetch always takes four consecutive words (opcode plus up to
# three operands); pad so fetches near the end never run short.
BYTECODE += [0, 0, 0, 0]

接下来就可以根据我们解析出来的每一条指令的含义来编写翻译程序了

 
# VM Part
IP = 0
SP = -1
R0, R1, R2 = 0, 0, 0
TMP  = 0
FLAG = 0
 
# Print Byte Code
while IP < LEN_BC - 1:
    cur_op_data = BYTECODE[IP:IP+4]
    op = cur_op_data[0]
    data = cur_op_data[1:4]
 
    match op:
        case 8888:
            print(f"[{IP:02d}] FAILED")
            IP += 1
        case 9999:
            print(f"[{IP:02d}] SUCCESS")
            IP += 1
        case 0xf3f:
            print(f"[{IP:02d}] son(TMP) := READ(1)")
            IP += 1
        case 0xf3d:
            print(f"[{IP:02d}] R{data[0]} += Table_{data[1]}[R{data[2]}]")
            IP += 4
        case 0xf3e:
            print(f"[{IP:02d}] R{data[0]} %= {data[1]}")
            IP += 3
        case 0xf40:
            print(f"[{IP:02d}] TMP := son(TMP)")
            print(f"[{IP:02d}] PUSH TMP")
            IP += 1
            SP += 1
        case 0xf41:
            print(f"[{IP:02d}] POP (TMP := POP)")
            IP += 1
            SP -= 1
        case 0xf42:
            if data[0] == 0:
                print(f"[{IP:02d}] FLAG := ({data[1]} == R2)")
            elif data[0] == 1:
                print(f"[{IP:02d}] FLAG := (R0 == TMP)")
            IP += 3
        case 0xf43:
            print(f"[{IP:02d}] JZ {data[0]}")
            IP += 2
        case 0xf44:
            print(f"[{IP:02d}] JNZ {data[0]}")
            IP += 2
        case 0xf45:
            if data[0] == 0:
                print(f"[{IP:02d}] R0 := R2")
            elif data[0] == 1:
                print(f"[{IP:02d}] Table_0[R2] := R2")
            elif data[0] == 2:
                print(f"[{IP:02d}] R0 := Table_0[R2]")
            elif data[0] == 3:
                print(f"[{IP:02d}] R0 := Table_2[R2-1]")
            elif data[0] == 4:
                print(f"[{IP:02d}] R0 := Table_0[R0]")
            IP += 2
        case 0xf46:
            print(f"[{IP:02d}] TMP ^= R0")
            IP += 1
        case 0xf47:
            if data[0] == 0:
                print(f"[{IP:02d}] R2 += 1")
            elif data[0] == 1:
                print(f"[{IP:02d}] R1 += 1")
            IP += 2
        case 0xf48:
            print(f"[{IP:02d}] Table_0[R2] = Table_0[R1]")
            print(f"[{IP:02d}] Table_0[R1] = Table_0[R2]")
            IP += 1
        case 0xf49:
            if data[0] == 0:
                print(f"[{IP:02d}] R2 := 0")
            elif data[0] == 1:
                print(f"[{IP:02d}] R1 := 0")
            IP += 2
        case _:
            pass

得到的输出为

[00] R2 := 0
[02] son(TMP) := READ(1)
[03] TMP := son(TMP)
[03] PUSH TMP
[04] R2 += 1
[06] FLAG := (38 == R2)
[09] JNZ 2
[11] R2 := 0
[13] Table_0[R2] := R2
[15] R2 += 1
[17] FLAG := (256 == R2)
[20] JNZ 13
[22] R2 := 0
[24] R1 := 0
[26] R1 += Table_0[R2]
[30] R0 := R2
[32] R0 %= 12
[35] R1 += Table_1[R0]
[39] R1 %= 256
[42] Table_0[R2] = Table_0[R1]
[42] Table_0[R1] = Table_0[R2]
[43] R2 += 1
[45] FLAG := (256 == R2)
[48] JNZ 26
[50] R2 := 0
[52] R1 := 0
[54] R2 += 1
[56] R1 += Table_0[R2]
[60] R1 %= 256
[63] Table_0[R2] = Table_0[R1]
[63] Table_0[R1] = Table_0[R2]
[64] R0 := Table_0[R2]
[66] R0 += Table_0[R1]
[70] R0 %= 256
[73] R0 := Table_0[R0]
[75] POP (TMP := POP)
[76] TMP ^= R0
[77] R0 := Table_2[R2-1]
[79] FLAG := (R0 == TMP)
[82] JNZ 90
[84] FLAG := (38 == R2)
[87] JNZ 54
[89] SUCCESS
[90] FAILED

十分的清晰,赏心悦目()

可以手动转化为 C++ 方便看一点

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <stack>
#include <vector>
 
// Report a mismatch and terminate with exit status 1 (the VM's FAILED path).
void failed() {
    std::cout << "FAILED\n";
    std::exit(1);
}
 
// Report that every byte matched and terminate with exit status 0 (the VM's SUCCESS path).
void success() {
    std::cout << "SUCCESS\n";
    std::exit(0);
}
 
// Hand translation of the disassembled VM program: read 38 input bytes,
// build the S-box with the 12-word key, then check each keystream-XORed
// input byte against Table_2.  Ends through success() or failed().
int main() {
    // The VM pushes every input byte onto its stack, so the check loop below
    // consumes the input back to front.
    std::stack<uint64_t> S;
    for (size_t r2 = 0; r2 < 38; ++r2) {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // when widened to uint64_t.
        S.push(static_cast<unsigned char>(std::cin.get()));
    }

    // Must be sized up front: operator[] on an empty vector is undefined
    // behaviour.
    std::vector<uint64_t> Table_0(256);
    for (size_t r2 = 0; r2 < 256; ++r2) {
        Table_0[r2] = r2;
    }

    // Key words: ASCII "MiniLCTF2023".
    std::vector<uint64_t> Table_1 =
    {
        0x0000004d, 0x00000069, 0x0000006e, 0x00000069,
        0x0000004c, 0x00000043, 0x00000054, 0x00000046,
        0x00000032, 0x00000030, 0x00000032, 0x00000033
    };

    // Expected ciphertext; only the first 38 entries are compared.
    std::vector<uint64_t> Table_2 =
    {
        0x00000093, 0x000000a3, 0x000000cb, 0x000000c9,
        0x000000d6, 0x000000d3, 0x000000f0, 0x000000d5,
        0x000000b1, 0x0000001a, 0x00000054, 0x0000009b,
        0x00000050, 0x000000cb, 0x000000b0, 0x000000b2,
        0x000000eb, 0x0000000f, 0x000000b2, 0x0000008d,
        0x0000002f, 0x000000e6, 0x00000015, 0x000000cb,
        0x000000b5, 0x0000003d, 0x000000d7, 0x0000009c,
        0x000000c5, 0x00000081, 0x0000003f, 0x00000091,
        0x00000090, 0x000000f1, 0x0000009b, 0x000000ab,
        0x0000002f, 0x000000f2, 0x00000000, 0x00000000
    };

    // Key schedule.  r1 carries over between iterations.
    for (size_t r0 = 0, r1 = 0, r2 = 0; r2 < 256; ++r2) {
        r1 += Table_0[r2];
        r0 = r2 % 12;
        r1 += Table_1[r0];
        r1 %= 256;

        // Deliberately NOT a swap: the VM performs two plain stores, and the
        // second one re-reads the slot the first one just overwrote.
        Table_0[r2] = Table_0[r1];
        Table_0[r1] = Table_0[r2];
    }

    // Keystream generation and comparison; r2 runs from 1 to 38.
    for (size_t r0 = 0, r1 = 0, r2 = 0; r2 < 38; ) {
        r2 += 1;
        r1 += Table_0[r2];
        r1 %= 256;

        // Same two-store sequence as in the key schedule.
        Table_0[r2] = Table_0[r1];
        Table_0[r1] = Table_0[r2];

        r0 = Table_0[r2];
        r0 += Table_0[r1];
        r0 %= 256;
        r0 = Table_0[r0];

        auto TMP = S.top();
        S.pop();
        TMP ^= r0;
        r0 = Table_2[r2 - 1];

        if (r0 != TMP) failed();
    }

    success();
}

到这里答案已经呼之欲出了

解密

上述程序即使看不出来是 RC4 变种也可以根据逻辑推出逆向算法,由于 S 盒的变换是一样的,输入对大部分操作都不产生影响,只是和变换后的某个确定位进行异或产生的密文,因此大部分代码可以照抄

# Expected ciphertext (Table_2 in the VM); only the first 38 values are used.
ENC = [
    0x93, 0xa3, 0xcb, 0xc9, 0xd6, 0xd3, 0xf0, 0xd5,
    0xb1, 0x1a, 0x54, 0x9b, 0x50, 0xcb, 0xb0, 0xb2,
    0xeb, 0x0f, 0xb2, 0x8d, 0x2f, 0xe6, 0x15, 0xcb,
    0xb5, 0x3d, 0xd7, 0x9c, 0xc5, 0x81, 0x3f, 0x91,
    0x90, 0xf1, 0x9b, 0xab, 0x2f, 0xf2, 0x00, 0x00,
]

# S-box, starting as the identity table.
S = list(range(256))

# Key words used by the key schedule.
KEY = list(b"MiniLCTF2023")


def _store_pair(box, dst, src):
    """Replay the VM's two consecutive stores; this is intentionally not a swap."""
    box[dst] = box[src]
    box[src] = box[dst]


# Key schedule: the running index carries over between rounds.
j = 0
for i in range(256):
    j = (j + S[i] + KEY[i % 12]) % 256
    _store_pair(S, i, j)

# Keystream generation: XOR each keystream value with the matching
# ciphertext value to recover one input byte.
flag = []
j = 0
for i in range(1, 39):
    j = (j + S[i]) % 256
    _store_pair(S, i, j)
    keystream = S[(S[i] + S[j]) % 256]
    flag.append(keystream ^ ENC[i - 1])

# The VM checks the input in reverse (bytes are pushed, then popped), so the
# recovered bytes are reversed before printing.
print(bytes(flag[::-1]))

注意输入是先逐字节压栈、校验时再逐个出栈的(后进先出),因此解出的字节顺序与输入相反,输出要逆转([::-1])过来

得到 flag
b'a_v1rtu@l_m@ch1ne_w1th_ma9ical_sy$call'