HitCTF2025-Reverse·VM

  1. HitCTF2025-Reverse·VM
    1. 一、前言
    2. 二、开始
    3. easyVM
    4. ComplexVM
    5. 三、结语

HitCTF2025-Reverse·VM

一、前言

​ 说来也是搞笑,从接触 REVERSE 以来,并没有好好去学习或者复现过 vm 逆向,每次打比赛遇到了都选择直接 rm…

​ 昨天刚好看到 HitCTF2025 有两道 vm 逆向,遂学习记录一下。

二、开始

easyVM

​ 附件中是一个 .exe 文件,ida 打开之后,main 函数第一行就是输出,然后可以看到 sub_7FF779ED1270 函数被调用两次,且分别传入不同的上下文结构体,那么基本可以确定 sub_7FF779ED1270 函数就是核心函数,即 VM Engine,跟进 sub_7FF779ED1270 函数进行分析。

​ 其中就是一个 switch-case 结构,根据分析可得到如下:

  • 首先看到*(a1 + 1060) 被频繁读/写,而且每次取 opcode 后都 +1,那么就可以确定这是 指令指针(IP);

  • 然后是*(a1 + 1072),参与*(ip + base),可确定是字节码基址;

  • *(a1 + 1080):拿来跟 IP 比较,说明其是 code_end/code_len;

  • *(a1 + 4*i):i 的范围检查是 <8,可确定有八个寄存器,且每个为四字节;

  • 在 case16 中可以看到 a1 + 1084 被 fgets 写入,可确定其是 输入缓冲;

  • 在 case17 中可以看到 a1 + 1134 被 printf 输出,可确定其是 输出缓冲;

  • 在 case14 中可以看到:根据 *(a1 + 1064) 的最低位是否为 0 来决定是否跳转,逻辑即 jnz,因此 *(a1 + 1064) 就是标志寄存器;

  • sub_7FF779ED11E0 函数就是在更新标志位

// Recomputes the four low bits of the flag byte stored at a1 + 1064 and
// returns the final flag byte.
// At the call sites in the interpreter loop, a2 is the result of the
// operation, a3 the first operand and a4 the second operand (the callers
// negate a4 for SUB/CMP).
//   bit 0 (0x1): set when a2 == 0
//   bit 1 (0x2): set when a2 < 0
//   bit 2 (0x4): set when a2 < a3 and a4 > 0
//   bit 3 (0x8): set when a3 and a4 are both positive but a2 is negative,
//                or both negative but a2 is positive
char __fastcall sub_7FF779ED11E0(__int64 a1, int a2, int a3, int a4)
{
  char v4; // al
  char v6; // al
  char v7; // cl
  char v8; // al
  bool v9; // sf
  char result; // al

  v4 = *(a1 + 1064);
  if ( a2 )                                     // ZF: cleared for a non-zero result, set for zero
    v6 = v4 & 0xFE;
  else
    v6 = v4 | 1;
  *(a1 + 1064) = v6;
  if ( a2 >= 0 )                                // SF: set only for a negative result
    v7 = v6 & 0xFD;
  else
    v7 = v6 | 2;
  *(a1 + 1064) = v7;
  if ( a2 >= a3 || a4 <= 0 )                    // CF: set only when a2 < a3 with a positive a4
    v8 = v7 & 0xFB;
  else
    v8 = v7 | 4;
  *(a1 + 1064) = v8;
  v9 = a3 < 0;
  if ( a3 > 0 )                                 // OF: operands share a sign that the result does not
  {
    if ( a4 > 0 && a2 < 0 )
      goto LABEL_18;
    v9 = a3 < 0;
  }
  if ( v9 && a4 < 0 && a2 > 0 )
  {
LABEL_18:
    // Overflow detected: set bit 3.
    result = v8 | 8;
    *(a1 + 1064) = result;
    return result;
  }
  // No overflow: clear bit 3.
  result = v8 & 0xF7;
  *(a1 + 1064) = result;
  return result;
}

​ 然后根据上面的分析结果,将 a1 重新定义为结构体。在 IDA 上方的菜单栏,依次点击 View -> Open Subviews -> Local types,然后右键点击 Insert,贴入如下结构体声明:

// Context structure of the easyVM interpreter, intended for IDA's Local Types.
// The offsets in the comments follow from the field sizes declared here.
struct VMState
{
  int regs[8];              // offset 0:    eight 4-byte registers
  char _pad0[1028];         // offset 32:   bytes not identified in this analysis
  int ip;                   // offset 1060: instruction pointer (index into code)
  int flags;                // offset 1064: flag bits maintained by sub_7FF779ED11E0
  char _pad1[4];            // offset 1068: padding so that code sits at 1072
  unsigned __int8 *code;    // offset 1072: base address of the bytecode
  int code_end;             // offset 1080: the interpreter stops once ip >= code_end
  char inbuf[50];           // offset 1084: input buffer
  char outbuf[128];         // offset 1134: output buffer
};

​ 接着结合以上分析,即可理清各操作码(opcode)所对应的操作:

// Interpreter loop of the easyVM. Each iteration fetches the opcode byte at
// code[ip], advances ip past it, and dispatches on the opcode. Operand bytes
// are read from the bytes that follow, advancing ip as they are consumed.
// Returns 0 on HALT (opcode 0) or when ip reaches code_end, and 0xFFFFFFFF
// when an operand index is out of range or the opcode is unknown.
__int64 __fastcall sub_7FF779ED1270(struct VMState *vm)
{
  __int64 ip; // rdx
  unsigned __int8 *code; // r8
  int opcode; // ecx
  int v5; // edx
  int v6; // ecx
  unsigned __int8 n0x80; // r9
  char v8; // dl
  int v9; // ecx
  unsigned __int8 n8; // r9
  unsigned __int8 n0x32; // dl
  int v12; // ecx
  unsigned __int8 n8_1; // r9
  unsigned __int8 n8_2; // dl
  int v15; // ecx
  __int64 n8_3; // r9
  int v17; // edx
  int v18; // ecx
  unsigned __int8 n8_4; // r9
  unsigned __int8 n8_5; // dl
  int v21; // r8d
  int *v22; // rcx
  int v23; // r9d
  __int64 n8_6; // r10
  int v25; // r9d
  int v26; // r8d
  int v27; // ecx
  unsigned __int8 n8_7; // r9
  unsigned __int8 n8_8; // dl
  int v30; // r8d
  int *v31; // rcx
  int v32; // r9d
  __int64 n8_9; // r10
  int v34; // r9d
  int v35; // r8d
  unsigned __int8 n8_10; // r10
  unsigned __int8 n8_11; // r9
  int *v38; // rdx
  int v39; // ecx
  __int64 n8_12; // r9
  int v41; // edx
  int v42; // ecx
  unsigned __int8 n8_13; // r9
  unsigned __int8 n8_14; // dl
  __int64 n8_15; // r9
  int v46; // r10d
  bool v47; // zf
  int v48; // ecx
  FILE *v49; // rax
  __int64 v50; // rax

  if ( vm->ip < vm->code_end )
  {
    while ( 2 )
    {
      // Fetch: read the opcode and step ip past it; v5 indexes the first operand byte.
      ip = vm->ip;
      code = vm->code;
      opcode = code[ip];
      v5 = ip + 1;
      vm->ip = v5;
      switch ( opcode )
      {
        case 0:                                 // HALT
          return 0i64;
        case 1:                                 // MOVImm2outBuf: outbuf[idx] = imm, idx must be < 0x80
          v6 = v5 + 1;
          n0x80 = code[v5];
          vm->ip = v5 + 1;
          v8 = code[v5 + 1];
          vm->ip = v6 + 1;
          if ( n0x80 >= 0x80u )
            return 0xFFFFFFFFi64;
          vm->outbuf[n0x80] = v8;
          goto LABEL_42;
        case 2:                                 // MOVinBuf2Reg: regs[r] = inbuf[idx], idx < 0x32, r < 8
          v9 = v5 + 1;
          n8 = code[v5];
          vm->ip = v5 + 1;
          n0x32 = code[v5 + 1];
          vm->ip = v9 + 1;
          if ( n0x32 >= 0x32u || n8 >= 8u )
            return 0xFFFFFFFFi64;
          vm->regs[n8] = vm->inbuf[n0x32];
          goto LABEL_42;
        case 3:                                 // MOVReg2Reg: regs[a] = regs[b]
          v12 = v5 + 1;
          n8_1 = code[v5];
          vm->ip = v5 + 1;
          n8_2 = code[v5 + 1];
          vm->ip = v12 + 1;
          if ( n8_1 >= 8u || n8_2 >= 8u )
            return 0xFFFFFFFFi64;
          vm->regs[n8_1] = vm->regs[n8_2];
          goto LABEL_42;
        case 4:                                 // MOVImm2Reg: regs[r] = imm
          v15 = v5 + 1;
          n8_3 = code[v5];
          vm->ip = v5 + 1;
          v17 = code[v5 + 1];
          vm->ip = v15 + 1;
          if ( n8_3 >= 8 )
            return 0xFFFFFFFFi64;
          vm->regs[n8_3] = v17;
          goto LABEL_42;
        case 5:                                 // ADDReg&Reg: regs[a] += regs[b], then update flags
          v18 = v5 + 1;
          n8_4 = code[v5];
          vm->ip = v5 + 1;
          n8_5 = code[v5 + 1];
          vm->ip = v18 + 1;
          if ( n8_4 >= 8u || n8_5 >= 8u )
            return 0xFFFFFFFFi64;
          v21 = vm->regs[n8_4];
          v22 = &vm->regs[n8_4];
          v23 = vm->regs[n8_5];
          *v22 = v23 + v21;
          sub_7FF779ED11E0(vm, v23 + v21, v21, v23);
          goto LABEL_42;
        case 6:                                 // ADDReg&Imm: regs[r] += imm, then update flags
          n8_6 = code[v5];
          vm->ip = v5 + 1;
          v25 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_6 >= 8 )
            return 0xFFFFFFFFi64;
          v26 = vm->regs[n8_6];
          vm->regs[n8_6] = v26 + v25;
          sub_7FF779ED11E0(vm, v26 + v25, v26, v25);
          goto LABEL_42;
        case 7:                                 // SUBReg&Reg: regs[a] -= regs[b]; flags get the negated subtrahend
          v27 = v5 + 1;
          n8_7 = code[v5];
          vm->ip = v5 + 1;
          n8_8 = code[v5 + 1];
          vm->ip = v27 + 1;
          if ( n8_7 >= 8u || n8_8 >= 8u )
            return 0xFFFFFFFFi64;
          v30 = vm->regs[n8_7];
          v31 = &vm->regs[n8_7];
          v32 = vm->regs[n8_8];
          *v31 = v30 - v32;
          sub_7FF779ED11E0(vm, v30 - v32, v30, -v32);
          goto LABEL_42;
        case 8:                                 // SUBReg&Imm: regs[r] -= imm; flags get the negated immediate
          n8_9 = code[v5];
          vm->ip = v5 + 1;
          v34 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_9 >= 8 )
            return 0xFFFFFFFFi64;
          v35 = vm->regs[n8_9];
          vm->regs[n8_9] = v35 - v34;
          sub_7FF779ED11E0(vm, v35 - v34, v35, -v34);
          goto LABEL_42;
        case 9:                                 // XORReg&Reg: regs[a] ^= regs[b]; flags from the result, operands passed as 0
          n8_10 = code[v5];
          vm->ip = v5 + 1;
          n8_11 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_10 >= 8u || n8_11 >= 8u )
            return 0xFFFFFFFFi64;
          v38 = &vm->regs[n8_10];
          *v38 ^= vm->regs[n8_11];
          sub_7FF779ED11E0(vm, *v38, 0, 0);
          goto LABEL_42;
        case 10:                                // XORReg&Imm: regs[r] ^= imm; flags from the result, operands passed as 0
          v39 = v5 + 1;
          n8_12 = code[v5];
          vm->ip = v5 + 1;
          v41 = code[v5 + 1];
          vm->ip = v39 + 1;
          if ( n8_12 >= 8 )
            return 0xFFFFFFFFi64;
          vm->regs[n8_12] ^= v41;
          sub_7FF779ED11E0(vm, vm->regs[n8_12], 0, 0);
          goto LABEL_42;
        case 11:                                // CMPReg&Reg: flags from regs[a] - regs[b]; no register is written
          v42 = v5 + 1;
          n8_13 = code[v5];
          vm->ip = v5 + 1;
          n8_14 = code[v5 + 1];
          vm->ip = v42 + 1;
          if ( n8_13 >= 8u || n8_14 >= 8u )
            return 0xFFFFFFFFi64;
          sub_7FF779ED11E0(vm, vm->regs[n8_13] - vm->regs[n8_14], vm->regs[n8_13], -vm->regs[n8_14]);
          goto LABEL_42;
        case 12:                                // CMPReg&Imm: flags from regs[r] - imm; no register is written
          n8_15 = code[v5];
          vm->ip = v5 + 1;
          v46 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_15 >= 8 )
            return 0xFFFFFFFFi64;
          sub_7FF779ED11E0(vm, vm->regs[n8_15] - v46, vm->regs[n8_15], -v46);
LABEL_42:
          // Common tail of every non-returning handler: stop once ip has
          // reached code_end, otherwise go fetch the next opcode.
          if ( vm->ip >= vm->code_end )
            return 0i64;
          continue;
        case 13:                                // JMP: ip = one-byte absolute target
          vm->ip = code[v5];
          goto LABEL_42;
        case 14:                                // JNZ: take the jump only when flag bit 0 is clear
          v47 = (vm->flags & 1) == 0;
          v48 = code[v5];
          vm->ip = v5 + 1;
          if ( v47 )
            vm->ip = v48;
          goto LABEL_42;
        case 16:                                // READ: fgets up to 50 bytes into inbuf
          v49 = _acrt_iob_func(0);
          common_fgets<char>(vm->inbuf, 50i64, v49);
          // Count the bytes before the terminating NUL in inbuf (v50 ends up as its length).
          v50 = -1i64;
          do
            ++v50;
          while ( vm->inbuf[v50] );
          // NOTE(review): &vm->code_end + v50 + 3 is presumably IDA's rendering
          // of inbuf[v50 - 1] (code_end sits 4 bytes before inbuf), i.e. this
          // replaces a trailing '\n' (10) with NUL -- confirm in the disassembly.
          if ( v50 && *(&vm->code_end + v50 + 3) == 10 )
            *(&vm->code_end + v50 + 3) = 0;
          goto LABEL_42;
        case 17:                                // PRINT: print outbuf as a C string
          printf("%s", vm->outbuf);
          goto LABEL_42;
        default:                                // unknown opcode
          return 0xFFFFFFFFi64;
      }
    }
  }
  return 0i64;
}

​ 然后编写脚本,将字节码反汇编后输出:

# Instruction length in bytes (opcode byte included) for every easyVM opcode.
OPLEN = {0x00: 1}                                    # HALT: opcode only
OPLEN.update({op: 3 for op in range(0x01, 0x0D)})    # 0x01..0x0C: opcode + two operand bytes
OPLEN.update({0x0D: 2, 0x0E: 2})                     # JMP / JNZ: opcode + one-byte target
OPLEN.update({0x10: 1, 0x11: 1})                     # READ / PRINT: opcode only

def parse_hex(s: str) -> bytes:
    """Convert a whitespace-separated hex dump (spaces, tabs, newlines) into bytes."""
    # str.split() with no argument splits on any whitespace run and drops
    # empty pieces, so no separate normalisation or stripping is needed.
    tokens = s.split()
    return bytes([int(token, 16) for token in tokens])

def disasm(code: bytes) -> None:
    """Print a linear-sweep disassembly of easyVM bytecode, one instruction per line.

    A byte that is not a known opcode is printed as ``db xx`` and skipped on
    its own. If the buffer ends in the middle of an instruction, the leftover
    bytes are printed as a ``db`` line marked ``; truncated`` and decoding
    stops, instead of raising IndexError on the missing operand.
    """
    ip = 0
    while ip < len(code):
        op = code[ip]
        ln = OPLEN.get(op)
        if ln is None:
            print(f"{ip:04x}: db {op:02x}")
            ip += 1
            continue

        ins = code[ip:ip + ln]
        if len(ins) < ln:
            # The slice came back short: the operands run past the end of the buffer.
            leftover = " ".join(f"{x:02x}" for x in ins)
            print(f"{ip:04x}: db {leftover}  ; truncated")
            break

        # Operand bytes; None when the instruction has no such operand.
        a = ins[1] if ln > 1 else None
        b = ins[2] if ln > 2 else None

        if op == 0x00: print(f"{ip:04x}: HALT")
        elif op == 0x01: print(f"{ip:04x}: PUTC out[{a}] = {b:02x} ('{chr(b)}')")
        elif op == 0x02: print(f"{ip:04x}: LOAD R{a}, in[{b}]")
        elif op == 0x03: print(f"{ip:04x}: MOV  R{a}, R{b}")
        elif op == 0x04: print(f"{ip:04x}: MOVI R{a}, {b}")
        elif op == 0x05: print(f"{ip:04x}: ADD  R{a}, R{b}")
        elif op == 0x06: print(f"{ip:04x}: ADDI R{a}, {b}")
        elif op == 0x07: print(f"{ip:04x}: SUB  R{a}, R{b}")
        elif op == 0x08: print(f"{ip:04x}: SUBI R{a}, {b}")
        elif op == 0x09: print(f"{ip:04x}: XOR  R{a}, R{b}")
        elif op == 0x0A: print(f"{ip:04x}: XORI R{a}, {b}")
        elif op == 0x0B: print(f"{ip:04x}: CMP  R{a}, R{b}")
        elif op == 0x0C: print(f"{ip:04x}: CMPI R{a}, {b}")
        elif op == 0x0D: print(f"{ip:04x}: JMP  {a:02x}")
        elif op == 0x0E: print(f"{ip:04x}: JNZ  {a:02x}")
        elif op == 0x10: print(f"{ip:04x}: READ")
        elif op == 0x11: print(f"{ip:04x}: PRINT")
        ip += ln

# Script entry point: decode the embedded hex dump and print its disassembly.
if __name__ == "__main__":
    # Whitespace-separated hex dump of the easyVM bytecode to disassemble.
    HEX = """
    0D 3C 01 06 0A 01 02 6F 01 00 
    57 01 05 21 01 03 6E 01 01 72 
    01 04 67 01 07 00 11 00 01 04 
    65 01 05 63 01 09 00 01 01 6F 
    01 03 72 01 07 21 01 02 72 01 
    08 0A 01 00 43 01 06 74 11 00 
    10 02 00 00 0C 00 66 0E 02 02 
    00 01 03 01 00 06 01 01 0C 01 
    6D 0E 02 02 01 02 02 02 03 02 
    03 04 03 00 01 09 00 02 0C 00 
    06 0E 02 03 00 02 09 00 03 0C 
    00 1C 0E 02 03 00 01 09 00 03 
    0C 00 1A 0E 02 02 01 05 02 02 
    06 02 03 07 0C 02 69 0E 02 03 
    00 01 09 00 02 0C 00 21 0E 02 
    03 00 03 09 00 02 0C 00 3D 0E 
    02 02 04 08 02 05 09 02 06 0A 
    0B 05 03 0E 02 03 00 04 09 00 
    05 0C 00 17 0E 02 03 00 06 09 
    00 05 0C 00 12 0E 02 02 01 0B 
    02 02 0C 02 03 0D 02 04 0E 02 
    05 0F 02 06 10 02 07 11 0B 02 
    04 0E 02 03 00 01 05 00 00 0C 
    00 BE 0E 02 09 00 00 0B 00 07 
    0E 02 05 00 02 0C 00 32 0E 02 
    08 00 02 0C 00 30 0E 02 06 00 
    05 0B 00 05 0E 02 02 00 04 07 
    06 00 0C 06 02 0E 02 0D 1C 00
    """
    code = parse_hex(HEX)
    disasm(code)

​ 得到输出

0000: JMP  3c
0002: PUTC out[6] = 0a ('
')
0005: PUTC out[2] = 6f ('o')
0008: PUTC out[0] = 57 ('W')
000b: PUTC out[5] = 21 ('!')
000e: PUTC out[3] = 6e ('n')
0011: PUTC out[1] = 72 ('r')
0014: PUTC out[4] = 67 ('g')
0017: PUTC out[7] = 00 (' ')
001a: PRINT
001b: HALT
001c: PUTC out[4] = 65 ('e')
001f: PUTC out[5] = 63 ('c')
0022: PUTC out[9] = 00 (' ')
0025: PUTC out[1] = 6f ('o')
0028: PUTC out[3] = 72 ('r')
002b: PUTC out[7] = 21 ('!')
002e: PUTC out[2] = 72 ('r')
0031: PUTC out[8] = 0a ('
')
0034: PUTC out[0] = 43 ('C')
0037: PUTC out[6] = 74 ('t')
003a: PRINT
003b: HALT
003c: READ
003d: LOAD R0, in[0]
0040: CMPI R0, 102
0043: JNZ  02
0045: LOAD R0, in[1]
0048: MOV  R1, R0
004b: ADDI R1, 1
004e: CMPI R1, 109
0051: JNZ  02
0053: LOAD R1, in[2]
0056: LOAD R2, in[3]
0059: LOAD R3, in[4]
005c: MOV  R0, R1
005f: XOR  R0, R2
0062: CMPI R0, 6
0065: JNZ  02
0067: MOV  R0, R2
006a: XOR  R0, R3
006d: CMPI R0, 28
0070: JNZ  02
0072: MOV  R0, R1
0075: XOR  R0, R3
0078: CMPI R0, 26
007b: JNZ  02
007d: LOAD R1, in[5]
0080: LOAD R2, in[6]
0083: LOAD R3, in[7]
0086: CMPI R2, 105
0089: JNZ  02
008b: MOV  R0, R1
008e: XOR  R0, R2
0091: CMPI R0, 33
0094: JNZ  02
0096: MOV  R0, R3
0099: XOR  R0, R2
009c: CMPI R0, 61
009f: JNZ  02
00a1: LOAD R4, in[8]
00a4: LOAD R5, in[9]
00a7: LOAD R6, in[10]
00aa: CMP  R5, R3
00ad: JNZ  02
00af: MOV  R0, R4
00b2: XOR  R0, R5
00b5: CMPI R0, 23
00b8: JNZ  02
00ba: MOV  R0, R6
00bd: XOR  R0, R5
00c0: CMPI R0, 18
00c3: JNZ  02
00c5: LOAD R1, in[11]
00c8: LOAD R2, in[12]
00cb: LOAD R3, in[13]
00ce: LOAD R4, in[14]
00d1: LOAD R5, in[15]
00d4: LOAD R6, in[16]
00d7: LOAD R7, in[17]
00da: CMP  R2, R4
00dd: JNZ  02
00df: MOV  R0, R1
00e2: ADD  R0, R0
00e5: CMPI R0, 190
00e8: JNZ  02
00ea: XOR  R0, R0
00ed: CMP  R0, R7
00f0: JNZ  02
00f2: ADD  R0, R2
00f5: CMPI R0, 50
00f8: JNZ  02
00fa: SUBI R0, 2
00fd: CMPI R0, 48
0100: JNZ  02
0102: ADDI R0, 5
0105: CMP  R0, R5
0108: JNZ  02
010a: LOAD R0, in[4]
010d: SUB  R6, R0
0110: CMPI R6, 2
0113: JNZ  02
0115: JMP  1c
0117: HALT

​ 就是一些简单的约束,这里就不赘述怎么求解了,最终 flag 就是 flag{HiTCTF_2025}


ComplexVM

​ 这道 vm 比上一道要复杂一些。main 函数的开头部分声明了一个很大的 buffer(v33),明显被当成 VM Context(寄存器块 + 字节码区)使用。接着初始化一些变量。然后 sub_140001210 函数就是解释器。

​ while 循环 1024 个 opcode,其中 sub_140004090 函数一直跟下去是反调试的逻辑。第一个 while 循环就是输出 Input your flag: 的。

​ 第二段就很重要了。构造了另一个 VM 上下文 v36 + v40。跟进 sub_1400048C0 函数可以看到有一个 fake flag。

// Walks the hard-coded string "flag{HITCTF_fake}" together with the bytes
// starting at a1 + 1052, for at most 0x11 iterations.
// On each iteration the current fake-flag byte is stored in a1[8], the current
// byte at a1 + 1052 + index in a1[9], and three bits of a1[16] are updated
// from v4. The loop exits early with a1[15] = -1 as soon as bit 0 of a1[16]
// is clear; otherwise a1[15] is set to -86 and the index advances.
__int64 __fastcall sub_1400048C0(_BYTE *a1)
{
  int n0x11; // r9d
  unsigned __int8 *i; // r8
  int v3; // edx
  int v4; // edx
  __int64 n0x11_1; // rax
  char flag_HITCTF_fake__[24]; // [rsp+0h] [rbp-28h] BYREF

  n0x11 = 0;
  strcpy(flag_HITCTF_fake__, "flag{HITCTF_fake}");
  for ( i = a1 + 1052; ; ++i )
  {
    // i - (a1 + 1052) is the current index, so this reads flag_HITCTF_fake__[index].
    v3 = i[flag_HITCTF_fake__ - a1 - 1052];
    a1[8] = v3;
    // NOTE(review): the decompiler shows "v3 - 1" here; a subtraction of *i
    // (a compare of the two bytes) would be expected -- confirm in the disassembly.
    v4 = v3 - 1;
    n0x11_1 = *i;
    a1[9] = n0x11_1;
    if ( v4 )                                   // bit 0: set only when v4 == 0
      a1[16] &= ~1u;
    else
      a1[16] |= 1u;
    if ( (v4 & 0x80u) == 0 )                    // bit 2: mirrors bit 7 of v4
      a1[16] &= ~4u;
    else
      a1[16] |= 4u;
    if ( v4 >= 0 )                              // bit 1: set only when v4 < 0
      a1[16] &= ~2u;
    else
      a1[16] |= 2u;
    if ( (a1[16] & 1) == 0 )                    // bit 0 clear: leave the loop (a1[15] = -1 below)
      break;
    ++n0x11;
    a1[15] = -86;
    n0x11_1 = n0x11;
    if ( n0x11 >= 0x11 )                        // all 0x11 positions processed
      return n0x11_1;
  }
  a1[15] = -1;
  return n0x11_1;
}

​ 并且根据伪代码可知该函数是从 a1 + 1052 开始循环的,同时再看 v36 和 v40 之间的相对偏移

image-20260104144037928

​ 差了 0x1C(28)字节,即 a1 + 1052 正好落在 v40 + 1024 这个位置。故可知 sub_1400048C0 函数就是一个假验证逻辑:拿 flag{HITCTF_fake} 去对比 数据区开头(v40 + 1024),而此时 memset(v40,0,sizeof(v40)) 刚清零,所以比较肯定失败。接下来进入解释器分析。

​ 该 vm 的布局如下:

  • a1 + 20:指令指针 IP;
  • a1 + 16:标志寄存器 Flags(Z/N/C);
  • a1 + 8:寄存器文件 regs[];
  • a1 + 28:字节码/内存池 mem[]。它既被 IP 当作“取指/取操作数”的来源,也被当作栈或者中间存储写入;
  • a1 + 24:栈指针/栈深 SP。代码中反复出现 if (*(a1+24) >= 0x3FF) 这样的边界检查;
  • a1 + 1052 / a1 + 1180:输入缓冲区 & 输出缓冲区。

​ 然后根据分析出的布局,声明一个结构体:

// Context structure of the ComplexVM interpreter (offsets are relative to a1).
typedef struct VMCTX
{
  void **dispatch;        // 0x00: pointer to a handler table such as off_14001DCF8

  // 0x08..0x0F: eight "virtual registers / slots"
  // a1[8], a1[9] and a1[15] all fall in here: reg[0]=a1[8], reg[1]=a1[9], ..., reg[7]=a1[15]
  unsigned __int8 reg[8]; // 0x08

  unsigned int flags;     // 0x10: a1[16] is the low byte of flags (ZF/CF/SF etc.)
  unsigned int ip;        // 0x14: *(a1+20), the IP used for operand fetches and jumps
  unsigned int sp;        // 0x18: *(a1+24), stack pointer/depth (upper bound 0x3FF)

  unsigned __int8 mem[0x400];  // 0x1C: v40[0..1023], bytecode/memory area (the outer loop fetches opcodes from here)
  char input[0x80];            // 0x41C: a1+1052, filled by fgets; case 31 reads characters from here
  char output[0x80];           // 0x49C: a1+1180, written by case 32, printed with printf by case 29/30
} VMCTX;

​ 可以看到在 switch-case 的后半部分全是特殊的赋值情况,其中一个例子如下。即取一个立即数,然后进行加/减一个数,再进行赋值。

image-20260104153727876

​ 然后前半部分就是一些常见的运算赋值等操作。然后就可以使用脚本进行输出汇编操作了

import sys
import argparse
import re

# operand kinds
R   = "R"     # 1 byte register index
I8  = "I8"    # 1 byte immediate
A16 = "A16"   # 2 byte address (big-endian)

# Base opcode table (the batch of opcodes that has already been verified).
# Maps opcode -> (mnemonic, list of operand kinds in encoding order).
OPS = {
    0x00: ("NOP",  []),

    0x01: ("MOV",  [R, R]),
    0x02: ("ADD",  [R, R]),
    0x03: ("SUB",  [R, R]),
    0x04: ("AND",  [R, R]),
    0x05: ("OR",   [R, R]),
    0x06: ("XOR",  [R, R]),

    0x07: ("MOVI", [R, I8]),
    0x08: ("ADDI", [R, I8]),
    0x09: ("SUBI", [R, I8]),
    0x0A: ("ANDI", [R, I8]),
    0x0B: ("ORI",  [R, I8]),
    0x0C: ("XORI", [R, I8]),

    0x0D: ("NOT",  [R]),
    0x0E: ("SHL1", [R]),
    0x0F: ("SHR1", [R]),

    0x10: ("JMP",  [A16]),
    0x11: ("JZ",   [A16]),
    0x12: ("JNZ",  [A16]),
    0x13: ("CALL", [A16]),
    0x14: ("RET",  []),

    0x15: ("PUSH", [R]),
    0x16: ("POP",  [R]),

    0x17: ("CMP",  [R, R]),
    0x18: ("CMPI", [R, I8]),

    0x19: ("INC",  [R]),
    0x1A: ("DEC",  [R]),

    0x1B: ("DAA",  []),

    0x1C: ("READ", []),
    0x1D: ("PRINT", []),
    0x1E: ("PRINT2", []),

    0x1F: ("LDI_IN",  [R, I8]),   # r = input[idx]
    0x20: ("STO_OUT", [I8, R]),   # output[idx] = r
}

# Opcodes 0x21~0x25 (taken from the pasted decompilation): each is encoded as
# [opcode][dst][imm] and means reg[dst] = imm + adj
# case 0x21: imm - 54
# case 0x22: imm - 112
# case 0x23: imm + 68
# case 0x24: imm + 57
# case 0x25: imm + 78
LDC_ADJ = {
    0x21: -54,
    0x22: -112,
    0x23: +68,
    0x24: +57,
    0x25: +78,
}

def u16be(b0, b1):
    """Combine two bytes into a 16-bit big-endian value; ``b0`` is the high byte."""
    high = b0 & 0xFF
    low = b1 & 0xFF
    return (high << 8) | low

def is_printable(x):
    """Return True when ``x`` lies in the printable ASCII range 0x20..0x7E."""
    return x >= 0x20 and x <= 0x7E

def decode_one(code, pc, guess_ldc=False):
    """
    Decode the single instruction that starts at offset ``pc`` of ``code``.

    Returns ``(next_pc, asm_line, jump_target_or_None)``. The jump target is
    only set for instructions that carry a 16-bit address operand. When fewer
    bytes remain than the instruction needs, the remaining bytes are dumped
    with a ``; truncated`` note and ``next_pc`` is the end of the buffer.
    """
    total = len(code)
    if pc >= total:
        return pc + 1, "%04X: <eof>" % pc, None

    def hexdump(chunk):
        return " ".join("%02X" % byte for byte in chunk)

    def truncated():
        # Not enough bytes left for a whole instruction: dump the tail as-is.
        return total, "%04X: %s  ; truncated" % (pc, hexdump(code[pc:])), None

    opcode = code[pc]

    # Fixed 3-byte [opcode][dst][imm] forms: the known LDC variants and, when
    # guess_ldc is on, any opcode in 0x21..0xFE missing from the base table
    # (consumed as 3 bytes so the sweep does not lose sync).
    known_ldc = opcode in LDC_ADJ
    guessed = (not known_ldc) and guess_ldc and (0x21 <= opcode <= 0xFE) and (opcode not in OPS)
    if known_ldc or guessed:
        if pc + 3 > total:
            return truncated()
        dst, imm = code[pc + 1], code[pc + 2]
        raw = "%02X %02X %02X" % (opcode, dst, imm)
        if guessed:
            text = "%04X: %-11s OP%02X   r%d, 0x%02X" % (pc, raw, opcode, dst, imm)
            return pc + 3, text, None
        delta = LDC_ADJ[opcode]
        value = (imm + delta) & 0xFF
        shown = (" '%s'" % chr(value)) if is_printable(value) else ""
        if delta >= 0:
            sign = "+"
        else:
            sign = "-"
        text = "%04X: %-11s LDC     r%d, 0x%02X%s   ; imm=0x%02X %s %d" % (
            pc, raw, dst, value, shown, imm, sign, abs(delta)
        )
        return pc + 3, text, None

    # Anything else must come from the base opcode table.
    if opcode not in OPS:
        text = "%04X: %-11s DB      0x%02X   ; unknown" % (pc, "%02X" % opcode, opcode)
        return pc + 1, text, None

    mnemonic, operand_kinds = OPS[opcode]

    # Instruction length: the opcode byte plus 2 bytes per address, 1 per other operand.
    length = 1 + sum(2 if kind == A16 else 1 for kind in operand_kinds)
    if pc + length > total:
        return truncated()

    raw = hexdump(code[pc:pc + length])

    # Render the operands in encoding order.
    cursor = pc + 1
    rendered = []
    target = None
    for kind in operand_kinds:
        if kind == A16:
            target = u16be(code[cursor], code[cursor + 1])
            rendered.append("loc_%04X" % target)
            cursor += 2
            continue
        byte = code[cursor]
        cursor += 1
        rendered.append(("r%d" if kind == R else "0x%02X") % byte)

    if rendered:
        text = "%04X: %-11s %-7s %s" % (pc, raw, mnemonic, ", ".join(rendered))
    else:
        text = "%04X: %-11s %s" % (pc, raw, mnemonic)

    return pc + length, text, target

def disasm(code, labels=False, guess_ldc=False):
    """Linear-sweep disassemble ``code`` and print one line per instruction.

    With ``labels`` set, a first sweep gathers every jump target reported by
    ``decode_one`` and the listing then prints a ``loc_XXXX:`` line in front of
    each offset that is such a target. ``guess_ldc`` is forwarded unchanged.
    """
    def sweep_targets():
        # Walk the stream once, yielding the target reported for each instruction.
        offset = 0
        while offset < len(code):
            following, _text, target = decode_one(code, offset, guess_ldc=guess_ldc)
            yield target
            if following <= offset:  # decoder made no progress: stop sweeping
                return
            offset = following

    # Pass 1 (optional): collect jump targets.
    targets = set()
    if labels:
        targets = {t for t in sweep_targets() if t is not None}

    # Pass 2: print the listing.
    offset = 0
    while offset < len(code):
        if labels and offset in targets:
            print("loc_%04X:" % offset)
        following, text, _target = decode_one(code, offset, guess_ldc=guess_ldc)
        print(text)
        if following <= offset:
            break
        offset = following

def read_hex_stream(text):
    """Turn whitespace-separated hex byte tokens (spaces or newlines) into bytes."""
    tokens = text.split()
    if len(tokens) == 0:
        return b""
    values = [int(token, 16) for token in tokens]
    return bytes(values)

def main():
    """Command-line entry point: read hex bytes and print their disassembly.

    The bytes come from the file named by ``--hexfile`` or, when that option
    is omitted, from standard input. ``--labels`` and ``--guess-ldc`` are
    passed through to ``disasm``. Prints a usage hint and returns when the
    input holds no bytes.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--hexfile", help="hex bytes file (e.g. code.hex). If omitted, read from stdin.")
    ap.add_argument("--labels", action="store_true", help="print labels for jump targets")
    ap.add_argument("--guess-ldc", action="store_true",
                    help="treat unknown opcodes in 0x21..0xFE as 3-byte (opcode,dst,imm) to avoid desync")
    args = ap.parse_args()

    if args.hexfile:
        # Use a context manager so the file handle is closed deterministically.
        with open(args.hexfile, "r", encoding="utf-8", errors="ignore") as fh:
            text = fh.read()
    else:
        text = sys.stdin.read()

    code = read_hex_stream(text)
    if not code:
        print("No input bytes. Usage: python vm_disasm.py --hexfile code.hex")
        return

    disasm(code, labels=args.labels, guess_ldc=args.guess_ldc)

# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()

​ 拿到输出

0000: 13 00 75    CALL    loc_0075
0003: 06 05 05    XOR     r5, r5
0006: 07 01 72    MOVI    r1, 0x72
0009: 07 03 6E    MOVI    r3, 0x6E
000C: 07 02 6F    MOVI    r2, 0x6F
000F: 07 04 67    MOVI    r4, 0x67
0012: 07 00 57    MOVI    r0, 0x57
0015: 20 00 00    STO_OUT 0x00, r0
0018: 20 04 04    STO_OUT 0x04, r4
001B: 20 02 02    STO_OUT 0x02, r2
001E: 20 03 03    STO_OUT 0x03, r3
0021: 20 01 01    STO_OUT 0x01, r1
0024: 20 05 05    STO_OUT 0x05, r5
0027: 1E          PRINT2
0028: 00          NOP
0029: FF          DB      0xFF   ; unknown
002A: 07 01 6F    MOVI    r1, 0x6F
002D: 07 04 65    MOVI    r4, 0x65
0030: 07 02 72    MOVI    r2, 0x72
0033: 07 06 74    MOVI    r6, 0x74
0036: 07 00 43    MOVI    r0, 0x43
0039: 07 05 63    MOVI    r5, 0x63
003C: 07 03 72    MOVI    r3, 0x72
003F: 06 07 07    XOR     r7, r7
0042: 20 03 03    STO_OUT 0x03, r3
0045: 20 01 01    STO_OUT 0x01, r1
0048: 20 02 02    STO_OUT 0x02, r2
004B: 20 00 00    STO_OUT 0x00, r0
004E: 20 04 04    STO_OUT 0x04, r4
0051: 20 06 06    STO_OUT 0x06, r6
0054: 20 05 05    STO_OUT 0x05, r5
0057: 20 07 07    STO_OUT 0x07, r7
005A: 1E          PRINT2
005B: 00          NOP
005C: FF          DB      0xFF   ; unknown
005D: 07 00 FF    MOVI    r0, 0xFF
0060: 09 00 01    SUBI    r0, 0x01
0063: 12 00 60    JNZ     loc_0060
0066: 14          RET
0067: 00          NOP
0068: 00          NOP
0069: 07 00 AA    MOVI    r0, 0xAA
006C: 07 01 55    MOVI    r1, 0x55
006F: 06 00 01    XOR     r0, r1
0072: 14          RET
0073: 00          NOP
0074: 00          NOP
0075: 07 00 00    MOVI    r0, 0x00
0078: 13 00 69    CALL    loc_0069
007B: 07 01 FF    MOVI    r1, 0xFF
007E: 17 00 01    CMP     r0, r1
0081: 12 00 03    JNZ     loc_0003
0084: 1C          READ
0085: 00          NOP
0086: 00          NOP
0087: 1F 00 02    LDI_IN  r0, 0x02
008A: 18 00 61    CMPI    r0, 0x61
008D: 12 00 03    JNZ     loc_0003
0090: 1F 00 01    LDI_IN  r0, 0x01
0093: 18 00 6C    CMPI    r0, 0x6C
0096: 12 00 03    JNZ     loc_0003
0099: 1F 00 00    LDI_IN  r0, 0x00
009C: 18 00 66    CMPI    r0, 0x66
009F: 12 00 03    JNZ     loc_0003
00A2: 1F 00 03    LDI_IN  r0, 0x03
00A5: 18 00 67    CMPI    r0, 0x67
00A8: 12 00 03    JNZ     loc_0003
00AB: 1F 00 04    LDI_IN  r0, 0x04
00AE: 18 00 7B    CMPI    r0, 0x7B
00B1: 12 00 03    JNZ     loc_0003
00B4: 1F 00 07    LDI_IN  r0, 0x07
00B7: 18 00 54    CMPI    r0, 0x54
00BA: 12 00 03    JNZ     loc_0003
00BD: 1F 00 06    LDI_IN  r0, 0x06
00C0: 18 00 49    CMPI    r0, 0x49
00C3: 12 00 03    JNZ     loc_0003
00C6: 1F 00 05    LDI_IN  r0, 0x05
00C9: 18 00 48    CMPI    r0, 0x48
00CC: 12 00 03    JNZ     loc_0003
00CF: 1F 00 08    LDI_IN  r0, 0x08
00D2: 18 00 43    CMPI    r0, 0x43
00D5: 12 00 03    JNZ     loc_0003
00D8: 1F 00 0C    LDI_IN  r0, 0x0C
00DB: 18 00 30    CMPI    r0, 0x30
00DE: 12 00 03    JNZ     loc_0003
00E1: 1F 00 0A    LDI_IN  r0, 0x0A
00E4: 18 00 46    CMPI    r0, 0x46
00E7: 12 00 03    JNZ     loc_0003
00EA: 1F 00 0B    LDI_IN  r0, 0x0B
00ED: 18 00 32    CMPI    r0, 0x32
00F0: 12 00 03    JNZ     loc_0003
00F3: 1F 00 09    LDI_IN  r0, 0x09
00F6: 18 00 54    CMPI    r0, 0x54
00F9: 12 00 03    JNZ     loc_0003
00FC: 1F 00 0D    LDI_IN  r0, 0x0D
00FF: 18 00 32    CMPI    r0, 0x32
0102: 12 00 03    JNZ     loc_0003
0105: 1F 00 0E    LDI_IN  r0, 0x0E
0108: 18 00 35    CMPI    r0, 0x35
010B: 12 00 03    JNZ     loc_0003
010E: 1F 00 0F    LDI_IN  r0, 0x0F
0111: 18 00 5F    CMPI    r0, 0x5F
0114: 12 00 03    JNZ     loc_0003
0117: 1F 00 10    LDI_IN  r0, 0x10
011A: 1F 01 0B    LDI_IN  r1, 0x0B
011D: 17 00 01    CMP     r0, r1
0120: 12 00 03    JNZ     loc_0003
0123: 1F 00 11    LDI_IN  r0, 0x11
0126: 22 01 A8    LDC     r1, 0x38 '8'   ; imm=0xA8 - 112
0129: 17 01 00    CMP     r1, r0
012C: 12 00 03    JNZ     loc_0003
012F: 1F 00 12    LDI_IN  r0, 0x12
0132: 09 01 01    SUBI    r1, 0x01
0135: 17 01 00    CMP     r1, r0
0138: 12 00 03    JNZ     loc_0003
013B: 07 00 80    MOVI    r0, 0x80
013E: 15 00       PUSH    r0
0140: 00          NOP
0141: 13 00 5D    CALL    loc_005D
0144: 16 00       POP     r0
0146: 00          NOP
0147: 09 00 01    SUBI    r0, 0x01
014A: 12 01 3E    JNZ     loc_013E
014D: 1F 00 13    LDI_IN  r0, 0x13
0150: 06 01 00    XOR     r1, r0
0153: 18 01 52    CMPI    r1, 0x52
0156: 12 00 03    JNZ     loc_0003
0159: 1F 00 14    LDI_IN  r0, 0x14
015C: 1F 01 17    LDI_IN  r1, 0x17
015F: 06 00 01    XOR     r0, r1
0162: 12 00 03    JNZ     loc_0003
0165: 18 01 63    CMPI    r1, 0x63
0168: 12 00 03    JNZ     loc_0003
016B: 1F 00 15    LDI_IN  r0, 0x15
016E: 1F 01 16    LDI_IN  r1, 0x16
0171: 03 01 00    SUB     r1, r0
0174: 18 01 03    CMPI    r1, 0x03
0177: 12 00 03    JNZ     loc_0003
017A: 1F 01 10    LDI_IN  r1, 0x10
017D: 08 01 02    ADDI    r1, 0x02
0180: 17 01 00    CMP     r1, r0
0183: 12 00 03    JNZ     loc_0003
0186: 1F 00 18    LDI_IN  r0, 0x18
0189: 18 00 7D    CMPI    r0, 0x7D
018C: 12 00 03    JNZ     loc_0003
018F: 1F 00 19    LDI_IN  r0, 0x19
0192: 18 00 00    CMPI    r0, 0x00
0195: 12 00 03    JNZ     loc_0003
0198: 07 00 80    MOVI    r0, 0x80
019B: 15 00       PUSH    r0
019D: 00          NOP
019E: 13 00 5D    CALL    loc_005D
01A1: 16 00       POP     r0
01A3: 00          NOP
01A4: 09 00 01    SUBI    r0, 0x01
01A7: 12 01 9B    JNZ     loc_019B
01AA: 10 00 2A    JMP     loc_002A
01AD: FF          DB      0xFF   ; unknown
01AE: 00          NOP
01AF: 00          NOP

​ 具体如何解密出 flag 这里就不赘述了,并不复杂。flag:flag{HITCTF2025_287ec47c}

三、结语

​ 结合 AI 写下来感觉也还好,这两题蛮适合 vm 入门。


转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 1621925986@qq.com

💰

×

Help us with donation