HitCTF2025-Reverse·VM
一、前言
说来也是搞笑,从接触 REVERSE 以来,并没有好好去学习或者复现过 vm 逆向,每次打比赛遇到了都选择直接 rm…
昨天刚好看到 HitCTF2025 有两道 vm 逆向,遂学习记录一下。
二、开始
easyVM
附件中是一个 .exe 文件,ida 打开之后,main 函数第一行就是输出,然后可以看到 sub_7FF779ED1270 函数被调用两次,且分别传入不同的上下文结构体,那么基本可以确定 sub_7FF779ED1270 函数就是核心函数,即 VM Engine,跟进 sub_7FF779ED1270 函数进行分析。
其中就是一个 switch-case 结构,根据分析可得到如下:
首先看到 *(a1 + 1060) 被频繁读/写,而且每次取 opcode 后都 +1,那么就可以确定这是指令指针(IP);然后是 *(a1 + 1072),参与 *(ip + base),可确定是字节码基址;*(a1 + 1080):拿来跟 IP 比较,说明其是 code_end/code_len;*(a1 + 4*i):i 的范围检查是 <8,可确定有八个寄存器,且每个为四字节;
在 case16 中可以看到 a1 + 1084 被 fgets 写入,可确定其是输入缓冲;
在 case17 中可以看到 a1 + 1134 被 printf 输出,可确定其是输出缓冲;
在 case14 中看到根据 *(a1 + 1064) 的值进行判断然后跳转,逻辑即 jnz;sub_7FF779ED11E0 函数就是在更新标志位:
/*
 * Recomputes the VM flag byte stored at offset 1064 of the context after an
 * arithmetic / compare instruction and returns the updated byte.
 *
 *   a1 - base address of the VM context
 *   a2 - result of the operation
 *   a3 - first operand (the value before the operation)
 *   a4 - second operand (the interpreter passes the negated value for
 *        SUB/CMP and 0 for XOR)
 *
 * Bits written:
 *   0x01 - set when a2 == 0
 *   0x02 - set when a2 < 0
 *   0x04 - set when a2 < a3 and a4 > 0
 *   0x08 - set when both operands share a sign and the result has the
 *          opposite sign
 *
 * NOTE(review): `*(a1 + 1064)` is printed without the byte-pointer cast the
 * decompiler normally emits; it is read here as a one-byte access.
 */
char __fastcall sub_7FF779ED11E0(__int64 a1, int a2, int a3, int a4)
{
  char v4; // al
  char v6; // al
  char v7; // cl
  char v8; // al
  bool v9; // sf
  char result; // al
  v4 = *(a1 + 1064);
  if ( a2 ) // ZF (bit 0): cleared for a non-zero result, set for zero
    v6 = v4 & 0xFE;
  else
    v6 = v4 | 1;
  *(a1 + 1064) = v6;
  if ( a2 >= 0 ) // SF (bit 1): set only for a negative result
    v7 = v6 & 0xFD;
  else
    v7 = v6 | 2;
  *(a1 + 1064) = v7;
  if ( a2 >= a3 || a4 <= 0 ) // CF (bit 2): set only when a2 < a3 while a4 > 0
    v8 = v7 & 0xFB;
  else
    v8 = v7 | 4;
  *(a1 + 1064) = v8;
  v9 = a3 < 0;
  if ( a3 > 0 ) // OF (bit 3): positive + positive giving a negative result ...
  {
    if ( a4 > 0 && a2 < 0 )
      goto LABEL_18;
    v9 = a3 < 0;
  }
  // ... or negative + negative giving a positive result
  if ( v9 && a4 < 0 && a2 > 0 )
  {
LABEL_18:
    result = v8 | 8;
    *(a1 + 1064) = result;
    return result;
  }
  // No signed overflow detected: clear bit 3.
  result = v8 & 0xF7;
  *(a1 + 1064) = result;
  return result;
}
然后根据上面的分析结果,对 a1 进行重定义,定义为结构体。在 IDA 上方的菜单栏,依次点击 View -> Open Subviews -> Local types,然后右键点击 insert,贴入如下结构体的声明
// Context structure of the easyVM interpreter, laid out so that every field
// lands on the offset observed in the decompiled code.
struct VMState
{
  int regs[8];            // offset 0:    eight 4-byte registers (indices are checked against 8)
  char _pad0[1028];       // offset 32:   filler up to offset 1060
  int ip;                 // offset 1060: instruction pointer (index into code)
  int flags;              // offset 1064: flag byte maintained by sub_7FF779ED11E0
  char _pad1[4];          // offset 1068: filler up to offset 1072
  unsigned __int8 *code;  // offset 1072: base address of the bytecode
  int code_end;           // offset 1080: limit that ip is compared against
  char inbuf[50];         // offset 1084: buffer filled by fgets (opcode 16)
  char outbuf[128];       // offset 1134: buffer printed with printf (opcode 17)
};
接着结合分析,可清楚各操作码(opcode)所对应的操作
/*
 * Interpreter loop of easyVM (pseudocode after applying struct VMState).
 *
 * Each iteration fetches the opcode byte at code[ip], advances ip past it and
 * dispatches on the opcode. Operands are single bytes that follow the opcode;
 * two-operand instructions read code[v5] and code[v5 + 1] and leave ip at
 * v5 + 2.
 *
 * Returns 0 when opcode 0 is executed or when ip reaches code_end, and
 * 0xFFFFFFFF when an operand index fails its bounds check or the opcode has
 * no case.
 */
__int64 __fastcall sub_7FF779ED1270(struct VMState *vm)
{
  __int64 ip; // rdx
  unsigned __int8 *code; // r8
  int opcode; // ecx
  int v5; // edx
  int v6; // ecx
  unsigned __int8 n0x80; // r9
  char v8; // dl
  int v9; // ecx
  unsigned __int8 n8; // r9
  unsigned __int8 n0x32; // dl
  int v12; // ecx
  unsigned __int8 n8_1; // r9
  unsigned __int8 n8_2; // dl
  int v15; // ecx
  __int64 n8_3; // r9
  int v17; // edx
  int v18; // ecx
  unsigned __int8 n8_4; // r9
  unsigned __int8 n8_5; // dl
  int v21; // r8d
  int *v22; // rcx
  int v23; // r9d
  __int64 n8_6; // r10
  int v25; // r9d
  int v26; // r8d
  int v27; // ecx
  unsigned __int8 n8_7; // r9
  unsigned __int8 n8_8; // dl
  int v30; // r8d
  int *v31; // rcx
  int v32; // r9d
  __int64 n8_9; // r10
  int v34; // r9d
  int v35; // r8d
  unsigned __int8 n8_10; // r10
  unsigned __int8 n8_11; // r9
  int *v38; // rdx
  int v39; // ecx
  __int64 n8_12; // r9
  int v41; // edx
  int v42; // ecx
  unsigned __int8 n8_13; // r9
  unsigned __int8 n8_14; // dl
  __int64 n8_15; // r9
  int v46; // r10d
  bool v47; // zf
  int v48; // ecx
  FILE *v49; // rax
  __int64 v50; // rax
  if ( vm->ip < vm->code_end )
  {
    while ( 2 )
    {
      // Fetch: read the opcode and step ip; v5 now indexes the first operand byte.
      ip = vm->ip;
      code = vm->code;
      opcode = code[ip];
      v5 = ip + 1;
      vm->ip = v5;
      switch ( opcode )
      {
        case 0: // HALT: stop with return value 0
          return 0i64;
        case 1: // outbuf[idx] = imm8 (idx must be < 0x80)
          v6 = v5 + 1;
          n0x80 = code[v5];
          vm->ip = v5 + 1;
          v8 = code[v5 + 1];
          vm->ip = v6 + 1;
          if ( n0x80 >= 0x80u )
            return 0xFFFFFFFFi64;
          vm->outbuf[n0x80] = v8;
          goto LABEL_42;
        case 2: // regs[r] = inbuf[idx] (idx must be < 0x32, r < 8)
          v9 = v5 + 1;
          n8 = code[v5];
          vm->ip = v5 + 1;
          n0x32 = code[v5 + 1];
          vm->ip = v9 + 1;
          if ( n0x32 >= 0x32u || n8 >= 8u )
            return 0xFFFFFFFFi64;
          vm->regs[n8] = vm->inbuf[n0x32];
          goto LABEL_42;
        case 3: // regs[dst] = regs[src]
          v12 = v5 + 1;
          n8_1 = code[v5];
          vm->ip = v5 + 1;
          n8_2 = code[v5 + 1];
          vm->ip = v12 + 1;
          if ( n8_1 >= 8u || n8_2 >= 8u )
            return 0xFFFFFFFFi64;
          vm->regs[n8_1] = vm->regs[n8_2];
          goto LABEL_42;
        case 4: // regs[dst] = imm8
          v15 = v5 + 1;
          n8_3 = code[v5];
          vm->ip = v5 + 1;
          v17 = code[v5 + 1];
          vm->ip = v15 + 1;
          if ( n8_3 >= 8 )
            return 0xFFFFFFFFi64;
          vm->regs[n8_3] = v17;
          goto LABEL_42;
        case 5: // regs[dst] += regs[src], then update flags
          v18 = v5 + 1;
          n8_4 = code[v5];
          vm->ip = v5 + 1;
          n8_5 = code[v5 + 1];
          vm->ip = v18 + 1;
          if ( n8_4 >= 8u || n8_5 >= 8u )
            return 0xFFFFFFFFi64;
          v21 = vm->regs[n8_4];
          v22 = &vm->regs[n8_4];
          v23 = vm->regs[n8_5];
          *v22 = v23 + v21;
          sub_7FF779ED11E0(vm, v23 + v21, v21, v23);
          goto LABEL_42;
        case 6: // regs[dst] += imm8, then update flags
          n8_6 = code[v5];
          vm->ip = v5 + 1;
          v25 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_6 >= 8 )
            return 0xFFFFFFFFi64;
          v26 = vm->regs[n8_6];
          vm->regs[n8_6] = v26 + v25;
          sub_7FF779ED11E0(vm, v26 + v25, v26, v25);
          goto LABEL_42;
        case 7: // regs[dst] -= regs[src]; flags are computed with the negated subtrahend
          v27 = v5 + 1;
          n8_7 = code[v5];
          vm->ip = v5 + 1;
          n8_8 = code[v5 + 1];
          vm->ip = v27 + 1;
          if ( n8_7 >= 8u || n8_8 >= 8u )
            return 0xFFFFFFFFi64;
          v30 = vm->regs[n8_7];
          v31 = &vm->regs[n8_7];
          v32 = vm->regs[n8_8];
          *v31 = v30 - v32;
          sub_7FF779ED11E0(vm, v30 - v32, v30, -v32);
          goto LABEL_42;
        case 8: // regs[dst] -= imm8, then update flags
          n8_9 = code[v5];
          vm->ip = v5 + 1;
          v34 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_9 >= 8 )
            return 0xFFFFFFFFi64;
          v35 = vm->regs[n8_9];
          vm->regs[n8_9] = v35 - v34;
          sub_7FF779ED11E0(vm, v35 - v34, v35, -v34);
          goto LABEL_42;
        case 9: // regs[dst] ^= regs[src]; flags updated with both operands passed as 0
          n8_10 = code[v5];
          vm->ip = v5 + 1;
          n8_11 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_10 >= 8u || n8_11 >= 8u )
            return 0xFFFFFFFFi64;
          v38 = &vm->regs[n8_10];
          *v38 ^= vm->regs[n8_11];
          sub_7FF779ED11E0(vm, *v38, 0, 0);
          goto LABEL_42;
        case 10: // regs[dst] ^= imm8; flags updated with both operands passed as 0
          v39 = v5 + 1;
          n8_12 = code[v5];
          vm->ip = v5 + 1;
          v41 = code[v5 + 1];
          vm->ip = v39 + 1;
          if ( n8_12 >= 8 )
            return 0xFFFFFFFFi64;
          vm->regs[n8_12] ^= v41;
          sub_7FF779ED11E0(vm, vm->regs[n8_12], 0, 0);
          goto LABEL_42;
        case 11: // compare regs[a] with regs[b]: flags from a - b, no register is written
          v42 = v5 + 1;
          n8_13 = code[v5];
          vm->ip = v5 + 1;
          n8_14 = code[v5 + 1];
          vm->ip = v42 + 1;
          if ( n8_13 >= 8u || n8_14 >= 8u )
            return 0xFFFFFFFFi64;
          sub_7FF779ED11E0(vm, vm->regs[n8_13] - vm->regs[n8_14], vm->regs[n8_13], -vm->regs[n8_14]);
          goto LABEL_42;
        case 12: // compare regs[a] with imm8: flags from a - imm8, no register is written
          n8_15 = code[v5];
          vm->ip = v5 + 1;
          v46 = code[v5 + 1];
          vm->ip = v5 + 2;
          if ( n8_15 >= 8 )
            return 0xFFFFFFFFi64;
          sub_7FF779ED11E0(vm, vm->regs[n8_15] - v46, vm->regs[n8_15], -v46);
          // Common tail shared by every non-terminating case: stop once ip
          // has reached code_end, otherwise fetch the next instruction.
LABEL_42:
          if ( vm->ip >= vm->code_end )
            return 0i64;
          continue;
        case 13: // JMP: ip = one-byte absolute target
          vm->ip = code[v5];
          goto LABEL_42;
        case 14: // JNZ: take the one-byte target when flag bit 0 (zero) is clear
          v47 = (vm->flags & 1) == 0;
          v48 = code[v5];
          vm->ip = v5 + 1;
          if ( v47 )
            vm->ip = v48;
          goto LABEL_42;
        case 16: // READ: fgets up to 50 bytes into inbuf from stream 0 (presumably stdin)
          v49 = _acrt_iob_func(0);
          common_fgets<char>(vm->inbuf, 50i64, v49);
          // Inline strlen: v50 ends up as the length of inbuf.
          v50 = -1i64;
          do
            ++v50;
          while ( vm->inbuf[v50] );
          // NOTE(review): if this is byte-addressed, offset 1080 + v50 + 3 is
          // inbuf[v50 - 1], so a trailing '\n' (10) is replaced by NUL. The
          // listing presumably lost a byte cast on &vm->code_end; confirm in IDA.
          if ( v50 && *(&vm->code_end + v50 + 3) == 10 )
            *(&vm->code_end + v50 + 3) = 0;
          goto LABEL_42;
        case 17: // PRINT: print outbuf as a C string
          printf("%s", vm->outbuf);
          goto LABEL_42;
        default: // any other opcode aborts the VM
          return 0xFFFFFFFFi64;
      }
    }
  }
  return 0i64;
}
然后编写脚本把汇编进行输出
# Total encoded length in bytes (opcode byte included) of every easyVM
# instruction the disassembler knows about.
OPLEN = {
    0x00: 1,  # HALT
    0x01: 3,  # PUTC  out[idx], imm
    0x02: 3,  # LOAD  reg, in[idx]
    0x03: 3,  # MOV   reg, reg
    0x04: 3,  # MOVI  reg, imm
    0x05: 3,  # ADD   reg, reg
    0x06: 3,  # ADDI  reg, imm
    0x07: 3,  # SUB   reg, reg
    0x08: 3,  # SUBI  reg, imm
    0x09: 3,  # XOR   reg, reg
    0x0A: 3,  # XORI  reg, imm
    0x0B: 3,  # CMP   reg, reg
    0x0C: 3,  # CMPI  reg, imm
    0x0D: 2,  # JMP   target
    0x0E: 2,  # JNZ   target
    0x10: 1,  # READ
    0x11: 1,  # PRINT
}
def parse_hex(s: str) -> bytes:
    """Convert whitespace-separated hex byte tokens into a ``bytes`` object.

    Tokens may be separated by any mix of spaces, tabs and newlines; each
    token is parsed as a base-16 integer.
    """
    # str.split() with no argument already splits on every kind of whitespace
    # and never yields empty tokens.
    tokens = s.split()
    return bytes(int(token, 16) for token in tokens)
def disasm(code: bytes) -> None:
    """Print a one-instruction-per-line disassembly of easyVM bytecode.

    Each line starts with the instruction offset as four lowercase hex
    digits. A byte that is not a key of ``OPLEN`` is emitted as ``db xx`` and
    decoding resumes at the next byte.

    If the bytecode ends in the middle of an instruction, the leftover bytes
    are emitted as ``db ... ; truncated`` and decoding stops, instead of
    failing with ``IndexError`` while reading the missing operand.
    """
    ip = 0
    while ip < len(code):
        op = code[ip]
        if op not in OPLEN:
            print(f"{ip:04x}: db {op:02x}")
            ip += 1
            continue

        ln = OPLEN[op]
        ins = code[ip:ip+ln]
        if len(ins) < ln:
            # The slice came back short: the last instruction is incomplete.
            raw = " ".join(f"{x:02x}" for x in ins)
            print(f"{ip:04x}: db {raw} ; truncated")
            break

        # b(i) returns byte i of the current instruction (b(0) is the opcode).
        b = ins.__getitem__
        if op == 0x00: print(f"{ip:04x}: HALT")
        elif op == 0x01: print(f"{ip:04x}: PUTC out[{b(1)}] = {b(2):02x} ('{chr(b(2))}')")
        elif op == 0x02: print(f"{ip:04x}: LOAD R{b(1)}, in[{b(2)}]")
        elif op == 0x03: print(f"{ip:04x}: MOV R{b(1)}, R{b(2)}")
        elif op == 0x04: print(f"{ip:04x}: MOVI R{b(1)}, {b(2)}")
        elif op == 0x05: print(f"{ip:04x}: ADD R{b(1)}, R{b(2)}")
        elif op == 0x06: print(f"{ip:04x}: ADDI R{b(1)}, {b(2)}")
        elif op == 0x07: print(f"{ip:04x}: SUB R{b(1)}, R{b(2)}")
        elif op == 0x08: print(f"{ip:04x}: SUBI R{b(1)}, {b(2)}")
        elif op == 0x09: print(f"{ip:04x}: XOR R{b(1)}, R{b(2)}")
        elif op == 0x0A: print(f"{ip:04x}: XORI R{b(1)}, {b(2)}")
        elif op == 0x0B: print(f"{ip:04x}: CMP R{b(1)}, R{b(2)}")
        elif op == 0x0C: print(f"{ip:04x}: CMPI R{b(1)}, {b(2)}")
        elif op == 0x0D: print(f"{ip:04x}: JMP {b(1):02x}")
        elif op == 0x0E: print(f"{ip:04x}: JNZ {b(1):02x}")
        elif op == 0x10: print(f"{ip:04x}: READ")
        elif op == 0x11: print(f"{ip:04x}: PRINT")
        ip += ln
if __name__ == "__main__":
    # Hex dump of the bytecode to disassemble, as whitespace-separated bytes.
    HEX = """
0D 3C 01 06 0A 01 02 6F 01 00
57 01 05 21 01 03 6E 01 01 72
01 04 67 01 07 00 11 00 01 04
65 01 05 63 01 09 00 01 01 6F
01 03 72 01 07 21 01 02 72 01
08 0A 01 00 43 01 06 74 11 00
10 02 00 00 0C 00 66 0E 02 02
00 01 03 01 00 06 01 01 0C 01
6D 0E 02 02 01 02 02 02 03 02
03 04 03 00 01 09 00 02 0C 00
06 0E 02 03 00 02 09 00 03 0C
00 1C 0E 02 03 00 01 09 00 03
0C 00 1A 0E 02 02 01 05 02 02
06 02 03 07 0C 02 69 0E 02 03
00 01 09 00 02 0C 00 21 0E 02
03 00 03 09 00 02 0C 00 3D 0E
02 02 04 08 02 05 09 02 06 0A
0B 05 03 0E 02 03 00 04 09 00
05 0C 00 17 0E 02 03 00 06 09
00 05 0C 00 12 0E 02 02 01 0B
02 02 0C 02 03 0D 02 04 0E 02
05 0F 02 06 10 02 07 11 0B 02
04 0E 02 03 00 01 05 00 00 0C
00 BE 0E 02 09 00 00 0B 00 07
0E 02 05 00 02 0C 00 32 0E 02
08 00 02 0C 00 30 0E 02 06 00
05 0B 00 05 0E 02 02 00 04 07
06 00 0C 06 02 0E 02 0D 1C 00
"""
    # Turn the dump into bytes and print the listing.
    code = parse_hex(HEX)
    disasm(code)
得到输出
0000: JMP 3c
0002: PUTC out[6] = 0a ('
')
0005: PUTC out[2] = 6f ('o')
0008: PUTC out[0] = 57 ('W')
000b: PUTC out[5] = 21 ('!')
000e: PUTC out[3] = 6e ('n')
0011: PUTC out[1] = 72 ('r')
0014: PUTC out[4] = 67 ('g')
0017: PUTC out[7] = 00 (' ')
001a: PRINT
001b: HALT
001c: PUTC out[4] = 65 ('e')
001f: PUTC out[5] = 63 ('c')
0022: PUTC out[9] = 00 (' ')
0025: PUTC out[1] = 6f ('o')
0028: PUTC out[3] = 72 ('r')
002b: PUTC out[7] = 21 ('!')
002e: PUTC out[2] = 72 ('r')
0031: PUTC out[8] = 0a ('
')
0034: PUTC out[0] = 43 ('C')
0037: PUTC out[6] = 74 ('t')
003a: PRINT
003b: HALT
003c: READ
003d: LOAD R0, in[0]
0040: CMPI R0, 102
0043: JNZ 02
0045: LOAD R0, in[1]
0048: MOV R1, R0
004b: ADDI R1, 1
004e: CMPI R1, 109
0051: JNZ 02
0053: LOAD R1, in[2]
0056: LOAD R2, in[3]
0059: LOAD R3, in[4]
005c: MOV R0, R1
005f: XOR R0, R2
0062: CMPI R0, 6
0065: JNZ 02
0067: MOV R0, R2
006a: XOR R0, R3
006d: CMPI R0, 28
0070: JNZ 02
0072: MOV R0, R1
0075: XOR R0, R3
0078: CMPI R0, 26
007b: JNZ 02
007d: LOAD R1, in[5]
0080: LOAD R2, in[6]
0083: LOAD R3, in[7]
0086: CMPI R2, 105
0089: JNZ 02
008b: MOV R0, R1
008e: XOR R0, R2
0091: CMPI R0, 33
0094: JNZ 02
0096: MOV R0, R3
0099: XOR R0, R2
009c: CMPI R0, 61
009f: JNZ 02
00a1: LOAD R4, in[8]
00a4: LOAD R5, in[9]
00a7: LOAD R6, in[10]
00aa: CMP R5, R3
00ad: JNZ 02
00af: MOV R0, R4
00b2: XOR R0, R5
00b5: CMPI R0, 23
00b8: JNZ 02
00ba: MOV R0, R6
00bd: XOR R0, R5
00c0: CMPI R0, 18
00c3: JNZ 02
00c5: LOAD R1, in[11]
00c8: LOAD R2, in[12]
00cb: LOAD R3, in[13]
00ce: LOAD R4, in[14]
00d1: LOAD R5, in[15]
00d4: LOAD R6, in[16]
00d7: LOAD R7, in[17]
00da: CMP R2, R4
00dd: JNZ 02
00df: MOV R0, R1
00e2: ADD R0, R0
00e5: CMPI R0, 190
00e8: JNZ 02
00ea: XOR R0, R0
00ed: CMP R0, R7
00f0: JNZ 02
00f2: ADD R0, R2
00f5: CMPI R0, 50
00f8: JNZ 02
00fa: SUBI R0, 2
00fd: CMPI R0, 48
0100: JNZ 02
0102: ADDI R0, 5
0105: CMP R0, R5
0108: JNZ 02
010a: LOAD R0, in[4]
010d: SUB R6, R0
0110: CMPI R6, 2
0113: JNZ 02
0115: JMP 1c
0117: HALT
就是一些简单的约束,这里就不赘述怎么求解了,最终 flag 就是 flag{HiTCTF_2025}
ComplexVM
这道 vm 比上一道要复杂一些。main 函数的开头部分声明了一个很大的 buffer(v33),明显被当成 VM Context / 寄存器块 + 字节码区 使用。接着初始化一些变量。然后 sub_140001210 函数就是解释器。
while 循环 1024 个 opcode,其中 sub_140004090 函数一直跟下去是反调试的逻辑。第一个 while 循环就是输出 Input your flag: 的。
第二段就很重要了。构造了另一个 VM 上下文 v36 + v40。跟进 sub_1400048C0 函数可以看到有一个 fake flag。
/*
 * Decoy check in ComplexVM. Walks the hard-coded string "flag{HITCTF_fake}"
 * in lock-step with the bytes starting at a1 + 1052, for at most 0x11 (17)
 * positions.
 *
 * Per position it stores the fake-flag character in a1[8], the byte read from
 * the buffer in a1[9], and rebuilds three bits of the flag byte a1[16] from
 * v4. The loop stops as soon as bit 0 of a1[16] is clear.
 *
 * Side effects: a1[15] is left at -1 when the loop breaks early and at -86
 * when all 17 positions pass.
 *
 * Returns the position counter after a full pass, otherwise the last byte
 * read from the buffer.
 */
__int64 __fastcall sub_1400048C0(_BYTE *a1)
{
  int n0x11; // r9d
  unsigned __int8 *i; // r8
  int v3; // edx
  int v4; // edx
  __int64 n0x11_1; // rax
  char flag_HITCTF_fake__[24]; // [rsp+0h] [rbp-28h] BYREF
  n0x11 = 0;
  strcpy(flag_HITCTF_fake__, "flag{HITCTF_fake}");
  for ( i = a1 + 1052; ; ++i )
  {
    // i - (a1 + 1052) is the current position, so this indexes the fake flag
    // string at that same position.
    v3 = i[flag_HITCTF_fake__ - a1 - 1052];
    a1[8] = v3;
    // NOTE(review): the listing subtracts the constant 1, while the article
    // describes this as a comparison against the buffer (which would be
    // v3 - *i) - confirm against the binary.
    v4 = v3 - 1;
    n0x11_1 = *i;
    a1[9] = n0x11_1;
    // Bit 0: set when v4 is zero.
    if ( v4 )
      a1[16] &= ~1u;
    else
      a1[16] |= 1u;
    // Bit 2: set when bit 7 of v4 is set.
    if ( (v4 & 0x80u) == 0 )
      a1[16] &= ~4u;
    else
      a1[16] |= 4u;
    // Bit 1: set when v4 is negative.
    if ( v4 >= 0 )
      a1[16] &= ~2u;
    else
      a1[16] |= 2u;
    // Stop at the first position whose v4 is non-zero.
    if ( (a1[16] & 1) == 0 )
      break;
    ++n0x11;
    a1[15] = -86;
    n0x11_1 = n0x11;
    if ( n0x11 >= 0x11 )
      return n0x11_1;
  }
  a1[15] = -1;
  return n0x11_1;
}
并且根据伪代码可知该函数是从 a1 + 1052 开始循环的,同时再看 v36 和 v40 之间的相对偏移

差了 0x1C(28)字节,即 a1 + 1052 正好落在 v40 + 1024 这个位置。故可知 sub_1400048C0 函数就是一个假验证逻辑:拿 flag{HITCTF_fake} 去对比 数据区开头(v40 + 1024),而此时 memset(v40,0,sizeof(v40)) 刚清零,所以比较肯定失败。接下来进入解释器分析。
该 vm 的布局如下:
- a1 + 20:指令指针 IP;
- a1 + 16:标志寄存器 Flags(Z/N/C);
- a1 + 8:寄存器文件 regs[];
- a1 + 28:字节码/内存池 mem[]。它既被 IP 当作“取指/取操作数”的来源,也被当做栈或者中间存储写入;
- a1 + 24:栈指针/栈深 SP。反复出现 if (*(a1+24) >= 0x3FF) 这样的检查;
- a1 + 1052 / a1 + 1180:输入缓冲区 & 输出缓冲区。
然后根据分析的布局,声明一个结构体
// Context structure of the ComplexVM interpreter; the hex value in each
// comment is the field's byte offset from the context base (a1).
typedef struct VMCTX
{
  void **dispatch; // 0x00: pointer to a handler table such as off_14001DCF8
  // 0x08..0x0F: eight one-byte virtual registers / slots.
  // The a1[8], a1[9] and a1[15] accesses all land here: reg[0]=a1[8], reg[1]=a1[9], ..., reg[7]=a1[15]
  unsigned __int8 reg[8]; // 0x08
  unsigned int flags; // 0x10: a1[16] is the low byte of flags (ZF/CF/SF, etc.)
  unsigned int ip; // 0x14: *(a1+20), the IP used for operand fetches and jumps
  unsigned int sp; // 0x18: *(a1+24), stack pointer / depth (upper bound 0x3FF)
  unsigned __int8 mem[0x400]; // 0x1C: v40[0..1023], bytecode / memory area (the outer loop fetches opcodes from here)
  char input[0x80]; // 0x41C: a1+1052, filled by fgets; case 31 reads characters from here
  char output[0x80]; // 0x49C: a1+1180, written by case 32 and printed with printf in cases 29/30
} VMCTX;
可以看到在 switch-case 的后半部分全是特殊的赋值情况,其中一个例子如下。即取一个立即数,然后进行加/减一个数,再进行赋值。

然后前半部分就是一些常见的运算赋值等操作。然后就可以使用脚本进行输出汇编操作了
import sys
import argparse
import re
# Operand kinds used in the OPS table below.
R = "R" # 1 byte register index
I8 = "I8" # 1 byte immediate
A16 = "A16" # 2 byte address (big-endian)
# Base opcode table (the batch verified so far).
# Maps opcode byte -> (mnemonic, list of operand kinds in encoding order).
OPS = {
    0x00: ("NOP", []),
    0x01: ("MOV", [R, R]),
    0x02: ("ADD", [R, R]),
    0x03: ("SUB", [R, R]),
    0x04: ("AND", [R, R]),
    0x05: ("OR", [R, R]),
    0x06: ("XOR", [R, R]),
    0x07: ("MOVI", [R, I8]),
    0x08: ("ADDI", [R, I8]),
    0x09: ("SUBI", [R, I8]),
    0x0A: ("ANDI", [R, I8]),
    0x0B: ("ORI", [R, I8]),
    0x0C: ("XORI", [R, I8]),
    0x0D: ("NOT", [R]),
    0x0E: ("SHL1", [R]),
    0x0F: ("SHR1", [R]),
    0x10: ("JMP", [A16]),
    0x11: ("JZ", [A16]),
    0x12: ("JNZ", [A16]),
    0x13: ("CALL", [A16]),
    0x14: ("RET", []),
    0x15: ("PUSH", [R]),
    0x16: ("POP", [R]),
    0x17: ("CMP", [R, R]),
    0x18: ("CMPI", [R, I8]),
    0x19: ("INC", [R]),
    0x1A: ("DEC", [R]),
    0x1B: ("DAA", []),
    0x1C: ("READ", []),
    0x1D: ("PRINT", []),
    0x1E: ("PRINT2", []),
    0x1F: ("LDI_IN", [R, I8]), # r = input[idx]
    0x20: ("STO_OUT", [I8, R]), # output[idx] = r
}
# Opcodes 0x21..0x25: all encoded as [opcode][dst][imm], with the meaning
# reg[dst] = imm + adj, where adj is the per-opcode constant below.
# case 0x21: imm - 54
# case 0x22: imm - 112
# case 0x23: imm + 68
# case 0x24: imm + 57
# case 0x25: imm + 78
LDC_ADJ = {
    0x21: -54,
    0x22: -112,
    0x23: +68,
    0x24: +57,
    0x25: +78,
}
def u16be(b0, b1):
    """Combine two bytes into a 16-bit big-endian value (``b0`` is the high byte).

    Only the low 8 bits of each argument are used.
    """
    high = b0 & 0xFF
    low = b1 & 0xFF
    return (high << 8) + low
def is_printable(x):
    """Return True when ``x`` is a printable ASCII code (0x20 through 0x7E)."""
    return x >= 0x20 and x <= 0x7E
def decode_one(code, pc, guess_ldc=False):
    """Decode the single instruction that starts at offset ``pc``.

    Returns ``(next_pc, asm_line, jump_target)``. ``jump_target`` is ``None``
    unless the instruction carries a 16-bit address operand.

    With ``guess_ldc`` set, an opcode in 0x21..0xFE that is in neither table
    is consumed as a 3-byte ``(opcode, dst, imm)`` instruction so the decoder
    does not lose sync.
    """
    size = len(code)
    if pc >= size:
        return pc + 1, "%04X: <eof>" % pc, None

    def cut_short():
        # Too few bytes left for the instruction: dump the tail and report
        # the end of the stream as the next offset.
        tail = " ".join("%02X" % b for b in code[pc:])
        return size, "%04X: %s ; truncated" % (pc, tail), None

    opcode = code[pc]

    # Fixed 3-byte forms: the known LDC variants and, optionally, guessed ones.
    known_ldc = opcode in LDC_ADJ
    guessed = (not known_ldc) and guess_ldc and (0x21 <= opcode <= 0xFE) and (opcode not in OPS)
    if known_ldc or guessed:
        if pc + 3 > size:
            return cut_short()
        dst, imm = code[pc+1], code[pc+2]
        raw = "%02X %02X %02X" % (opcode, dst, imm)
        if known_ldc:
            adj = LDC_ADJ[opcode]
            val = (imm + adj) & 0xFF
            ch = ""
            if is_printable(val):
                ch = " '%s'" % chr(val)
            sign = "-" if adj < 0 else "+"
            text = "%04X: %-11s LDC r%d, 0x%02X%s ; imm=0x%02X %s %d" % (
                pc, raw, dst, val, ch, imm, sign, abs(adj)
            )
        else:
            text = "%04X: %-11s OP%02X r%d, 0x%02X" % (pc, raw, opcode, dst, imm)
        return pc + 3, text, None

    # Everything else is table driven.
    if opcode not in OPS:
        return pc + 1, "%04X: %-11s DB 0x%02X ; unknown" % (pc, "%02X" % opcode, opcode), None

    mnem, kinds = OPS[opcode]
    # Instruction length: the opcode byte plus 2 bytes per address operand
    # and 1 byte per register / immediate operand.
    length = 1 + sum(2 if k == A16 else 1 for k in kinds)
    if pc + length > size:
        return cut_short()
    raw = " ".join("%02X" % b for b in code[pc:pc+length])

    # Render the operands in encoding order.
    rendered = []
    target = None
    cursor = pc + 1
    for k in kinds:
        if k == A16:
            target = u16be(code[cursor], code[cursor+1])
            rendered.append("loc_%04X" % target)
            cursor += 2
            continue
        fmt = "r%d" if k == R else "0x%02X"
        rendered.append(fmt % code[cursor])
        cursor += 1

    if rendered:
        text = "%04X: %-11s %-7s %s" % (pc, raw, mnem, ", ".join(rendered))
    else:
        text = "%04X: %-11s %s" % (pc, raw, mnem)
    return pc + length, text, target
def disasm(code, labels=False, guess_ldc=False):
    """Print a linear disassembly of ``code``.

    When ``labels`` is true, a first pass collects every jump target and a
    ``loc_XXXX:`` line is printed ahead of each instruction that starts at
    one of them. ``guess_ldc`` is passed through to ``decode_one``.
    """
    end = len(code)

    # Pass 1 (only when labels are requested): collect the jump targets.
    jump_targets = set()
    offset = 0
    while labels and offset < end:
        following, _, dest = decode_one(code, offset, guess_ldc=guess_ldc)
        if dest is not None:
            jump_targets.add(dest)
        if following <= offset:
            break
        offset = following

    # Pass 2: print the listing.
    offset = 0
    while offset < end:
        if labels and offset in jump_targets:
            print("loc_%04X:" % offset)
        following, text, _ = decode_one(code, offset, guess_ldc=guess_ldc)
        print(text)
        if following <= offset:
            break
        offset = following
def read_hex_stream(text):
    """Parse whitespace-separated hex byte tokens (spaces or newlines) into bytes.

    Input with no tokens gives ``b""``.
    """
    return bytes(int(token, 16) for token in text.split())
def main():
    """Command-line entry point: read hex bytes and print their disassembly.

    The bytes come from ``--hexfile`` when given, otherwise from stdin.
    ``--labels`` and ``--guess-ldc`` are passed through to ``disasm``. When
    the input has no bytes, a usage hint is printed and nothing is
    disassembled.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--hexfile", help="hex bytes file (e.g. code.hex). If omitted, read from stdin.")
    ap.add_argument("--labels", action="store_true", help="print labels for jump targets")
    ap.add_argument("--guess-ldc", action="store_true",
                    help="treat unknown opcodes in 0x21..0xFE as 3-byte (opcode,dst,imm) to avoid desync")
    args = ap.parse_args()

    if args.hexfile:
        # Use a context manager so the file handle is closed once it is read.
        with open(args.hexfile, "r", encoding="utf-8", errors="ignore") as fh:
            text = fh.read()
    else:
        text = sys.stdin.read()

    code = read_hex_stream(text)
    if not code:
        print("No input bytes. Usage: python vm_disasm.py --hexfile code.hex")
        return
    disasm(code, labels=args.labels, guess_ldc=args.guess_ldc)


if __name__ == "__main__":
    main()
拿到输出
0000: 13 00 75 CALL loc_0075
0003: 06 05 05 XOR r5, r5
0006: 07 01 72 MOVI r1, 0x72
0009: 07 03 6E MOVI r3, 0x6E
000C: 07 02 6F MOVI r2, 0x6F
000F: 07 04 67 MOVI r4, 0x67
0012: 07 00 57 MOVI r0, 0x57
0015: 20 00 00 STO_OUT 0x00, r0
0018: 20 04 04 STO_OUT 0x04, r4
001B: 20 02 02 STO_OUT 0x02, r2
001E: 20 03 03 STO_OUT 0x03, r3
0021: 20 01 01 STO_OUT 0x01, r1
0024: 20 05 05 STO_OUT 0x05, r5
0027: 1E PRINT2
0028: 00 NOP
0029: FF DB 0xFF ; unknown
002A: 07 01 6F MOVI r1, 0x6F
002D: 07 04 65 MOVI r4, 0x65
0030: 07 02 72 MOVI r2, 0x72
0033: 07 06 74 MOVI r6, 0x74
0036: 07 00 43 MOVI r0, 0x43
0039: 07 05 63 MOVI r5, 0x63
003C: 07 03 72 MOVI r3, 0x72
003F: 06 07 07 XOR r7, r7
0042: 20 03 03 STO_OUT 0x03, r3
0045: 20 01 01 STO_OUT 0x01, r1
0048: 20 02 02 STO_OUT 0x02, r2
004B: 20 00 00 STO_OUT 0x00, r0
004E: 20 04 04 STO_OUT 0x04, r4
0051: 20 06 06 STO_OUT 0x06, r6
0054: 20 05 05 STO_OUT 0x05, r5
0057: 20 07 07 STO_OUT 0x07, r7
005A: 1E PRINT2
005B: 00 NOP
005C: FF DB 0xFF ; unknown
005D: 07 00 FF MOVI r0, 0xFF
0060: 09 00 01 SUBI r0, 0x01
0063: 12 00 60 JNZ loc_0060
0066: 14 RET
0067: 00 NOP
0068: 00 NOP
0069: 07 00 AA MOVI r0, 0xAA
006C: 07 01 55 MOVI r1, 0x55
006F: 06 00 01 XOR r0, r1
0072: 14 RET
0073: 00 NOP
0074: 00 NOP
0075: 07 00 00 MOVI r0, 0x00
0078: 13 00 69 CALL loc_0069
007B: 07 01 FF MOVI r1, 0xFF
007E: 17 00 01 CMP r0, r1
0081: 12 00 03 JNZ loc_0003
0084: 1C READ
0085: 00 NOP
0086: 00 NOP
0087: 1F 00 02 LDI_IN r0, 0x02
008A: 18 00 61 CMPI r0, 0x61
008D: 12 00 03 JNZ loc_0003
0090: 1F 00 01 LDI_IN r0, 0x01
0093: 18 00 6C CMPI r0, 0x6C
0096: 12 00 03 JNZ loc_0003
0099: 1F 00 00 LDI_IN r0, 0x00
009C: 18 00 66 CMPI r0, 0x66
009F: 12 00 03 JNZ loc_0003
00A2: 1F 00 03 LDI_IN r0, 0x03
00A5: 18 00 67 CMPI r0, 0x67
00A8: 12 00 03 JNZ loc_0003
00AB: 1F 00 04 LDI_IN r0, 0x04
00AE: 18 00 7B CMPI r0, 0x7B
00B1: 12 00 03 JNZ loc_0003
00B4: 1F 00 07 LDI_IN r0, 0x07
00B7: 18 00 54 CMPI r0, 0x54
00BA: 12 00 03 JNZ loc_0003
00BD: 1F 00 06 LDI_IN r0, 0x06
00C0: 18 00 49 CMPI r0, 0x49
00C3: 12 00 03 JNZ loc_0003
00C6: 1F 00 05 LDI_IN r0, 0x05
00C9: 18 00 48 CMPI r0, 0x48
00CC: 12 00 03 JNZ loc_0003
00CF: 1F 00 08 LDI_IN r0, 0x08
00D2: 18 00 43 CMPI r0, 0x43
00D5: 12 00 03 JNZ loc_0003
00D8: 1F 00 0C LDI_IN r0, 0x0C
00DB: 18 00 30 CMPI r0, 0x30
00DE: 12 00 03 JNZ loc_0003
00E1: 1F 00 0A LDI_IN r0, 0x0A
00E4: 18 00 46 CMPI r0, 0x46
00E7: 12 00 03 JNZ loc_0003
00EA: 1F 00 0B LDI_IN r0, 0x0B
00ED: 18 00 32 CMPI r0, 0x32
00F0: 12 00 03 JNZ loc_0003
00F3: 1F 00 09 LDI_IN r0, 0x09
00F6: 18 00 54 CMPI r0, 0x54
00F9: 12 00 03 JNZ loc_0003
00FC: 1F 00 0D LDI_IN r0, 0x0D
00FF: 18 00 32 CMPI r0, 0x32
0102: 12 00 03 JNZ loc_0003
0105: 1F 00 0E LDI_IN r0, 0x0E
0108: 18 00 35 CMPI r0, 0x35
010B: 12 00 03 JNZ loc_0003
010E: 1F 00 0F LDI_IN r0, 0x0F
0111: 18 00 5F CMPI r0, 0x5F
0114: 12 00 03 JNZ loc_0003
0117: 1F 00 10 LDI_IN r0, 0x10
011A: 1F 01 0B LDI_IN r1, 0x0B
011D: 17 00 01 CMP r0, r1
0120: 12 00 03 JNZ loc_0003
0123: 1F 00 11 LDI_IN r0, 0x11
0126: 22 01 A8 LDC r1, 0x38 '8' ; imm=0xA8 - 112
0129: 17 01 00 CMP r1, r0
012C: 12 00 03 JNZ loc_0003
012F: 1F 00 12 LDI_IN r0, 0x12
0132: 09 01 01 SUBI r1, 0x01
0135: 17 01 00 CMP r1, r0
0138: 12 00 03 JNZ loc_0003
013B: 07 00 80 MOVI r0, 0x80
013E: 15 00 PUSH r0
0140: 00 NOP
0141: 13 00 5D CALL loc_005D
0144: 16 00 POP r0
0146: 00 NOP
0147: 09 00 01 SUBI r0, 0x01
014A: 12 01 3E JNZ loc_013E
014D: 1F 00 13 LDI_IN r0, 0x13
0150: 06 01 00 XOR r1, r0
0153: 18 01 52 CMPI r1, 0x52
0156: 12 00 03 JNZ loc_0003
0159: 1F 00 14 LDI_IN r0, 0x14
015C: 1F 01 17 LDI_IN r1, 0x17
015F: 06 00 01 XOR r0, r1
0162: 12 00 03 JNZ loc_0003
0165: 18 01 63 CMPI r1, 0x63
0168: 12 00 03 JNZ loc_0003
016B: 1F 00 15 LDI_IN r0, 0x15
016E: 1F 01 16 LDI_IN r1, 0x16
0171: 03 01 00 SUB r1, r0
0174: 18 01 03 CMPI r1, 0x03
0177: 12 00 03 JNZ loc_0003
017A: 1F 01 10 LDI_IN r1, 0x10
017D: 08 01 02 ADDI r1, 0x02
0180: 17 01 00 CMP r1, r0
0183: 12 00 03 JNZ loc_0003
0186: 1F 00 18 LDI_IN r0, 0x18
0189: 18 00 7D CMPI r0, 0x7D
018C: 12 00 03 JNZ loc_0003
018F: 1F 00 19 LDI_IN r0, 0x19
0192: 18 00 00 CMPI r0, 0x00
0195: 12 00 03 JNZ loc_0003
0198: 07 00 80 MOVI r0, 0x80
019B: 15 00 PUSH r0
019D: 00 NOP
019E: 13 00 5D CALL loc_005D
01A1: 16 00 POP r0
01A3: 00 NOP
01A4: 09 00 01 SUBI r0, 0x01
01A7: 12 01 9B JNZ loc_019B
01AA: 10 00 2A JMP loc_002A
01AD: FF DB 0xFF ; unknown
01AE: 00 NOP
01AF: 00 NOP
具体如何解密出 flag 这里就不赘述了,并不复杂。flag:flag{HITCTF2025_287ec47c}
三、结语
结合 AI 写下来感觉也还好,这两题蛮适合 vm 入门。
转载请注明来源,欢迎对文章中的引用来源进行考证,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 1621925986@qq.com