CTFZone matreshka

My friend from Russia brought me a doll, but there is something in it. Can you figure out what is this?

附件下载

运行程序提示输入flag,查看sub_6810,有一个巨大的switch-case,那么应该是个虚拟机:

流程图

但是看每个case调用的函数,基本是一大坨,很难判断代码的作用。先找一下虚拟机指令的位置,在sub_12E0中可以发现两段数据:unk_8040unk_81E0,大小均已知。然后看看虚拟机是如何解析指令的,回到sub_6810,逐句分析switch前的代码,发现对a1指针取值的数据类型不同,猜测是一个C++对象的指针:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
__int64 __fastcall sub_6810(__int64 a1)
{
v1 = 0LL;
v2 = 5;
v4 = *(_WORD **)a1; // 取了2个字节
v5 = *(_QWORD *)(a1 + 24); // 取了8个字节
v6 = v4[13]; // 程序计数器
v7 = v6 + 5;
v8 = v6;
v9 = v6 & 7;
v4[13] = v7;
v10 = v8 >> 3;
while ( 1 )
{
v11 = *(unsigned __int8 *)(v5 + v10);
if ( v2 + v9 <= 8 )
break;
v2 -= 8;
++v10;
if ( v9 )
{
v1 |= (int)(unsigned __int8)((_DWORD)v11 << v9) >> v9;
v2 += v9;
if ( !v2 )
goto LABEL_7;
}
else
{
v1 = v11 | (v1 << 8);
if ( !v2 )
goto LABEL_7;
}
v9 = 0;
}
v1 = ((int)(unsigned __int8)((_DWORD)v11 << v9) >> (8 - v2)) | (v1 << v2);
LABEL_7:
switch ( (char)v1 )...
}

然后分析while (1)后面的代码,并把断点下在68A0处验证,可以看出这些操作是从v5这个指针指向的数据(位偏移量是v6)中取出5个bit放入v1。5bit可以对应32个值,而switch中有25个case,那这个v1应该是操作码了。

然后我们分析每个case,先进入sub_55D0,虽然说代码很多,但分析后可以看出都是些固定的内联函数,忽略它们以后其实也没那么复杂好吧,还是有点复杂的。函数接受一个虚拟机对象指针,并且从指令流中继续读取3个bit存入v4,然后紧接着又是一个switch:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
char __fastcall sub_55D0(_QWORD *a1)
{
v2 = 3;
v3 = *a1;
v4 = 0LL;
v5 = a1[3];
v6 = *(_WORD *)(*a1 + 26LL);
v7 = v6 + 3;
*(_WORD *)(v3 + 26) = v6 + 3;
v8 = v6 & 7;
v9 = v6 >> 3;
v10 = v8;
while ( 1 )
{
v11 = *(unsigned __int8 *)(v5 + v9);
if ( v2 + v10 <= 8 )
break;
v2 -= 8;
++v9;
if ( v10 )
{
v11 = (int)(unsigned __int8)((_DWORD)v11 << v10) >> v10;
v4 |= v11;
v2 += v10;
if ( !v2 )
goto LABEL_7;
v10 = 0;
}
else
{
v4 = v11 | (v4 << 8);
if ( !v2 )
goto LABEL_7;
v10 = 0;
}
}
v11 = (int)(unsigned __int8)((_DWORD)v11 << v10) >> (8 - v2);
v4 = v11 | (v4 << v2);
LABEL_7:
switch ( v4 )...
}

3个bit对应8个case,每个case会继续从指令流中读取一些bit,先看看case 0:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
      v38 = v4;
v39 = v7 & 7;
*(_WORD *)(v3 + 26) = v6 + 7;
v40 = (unsigned __int16)(v6 + 3) >> 3;
v41 = 4;
while ( 1 )
{
v42 = (unsigned __int8 *)(v5 + v40);
if ( v41 + v39 <= 8 )
break;
v41 -= 8;
++v40;
if ( v39 )
{
v38 |= (int)(unsigned __int8)(*v42 << v39) >> v39;
v41 += v39;
if ( !v41 )
goto LABEL_73;
v39 = 0;
}
else
{
v38 = *v42 | (v38 << 8);
if ( !v41 )
goto LABEL_73;
v39 = 0;
}
}
v38 = ((int)(unsigned __int8)(*v42 << v39) >> (8 - v41)) | (v38 << v41);
LABEL_73:
v73 = 4;
*(_WORD *)(v3 + 26) = v6 + 11;
v74 = ((_BYTE)v6 + 7) & 7;
v75 = (unsigned __int16)(v6 + 7) >> 3;
while ( 1 )
{
v76 = (unsigned __int8 *)(v5 + v75);
if ( v73 + v74 <= 8 )
break;
v73 -= 8;
++v75;
if ( v74 )
{
v4 |= (int)(unsigned __int8)(*v76 << v74) >> v74;
v73 += v74;
if ( !v73 )
goto LABEL_121;
}
else
{
v4 = *v76 | (v4 << 8);
if ( !v73 )
goto LABEL_121;
}
v74 = 0;
}
v4 = ((int)(unsigned __int8)(*v76 << v74) >> (8 - v73)) | (v4 << v73);
LABEL_121:
LOWORD(v11) = *(_WORD *)(v3 + 2LL * (unsigned __int8)v4);
*(_WORD *)(v3 + 2LL * (unsigned __int8)v38) = v11;
return v11;

看着代码多,实际就两次取指令数据,每次取4个bit,分别放到v38和v4里面。然后进行赋值,类似于x86的mov指令。然后把8个case都分析一下,虚拟机对象的数据结构和函数的功能就可以大致推测出来了:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
struct vector_u16
{
unsigned __int16 *start;
unsigned __int16 *end;
unsigned __int16 *end_cap;
};
struct vector_u8
{
unsigned __int8 *start;
unsigned __int8 *end;
unsigned __int8 *end_cap;
};
struct vm
{
vector_u16 regs; // count: 16
vector_u8 mem;
unsigned __int8 dummy; // unknown field
unsigned __int8 result;
};

把它们放入local types,便于后面的分析。然后就是体力活了,就是相当纯粹的逆向,把20多个指令都逆一下,可以得到下面的指令对应表:

指令 操作
00000 halt
00001 000 x4 y4 mov reg:x, reg:y
00001 001 x4 y16 mov reg:x, y
00001 010 0 x4 y4 mov byte[reg:x], reg:y
00001 010 1 x4 y4 mov [reg:x], reg:y
00001 011 0 x4, y8 mov byte[reg:x], y
00001 011 1 x4 y16 mov [reg:x], y
00001 100 x4 y4 mov reg:x, byte[reg:y
00001 101 x4 y4 mov reg:x, [reg:y
00001 110 x4 y16 mov reg:x, byte[y
00001 111 x4 y16 mov reg:x, [y
00010 00 x4 y4 add reg:x, reg:y
00010 01 x4 y16 add reg:x, y
00010 10 1 x4 y4 add [reg:x], reg:y
00010 10 0 x4 y4 add byte[reg:x], reg:y
00010 11 1 x4 y16 add [reg:x], y
00010 11 0 x4 y8 add byte[reg:x], y
00011 00 x4 y4 sub reg:x, reg:y
00011 01 x4 y16 sub reg:x, y
00011 10 1 x4 y4 sub [reg:x], reg:y
00011 10 0 x4 y4 sub byte[reg:x], reg:y
00011 11 1 x4 y16 sub [reg:x], y
00011 11 0 x4 y8 sub byte[reg:x], y
00100 00 x4 y4 umul reg:x, reg:y
00100 01 x4 y16 umul reg:x, y
00100 10 1 x4 y4 umul [reg:x], reg:y
00100 10 0 x4 y4 umul byte[reg:x], reg:y
00100 11 1 x4 y16 umul [reg:x], y
00100 11 0 x4 y8 umul byte[reg:x], y
00101 00 x4 y4 udiv reg:x, reg:y
00101 01 x4 y16 udiv reg:x, y
00101 10 1 x4 y4 udiv [reg:x], reg:y
00101 10 0 x4 y4 udiv byte[reg:x], reg:y
00101 11 1 x4 y16 udiv [reg:x], y
00101 11 0 x4 y8 udiv byte[reg:x], y
00110 0 x4, y4 xor reg:x, reg:y
00110 1 0 x4 y8 xor reg:x, y
00110 1 1 x4 y16 xor reg:x, y
00111 0 x4, y4 and reg:x, reg:y
00111 1 0 x4 y8 and reg:x, y
00111 1 1 x4 y16 and reg:x, y
01000 0 x4, y4 or reg:x, reg:y
01000 1 0 x4 y8 or reg:x, y
01000 1 1 x4 y16 or reg:x, y
01001 0 x4 y4 cmp reg:x, reg:y
01001 1 x4, y16 cmp reg:x, y
01010 0 x4 jmp reg:x
01010 1 x16 jmp x
01011 0 x4 jae reg:x
01011 1 x16 jae x
01100 0 x4 jbe reg:x
01100 1 x16 jbe x
01101 0 x4 ja reg:x
01101 1 x16 ja x
01110 0 x4 jb reg:x
01110 1 x16 jb x
01111 0 x4 push reg:x
01111 1 x16 push x
10000 x4 pop reg:x
10001 syscall
10010 0 x4 call reg:x
10010 1 x16 call x
10011 ret
10100 0 x4 jnz reg:x
10100 1 x16 jnz x
10101 0 x4 jz reg:x
10101 1 x16 jz x
10110 enter
10111 leave
11000 nop
11001 nop

其中x4代表x是一个4位整数;reg:x代表x号寄存器,如r0;独立的x代表立即数;[x]代表地址x处的值。syscall看着吓人,实际就两个功能:读取输入缓冲区和写到输出缓冲区(其实还有一个分支,但没有被用到)。

在逆的过程中,一些寄存器的含义也能猜测出来:

寄存器 对应x86的寄存器 含义
r11 rbp 栈底指针
r12 rsp 栈顶指针
r13 rip 程序计数器
r14 rdx 存放除法指令的余数
r15 rflags 标志寄存器(0:ZF(op1==op2), 1:SF(op1<op2))

我们可以写个脚本,把指令流转化成人类可读的格式:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import bitstring

bits = open('mem', 'rb').read()
bits = bitstring.Bits(bits)

def dism(bits, addr):
def operand(addr, len):
if len == 4:
opr = bits[addr:addr+4].uint
if opr < 11: return 'r%d' % opr
else: return ('rbp', 'rsp', 'rip', 'mod', 'flags')[opr - 11]
elif len in (8, 16):
return '0x%X' % bits[addr:addr+len].uint

op = bits[addr:addr+5].uint
addr += 5
if op == 1:
ops = bits[addr:addr+3].uint
addr += 3
if ops == 0:
return 'mov %s, %s' % (operand(addr, 4), operand(addr + 4, 4)), addr + 8
elif ops == 1:
return 'mov %s, %s' % (operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif ops == 2:
opss = bits[addr:addr+1].uint
addr += 1
if opss == 0:
return 'mov byte[%s], %s' % (operand(addr, 4), operand(addr + 4, 4)), addr + 8
else:
return 'mov [%s], %s' % (operand(addr, 4), operand(addr + 4, 4)), addr + 8
elif ops == 3:
opss = bits[addr:addr+1].uint
addr += 1
if opss == 0:
return 'mov byte[%s], %s' % (operand(addr, 4), operand(addr + 4, 8)), addr + 12
else:
return 'mov [%s], %s' % (operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif ops == 4:
return 'mov %s, byte[%s]' % (operand(addr, 4), operand(addr + 4, 4)), addr + 8
elif ops == 5:
return 'mov %s, [%s]' % (operand(addr, 4), operand(addr + 4, 4)), addr + 8
elif ops == 6:
return 'mov %s, byte[%s]' % (operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif ops == 7:
return 'mov %s, [%s]' % (operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif op in range(2, 6):
tag = ('add', 'sub', 'mul', 'div')[op - 2]
ops = bits[addr:addr+2].uint
addr += 2
if ops == 0:
return '%s %s, %s' % (tag, operand(addr, 4), operand(addr + 4, 4)), addr + 8
elif ops == 1:
return '%s %s, %s' % (tag, operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif ops == 2:
opss = bits[addr:addr+1].uint
addr += 1
if opss == 0:
return '%s byte[%s], %s' % (tag, operand(addr, 4), operand(addr + 4, 4)), addr + 8
else:
return '%s [%s], %s' % (tag, operand(addr, 4), operand(addr + 4, 4)), addr + 8
elif ops == 3:
opss = bits[addr:addr+1].uint
addr += 1
if opss == 0:
return '%s byte[%s], %s' % (tag, operand(addr, 4), operand(addr + 4, 16)), addr + 20
else:
return '%s [%s], %s' % (tag, operand(addr, 4), operand(addr + 4, 8)), addr + 12
elif op in range(6, 9):
tag = ('xor', 'and', 'or')[op - 6]
ops = bits[addr:addr+1].uint
addr += 1
if ops == 0:
return '%s %s, %s' % (tag, operand(addr, 4), operand(addr + 4, 4)), addr + 8
else:
opss = bits[addr:addr+1].uint
addr += 1
if opss == 0:
return '%s %s, %s' % (tag, operand(addr, 4), operand(addr + 4, 8)), addr + 12
else:
return '%s %s, %s' % (tag, operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif op == 9:
ops = bits[addr:addr+1].uint
addr += 1
if ops == 0:
return 'cmp %s, %s' % (operand(addr, 4), operand(addr + 4, 4)), addr + 8
else:
return 'cmp %s, %s' % (operand(addr, 4), operand(addr + 4, 16)), addr + 20
elif op in range(10, 16) or op in (18, 20, 21):
tag = ('jmp', 'jae', 'jbe', 'ja', 'jb', 'push', None, None, 'call', None, 'jnz', 'jz')[op - 10]
ops = bits[addr:addr+1].uint
addr += 1
if ops == 0:
return '%s %s' % (tag, operand(addr, 4)), addr + 4
else:
return '%s %s' % (tag, operand(addr, 16)), addr + 16
elif op == 16:
return 'pop %s' % operand(addr, 4), addr + 4
elif op == 17:
return 'syscall', addr
elif op == 19:
return 'ret', addr
elif op in range(22, 26):
return ('enter', 'leave', 'nop', 'nop')[op - 22]
else:
return 'halt %d' % op, addr

addr = 0
while addr < 0xCCE:
dis, next = dism(bits, addr)
print('%03X %s' % (addr, dis))
addr = next
(addr, dis))
addr = next

输出数据简单处理一下,得到下面的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
000 jmp start

sub_016:
016 call sub_AD6
02C add r0, 0x2
047 mov r0, [r0]
057 ret

sub_05C:
05C call sub_AD6
072 add r0, 0x1
08D mov r0, byte[r0]
09D ret

sub_0A2:
0A2 call sub_AD6
0B8 add r0, 0x2
0D3 mov r0, byte[r0]
0E3 ret

sub_0E8:
0E8 call sub_0A2
0FE call sub_B0D
114 mov r1, r0
124 call sub_05C
13A call sub_B58
150 mov r0, 0x3
16C call sub_A6A
182 ret

sub_187:
187 call sub_016
19D mov r1, r0
1AD call sub_05C
1C3 call sub_B58
1D9 mov r0, 0x4
1F5 call sub_A6A
20B ret

sub_210:
210 call sub_0A2
226 call sub_B0D
23C mov r1, r0
24C call sub_05C
262 call sub_B0D
278 mov byte[r0], r1
289 mov r0, 0x3
2A5 call sub_A6A
2BB ret

sub_2C0:
2C0 call sub_0A2
2D6 call sub_B0D
2EC mov r1, byte[r0]
2FC call sub_05C
312 call sub_B58
328 mov r0, 0x3
344 call sub_A6A
35A ret

sub_35F:
35F call sub_0A2
375 call sub_B0D
38B mov r1, r0
39B call sub_05C
3B1 push r0
3BB call sub_B0D
3D1 add r1, r0
3E0 pop r0
3E9 call sub_B58
3FF mov r0, 0x3
41B call sub_A6A
431 ret

sub_436:
436 call sub_0A2
44C call sub_B0D
462 mov r1, r0
472 call sub_05C
488 push r0
492 call sub_B0D
4A8 sub r0, r1
4B7 mov r1, r0
4C7 pop r0
4D0 call sub_B58
4E6 mov r0, 0x3
502 call sub_A6A
518 ret

sub_51D:
51D call sub_0A2
533 call sub_B0D
549 mov r1, r0
559 call sub_05C
56F push r0
579 call sub_B0D
58F xor r1, r0
59D pop r0
5A6 call sub_B58
5BC mov r0, 0x3
5D8 call sub_A6A
5EE ret

sub_5F3:
5F3 call sub_0A2
609 call sub_B0D
61F mov r1, r0
62F call sub_05C
645 call sub_B0D
65B cmp r0, r1
669 jnz loc_6B1
67F mov r1, 0x1
69B jmp loc_6BF
loc_6B1:
6B1 xor r1, r1
loc_6BF:
6BF mov r0, 0xE
6DB call sub_B58
6F1 mov r0, 0x3
70D call sub_A6A
723 ret

sub_728:
728 mov r0, 0xE
744 call sub_B0D
75A cmp r0, 0x1
774 jz loc_813
78A call sub_AD6
7A0 add r0, 0x1
7BB mov r1, [r0]
7CB mov r0, 0xF
7E7 call sub_B58
7FD jmp loc_845
loc_813:
813 mov r0, 0x3
82F call sub_A6A
loc_845:
845 ret

sub_84A:
84A xor r0, r0
858 call sub_B0D
86E mov r1, r0
87E mov r0, 0x1
89A call sub_B0D
8B0 mov r2, r0
8C0 mov r0, 0x1
8DC syscall
8E1 mov r1, r0
8F1 mov r0, 0x0
90D call sub_B58
923 mov r0, 0x1
93F call sub_A6A
955 ret

sub_95A:
95A xor r0, r0
968 call sub_B0D
97E mov r1, r0
98E mov r0, 0x1
9AA call sub_B0D
9C0 mov r2, r0
9D0 mov r0, 0x0
9EC syscall
9F1 mov r1, r0
A01 mov r0, 0x0
A1D call sub_B58
A33 mov r0, 0x1
A4F call sub_A6A
A65 ret

sub_A6A:
A6A mov r1, r0
A7A call sub_AD6
A90 add r1, r0
A9F mov r0, 0xF
ABB call sub_B58
AD1 ret

sub_AD6:
AD6 mov r0, 0xF
AF2 call sub_B0D
B08 ret

sub_B0D:
B0D mul r0, 0x2
B28 add r0, 0x4000
B43 mov r0, [r0]
B53 ret

sub_B58:
B58 mul r0, 0x2
B73 add r0, 0x4000
B8E mov [r0], r1
B9F ret

sub_BA4:
BA4 call sub_AD6
BBA mov r0, byte[r0]
BCA cmp r0, 0xB
BE4 jz loc_C60
BFA mul r0, 0x2
C15 add r0, 0x3800
C30 mov r0, [r0]
C40 call r0
C4A jmp sub_BA4
loc_C60:
C60 ret

start:
C65 mov r0, 0xF
C81 mov r1, 0x2000
C9D call sub_B58
CB3 call sub_BA4
CC9 halt 0

可以看出,start调用了sub_BA4,而后者中有一行call r0。经过分析发现这又是另一个虚拟机(虚拟机2),这就是题目“俄罗斯套娃”的由来吧。虚拟机1的内存0x3800处存放了虚拟机2的jump table0x4000处存放虚拟机2的寄存器;0x2000存放虚拟机2的指令。把跳转表中的每个函数都看一遍,它们的对应功能如下:

指令 操作
00 x8 y8 mov reg:x, reg:y
01 x8 y16 mov reg:x, y
02 x8 y8 mov byte[reg:x], reg:y
03 x8 y8 mov reg:x, byte[reg:y]
04 x8 y8 add reg:x, reg:y
05 x8 y8 sub reg:x, reg:y(但好像没有用到)
06 x8 y8 xor reg:x, reg:y
07 x8 y8 cmp reg:x, reg:y
08 x16 jnz x
09 read r0, r1(读取输入缓冲区到r0指向的内存,长度为r1
0A write r0, r1(输出缓冲区写出到r0指向的内存,长度为r1
0B halt

这个就不写脚本了,直接手逆吧:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
2000 01 00 00 39    mov r0, 0x3900
2004 01 01 0D 00 mov r1, 0x00D0
2008 0A write r0, r1
2009 01 00 00 50 mov r0, 0x5000
200D 01 01 00 01 mov r1, 0x0100
2011 09 read r0, r1
2012 00 03 00 mov r3, r0
2015 01 02 00 37 mov r2, 0x3700
2019 01 0D 00 59 mov r13, 0x5900
201D 01 04 00 00 mov r4, 0x0000
2021 01 05 00 00 mov r5, 0x0000
2025 01 01 01 00 mov r1, 0x0001
2029 01 00 00 00 mov r0, 0x0000
loc_202D:
202D 04 04 01 add r4, r1
2030 00 06 02 mov r6, r2
2033 04 06 04 add r6, r4
2036 00 0B 06 mov r11, r6
2039 03 06 06 mov r6, byte[r6]
203C 04 05 06 add r5, r6
203F 02 0D 05 mov byte[r13], r5
2042 03 05 0D mov r5, byte[r13]
2045 00 07 02 mov r7, r2
2048 04 07 05 add r7, r5
204B 00 0C 07 mov r12, r7
204E 03 07 07 mov r7, byte[r7]
2051 02 0C 06 mov byte[r12], r6
2054 02 0B 07 mov byte[r11], r7
2057 00 08 06 mov r8, r6
205A 04 08 07 add r8, r7
205D 02 0D 08 mov byte[r13], r8
2060 03 08 0D mov r8, byte[r13]
2063 00 09 02 mov r9, r2
2066 04 09 08 add r9, r8
2069 03 09 09 mov r9, byte[r9]
206C 01 0A 00 50 mov r10, 0x5000
2070 04 0A 00 add r10, r0
2073 03 0A 0A mov r10, byte[r10]
2076 06 0A 09 xor r10, r9
2079 01 0B 00 48 mov r11, 0x4800
207D 04 0B 00 add r11, r0
2080 02 0B 0A mov byte[r11], r10
2083 04 00 01 add r0, r1
2086 07 00 03 cmp r0, r3
2089 08 2D 20 jnz loc_202D
208C 01 00 00 00 mov r0, 0
2090 01 01 00 48 mov r1, 0x4800
2094 01 02 15 39 mov r2, 0x3915
2098 01 03 57 00 mov r3, 0x0057
209C 01 0D 01 00 mov r13, 0x0001
loc_20A0:
20A0 03 04 01 mov r4, byte[r1]
20A3 03 05 02 mov r5, byte[r2]
20A6 04 01 0D add r1, r13
20A9 04 02 0D add r2, r13
20AC 07 04 05 cmp r4, r5
20AF 08 CE 20 jnz loc_20CE
20B2 04 00 0D add r0, r13
20B5 07 00 03 cmp r0, r3
20B8 08 A0 20 jnz loc_20A0
20BB 01 00 0D 39 mov r0, 0x390D
20BF 01 01 04 00 mov r1, 0x0004
20C3 0A write r0, r1
20C4 01 01 69 7A mov r1, 0x7A69
20C8 07 00 01 cmp r0, r1
20CB 08 E1 20 jnz loc_20E1
loc_20CE:
20CE 01 00 11 39 mov r0, 0x3911
20D2 01 01 04 00 mov r1, 0x0004
20D6 0A write r0, r1
20D7 01 01 69 7A mov r1, 0x7A69
20DB 07 00 01 cmp r0, r1
20DE 08 E1 20 jnz loc_20E1
loc_20E1:
20E1 0B halt

把它们转换成伪代码的形式:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
write("Enter Flag: ")
len = read(buf, 256)
r0 = r4 = r5 = 0
do{
r4 += 1
r6 = v_3700[r4]
r5 += r6
r7 = v_3700[r5]
v_3700[r5] = r6
v_3700[r4] = r7
v_4800[r0] = buf[r0] ^ v_3700[r6 + r7]
}while (++r0 != len)
r0 = 0
r1 = v_4800
r2 = v_3915
r3 = 0x57
do{
r4 = *(byte*)r1
r5 = *(byte*)r2
r1 += 1
r2 += 1
if (r4 != r5) goto loc_20CE
}while (++r0 != r3)
write("Yea\n")
halt()
loc_20CE:
write("Nah\n")
halt()

可以看出这是RC4加密,状态向量是v_3700,flag的长度是87,加密后放在v_4800,比较的内容是v_3915。那我们写个脚本再逆回去就好了:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
flag = [0x07, 0x2B, 0xC4, 0x5F, 0x11, 0xB1, 0xDC, 0x15, 0x28, 0xB7, 0x62, 0x59, 0xED, 0x3F, 0xAD, 0x9E, 0x38, 0x70, 0xD8, 0x68, 0xCA, 0xD3, 0x36, 0x67, 0x09, 0xD7, 0x16, 0x94, 0x11, 0xB1, 0x14, 0xB3, 0x34, 0xF5, 0x48, 0xE9, 0x64, 0x19, 0x2D, 0xC3, 0xA7, 0xB7, 0xB4, 0x10, 0xFB, 0x5C, 0xDF, 0x55, 0x18, 0xB5, 0xF3, 0x54, 0xC7, 0x51, 0x5D, 0xB0, 0x71, 0x13, 0xFE, 0x1F, 0x68, 0x25, 0x46, 0x63, 0xFD, 0x2E, 0xAA, 0x83, 0xF9, 0x0A, 0x2B, 0x04, 0xB0, 0x42, 0x15, 0x7A, 0x8D, 0x85, 0x02, 0x91, 0xDB, 0x41, 0xF9, 0xB4, 0xA6, 0x44, 0xE9]

box = [0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16]

r0, r4, r5 = 0, 0, 0
while True:
r4 += 1
r6 = box[r4]
r5 = r5 + r6 & 255
r7 = box[r5]
box[r5] = r6
box[r4] = r7
flag[r0] ^= box[r6 + r7 & 255]
r0 += 1
if r0 == len(flag): break

print(bytes(flag).decode('ascii'))

附上逆向时的IDB文件:附件下载

ctfzone{7h15_15_8451c411y_p427_0f_2c4_w21773n_1n_vm_1n_vm_8u7_w17h_435_580x_h4h4_funny}

TCTF2019 Elements

附件下载

IDA载入程序,容易发现flag的格式为'flag{%012x-%012x-%012x}' % (a, b, c)。然后$a$的值已知,要求$b$和$c$,约束条件是:

其中$T_1$,$T_2$是已知量,$d$满足:

然而我用z3写的脚本不太能解出来,于是尝试用别的办法。注意到$a$的值已知,而两个方程都是齐次的,于是可以先把$a$除掉,再来解二元方程组:

这样所有数量级就被处理掉了,常数都转化为了正常大小的小数。然后这个方程组手解可能挺麻烦的,好在我们可以用Walfram Alpha求它的解析解,进入网站后输入:

1
solve sqrt(x^2-(1+x^2-y^2)^2/4)/(1+x+y)=a,xy/sqrt(4x^2-(1+x^2-y^2)^2)=b

得到它的四组解析解。其中有两组复数解不管,另外两组只是$x$、$y$互换,选$x < y$的那组即可:

代入$T_1’$和$T_2’$的值,即可解出原来的$x$和$y$。把它们乘以$a$,可以求出$b$和$c$。但求出后我们不能直接得到flag,原因是程序有double数到格式串的转化过程,我们要按照它的方式生成格式串。查看关键代码:

1
2
3
4
v17 = (__m128i)_mm_sub_pd(
(__m128d)_mm_unpacklo_epi32((__m128i)(unsigned __int64)num, (__m128i)xmmword_400BD0),
(__m128d)xmmword_400BE0);
v23[v9++] = *(double *)_mm_shuffle_epi32(v17, 78).m128i_i64 + *(double *)v17.m128i_i64;// 1001110

num就是读入的abc。经分析,此过程实际上把它们转换成double型的变量。这几句代码可能是编译器对uint64_tdouble转换的内联代码。于是我们求出的值直接四舍五入然后转成12位十六进制数就行了:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
def q2d(val):
b = memoryview(bytearray(8))
v, f = b.cast('Q'), b.cast('d')
v[0] = val
return f[0]

sqrt = lambda x: x ** (1/2)

unit = q2d(0x3FF0000000000000)
a, b = map(q2d, (0x42B1A4FF41C1018B, 0x42C5B939050828F4))
c = 0x391BC2164F0A
scale = c * unit
a /= scale; b /= scale
a2, b2, ab, a2b2 = a * a, b * b, a * b, a * a * b * b
x = sqrt(4*a2b2-a2)-sqrt(4*sqrt(4*a2b2-a2)-4*a2-8*ab+1)/2+(4*ab+1)/2
y = sqrt(4*a2b2-a2)+sqrt(4*sqrt(4*a2b2-a2)-4*a2-8*ab+1)/2+(4*ab+1)/2
x, y = map(lambda x: int(x / unit * scale), (x, y))
print('flag{%012X-%012X-%012X}' % (c, x, y))

flag{391BC2164F0A-4064E4798769-56E0DE138176}

TCTF2019 fixed_point

附件下载

程序接受一个格式为flag{长度为32的小写hex字符串}的flag。随后程序把它换成flag{hex表示的16字节二进制数据},并求出它的CRC128,如果求出的值与原flag值相等,则校验通过。python代码表示为:

1
2
3
assert flag.startswith('flag{') and flag.endswith('}')
raw = unhexlify(flag[5:-1])
assert raw == CRC128(b'flag{'+raw+b'}').to_bytes(16, 'little')

我一开始想到的是Z3求解,因为之前写过一个已知CRC32枚举原字符串的题,那个用Z3确实能胜任。于是我故技重施,写出了如下的代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import z3

def z3_crc128(data, length):
result = z3.BitVecVal(-1, 128)
for _, byte in zip(range(length), data):
result ^= z3.ZeroExt(120, byte)
for _ in range(8):
result = z3.If(
result & 1 == 1,
z3.LShR(result, 1) ^ 0xB595CF9C8D708E2166D545CF7CFDD4F9,
z3.LShR(result, 1)
)
return result

flag = z3.Array('flag', z3.IntSort(), z3.BitVecSort(8))
solver = z3.Solver()
for i, v in enumerate(b'flag{'):
solver.add(flag[i] == v)
solver.add(flag[21] == ord('}'))
crc = z3_crc128(flag, 22)
for i in range(16):
solver.add(flag[5 + i] == z3.Extract(i * 8 + 7, i * 8, crc))
assert solver.check() == z3.sat
print('flag{%032x}' % solver.model().eval(crc).as_long())

结果不太乐观,脚本挂了一天一夜也没能跑出来,那么只能尝试别的方法了。然后我又试了下面的:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import z3

def z3_crc128c(data, length):
crctable = z3.Array('crctable', z3.BitVecSort(8), z3.BitVecSort(128))
for i in range(256):
value = i
for _ in range(8):
if value & 1:
value = value >> 1 ^ 0xB595CF9C8D708E2166D545CF7CFDD4F9
else: value >>= 1
crctable = z3.Store(crctable, i, value)
result = z3.BitVecVal(-1, 128)
for _, byte in zip(range(length), data):
result = z3.LShR(result, 8) ^ crctable[z3.Extract(7, 0, result) ^ byte]
return result

flag = z3.Array('flag', z3.IntSort(), z3.BitVecSort(8))
for i, v in enumerate(b'flag{'):
flag = z3.Store(flag, i, v)
flag = z3.Store(flag, 21, ord('}'))
solver = z3.Solver()
crc = z3_crc128c(flag, 22)
solver.add(z3.Concat(*(flag[i] for i in reversed(range(5, 21)))) == crc)
assert solver.check() == z3.sat
r128 = lambda x: int.from_bytes(x.to_bytes(16, 'little'), 'big')
print('flag{%032x}' % r128(solver.model().eval(crc).as_long()))

还是不行。就这么挣扎了几天,最后只能去网上找WP,找到了这个。按照上面所说的CRC的性质,构造一个CRC128矩阵,然后解128个异或方程。但是我用Z3好像还是解不出来,也不知道为啥,于是我用传统的高斯消元法弄了一下,最后得到了flag。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from binascii import hexlify

crctable = []
for i in range(256):
value = i
for _ in range(8):
if value & 1:
value = value >> 1 ^ 0xB595CF9C8D708E2166D545CF7CFDD4F9
else: value >>= 1
crctable.append(value)

def crc128(data):
global crctable
result = -1 % (1 << 128)
for byte in data:
result = result >> 8 ^ crctable[(result ^ byte) & 255]
return result

def _crc128(data):
return crc128(data) ^ crc128(bytes(22))

# solve mx = v
def solve(m, v):
d = len(m)
for a in range(d - 1, 0, -1):
for t in range(a, -1, -1):
if m[t][a] == 1:
break
else: assert 0
m[a], m[t] = m[t], m[a]
v[a], v[t] = v[t], v[a]
for i in range(a):
if m[i][a] == 1:
for j in range(d):
m[i][j] ^= m[a][j]
v[i] ^= v[a]
r = [None] * d
for i in range(d):
r[i] = v[i]
for j in range(i):
r[i] ^= r[j] & m[i][j]
return r

bittest = lambda x, p: (x & (1 << p)) and 1 or 0
matrix = [[None for _ in range(128)] for _ in range(128)]
xor_data = crc128(bytes(22)) ^ _crc128(b'flag{'+bytes(16)+b'}')
crcs = tuple(_crc128(bytes(5 + (i >> 3)) + (bytes((1 << (i & 7),)) + bytes(16 - (i >> 3)))) for i in range(128))
for j in range(128):
for i in range(128):
matrix[i][j] = bittest(crcs[j], i)
if i == j:
matrix[i][j] = 1 ^ matrix[i][j]
vec = list(map(lambda i: bittest(xor_data, i), range(128)))
m2 = [list(m) for m in matrix]
v2 = list(vec)
result = solve(matrix, vec)
for i, m in enumerate(m2):
x = v2[i]
for i in range(128):
x ^= result[i] & m[i]
assert x == 0
flag_fmt = b'flag{%s}'
content = bytearray()
for i in range(16):
x = 0
for j in range(8):
x |= result[i << 3 | j] << j
content.append(x)
flag = flag_fmt % content
assert content == crc128(flag).to_bytes(16, 'little')
print((flag_fmt % hexlify(content)).decode('ascii'))

这个题我感觉还是很有用的,学到了这样一个性质(xy是同长度的位向量):

1
CRC(x ^ y) = CRC(x) ^ CRC(y)

用这个我们就可以实现文件CRC自校验了。还有,我们利用这一性质可以生成Zip Quine,就是这篇文章中提到的,简单的来说,就是一个Zip文件,它在解压后的结果是它自身,也就是无论我们解压这个文件多少次,最终解压结果都和源文件没有区别,就像这个附件:
附件下载

flag{670344c379b7f7fa4555a50fbabaefa4}

第五空间 Universal

wherever && universal

附件下载

这个附件没有后缀,我一开始以为这是个ELF文件,但DiE扫描结果显示这是PE文件?!!我赶紧用记事本打开代码,确实是MZ头,但是——

奇怪的PE文件

感觉DOS头有点奇怪,而且为什么PE文件会有shell代码?继续往下翻,看到一个网址:

一个网址

访问这个网址,里面介绍这是一个APE文件,即在Windows上能以PE文件运行,在Linux上能以Shell脚本文件运行。这个设计非常巧妙,它忽略了原来的DOS头,并让它被Linux解释成赋值语句,这样两种平台都能正确处理文件,运行高效的Native代码而不是脚本。用IDA载入程序,能显示函数,但IDA没有正确显示参数列表。查看对应的汇编,发现这个调用约定是Linux的,也就是通过rdirsirdxrcxr8r9传递参数。而IDA不知道这一点,于是参数解析就乱套了。在网上找了一圈,没能找到怎么修改IDA的解析方式,那我们只能先从Linux版本入手了。

Linux中输入sh Challenge运行这个程序,发现没有任何结果,也没有错误代码。但是我们可以按Shell脚本的方式修改这个文件,比如把它的第二句改成sleep 10(但是要注意用十六进制编辑器改,否则数据就错位了),发现程序确实暂停了10秒。观察这个脚本,发现程序如果没有检测到ape命令,就把自己的ape文件提取到临时目录,然后运行这个文件,以程序自身和其余参数为参数执行。我们可以先把Shell脚本修改一下,使之能把ape提取到我们已知的目录,最终我们得到了一个8KB的文件。文件下载

那我们直接运行这个文件,以Challenge为参数,没有错误,但不知道是否运行成功。用IDA查看里面仅有的几个函数,没发现提取ELF的,那应该就是内存加载了。给这个程序下断点,从IDA远程运行,按F8执行程序直到rip指向的地址发生突变(比如跳到另一个内存段),可以看到最后rip指向0x200C9D处的retn。随后程序就跳到了0x4023C2处。Ctrl+S查看段名,正是我们要的Challenge。

然后我们就可以构造一个能让IDA分析的ELF了。把这些段抠出来,然后写了empty.c

1
2
3
4
#include <stdint.h>
const uint8_t __attribute__((section(".text"))) code[] = {...};
uint8_t __attribute__((section(".data"))) data[] = {...};
uint8_t __attribute__((section(".data"))) data_pad[0xA000];

这个程序不需要写main函数,因为我们稍后手动指定程序的入口点。运行下面的命令,成功制作出可以运行的ELF文件:

1
gcc empty.c -c -o chall.o && ld -static -e 0x4023C2 -o chall chall.o && rm chall.o

这时比赛已经结束了,在这之前,@zsky大佬通过动态调试PE发现flag是通过命令行的方式输入的,而且输入正确会显示Good Job,关键函数是sub_402747。我们直接利用这一结论,查看此函数,与之前直接分析PE相比,这次IDA能正常识别函数参数了,那么ELF的制作应该是成功了!

这个文件不能替代原始文件,因为它无法感知输入命令行的参数。但是我们可以静态分析这个程序,定位到关键函数,稍微处理一下符号名:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
void __fastcall checkflag(int argc, const char **argv)
{
__int64 v2; // rcx
_DWORD *v3; // rdi
const char *v4; // rax
__int128 v5; // xmm1
int v6; // ebx
__int64 v7; // rdi
char v8; // al
__int64 v9; // rax
__int64 v10; // rsi
unsigned __int8 v11; // al
int v12[8]; // [rsp+0h] [rbp-70h] BYREF
_BYTE flag[33]; // [rsp+20h] [rbp-50h] BYREF

if ( argc == 2 )
{
v2 = 8LL;
v3 = &flag[32];
while ( v2 )
{
*v3++ = 0;
--v2;
}
v4 = argv[1];
v5 = *((_OWORD *)v4 + 1);
*(_OWORD *)flag = *(_OWORD *)v4;
*(_OWORD *)&flag[16] = v5;
if ( strlen(flag) == 32 )
{
v6 = 0;
*(_QWORD *)v12 = 0x9E3779B9DEADBEEFLL;
*(_QWORD *)&v12[4] = 0x9E3779B9DEADBEEFLL;
*(_QWORD *)&v12[2] = 0xBEEFDEEDC6EF3720LL;
*(_QWORD *)&v12[6] = 0xBEEFDEEDC6EF3720LL;
sub_40553C((unsigned int *)flag, (int *)&flag[12], v12);
sub_40553C((unsigned int *)&flag[4], (int *)&flag[8], v12);
sub_40553C((unsigned int *)&flag[16], (int *)&flag[24], v12);
sub_40553C((unsigned int *)&flag[20], (int *)&flag[28], v12);
v7 = (unsigned __int8)add(flag[0], flag[31]);
do
{
v8 = v6++;
v9 = v8 & 0x1F;
v10 = (unsigned int)v7 ^ (unsigned __int8)~(flag[v9] ^ flag[v6 & 0x1F]);
flag[v9] = v7 ^ ~(flag[v9] ^ flag[v6 & 0x1F]);
v11 = add(v7, v10);
v7 = v11;
LOBYTE(v7) = v11 ^ 0xA5;
}
while ( v6 != 64 );
if ( !(unsigned int)memcmp((unsigned __int8 *)check, flag, 32uLL) )
((void (*)(void))sub_405595)();
}
}
}

然后就可以写出对应的脚本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from binascii import unhexlify

flag = memoryview(bytearray(b'00000000111111112222222233333333'))
maps = (
(0x4, 0xE, 0x0, 0xF, 0x9, 0x3, 0x2, 0xD, 0x5, 0x1, 0xC, 0x8, 0x7, 0xA, 0xB, 0x6),
(0xD, 0x2, 0x9, 0x0, 0xF, 0x3, 0xA, 0xB, 0x4, 0x5, 0x6, 0xE, 0x7, 0xC, 0x1, 0x8),
(0xF, 0xC, 0xA, 0x6, 0x0, 0x9, 0x5, 0xE, 0x2, 0xB, 0x4, 0x7, 0x8, 0x1, 0xD, 0x3),
(0xD, 0x8, 0x9, 0x5, 0x1, 0x4, 0x2, 0x0, 0x6, 0xA, 0x7, 0xB, 0xC, 0x3, 0xE, 0xF),
(0xB, 0x3, 0xE, 0x5, 0x6, 0x9, 0x1, 0x7, 0x0, 0xD, 0xC, 0x4, 0x8, 0x2, 0xA, 0xF),
(0x1, 0x0, 0x9, 0xB, 0xF, 0x5, 0xC, 0x4, 0xE, 0xD, 0xA, 0x8, 0x2, 0x6, 0x3, 0x7),
(0x3, 0xA, 0x5, 0x8, 0x4, 0xB, 0xD, 0xC, 0x7, 0x6, 0x0, 0x1, 0x2, 0x9, 0xE, 0xF),
(0x3, 0xF, 0xE, 0xA, 0x4, 0x7, 0x0, 0x9, 0x1, 0xD, 0x6, 0x2, 0x5, 0xC, 0xB, 0x8),
)
# umaps[t][maps[t][i]] == i
umaps = tuple(map(
lambda m: tuple(map(
lambda e: e[0],
sorted(enumerate(m), key=lambda e: e[1])
)), maps
))
map2 = (*range(8), *reversed(range(8)), *reversed(range(8)), *reversed(range(8)))
map3 = (0xDEADBEEF, 0x9E3779B9, 0xC6EF3720, 0xBEEFDEED, 0xDEADBEEF, 0x9E3779B9, 0xC6EF3720, 0xBEEFDEED)

def shuffle(v, maps):
r = 0
for i in range(8):
r |= maps[i][v >> (i << 2) & 15] << (i << 2)
return r

# encrypt
def round(v1, v2):
for i in map2:
v1, v2 = shuffle(map3[i] ^ v1 ^ v2, maps), v1
return v2, v1

# decrypt
def uround(v1, v2):
v1, v2 = v2, v1
for i in reversed(map2):
v1, v2 = v2, shuffle(v1, umaps) ^ map3[i] ^ v2
return v1, v2

# encrypt part 1
view = flag.cast('L')
view[0], view[3] = round(view[0], view[3])
view[1], view[2] = round(view[1], view[2])
view[4], view[6] = round(view[4], view[6])
view[5], view[7] = round(view[5], view[7])

# encrypt part 2
view = flag.cast('B')
sum = view[0] + view[31] & 255
for i in range(64):
p, q = i & 31, (i + 1) & 31
view[p] ^= sum ^ view[q] ^ 255
sum = (sum + view[p] ^ 0xA5) & 255

# check simulation result
assert flag == unhexlify('49aac7d93975a0d180c3d175a1250e6ab98ebb23f163421071d343657bed1954')

# encrypted flag
flag = memoryview(bytearray((
0xC2, 0x3A, 0x86, 0xC1, 0x44, 0x07, 0x13, 0x0C, 0x7B, 0xBE, 0x1A, 0x6D, 0xCB, 0xFA, 0x26, 0x99,
0x62, 0x7C, 0x82, 0x66, 0x9F, 0x1C, 0xD9, 0x99, 0x44, 0xC3, 0xB7, 0x1D, 0x67, 0x3C, 0x7B, 0x80
)))
view = flag.cast('B')
# seems irreversible encrypt? enumerate final sum
for sum in range(256):
viewc = memoryview(bytearray(view))
for i in reversed(range(64)):
p, q = i & 31, (i + 1) & 31
sum = (sum ^ 0xA5) - viewc[p] & 255
viewc[p] ^= sum ^ viewc[q] ^ 255
# check initial sum
if sum == (viewc[0] + viewc[31] & 255):
viewc = viewc.cast('L')
viewc[0], viewc[3] = uround(viewc[0], viewc[3])
viewc[1], viewc[2] = uround(viewc[1], viewc[2])
viewc[4], viewc[6] = uround(viewc[4], viewc[6])
viewc[5], viewc[7] = uround(viewc[5], viewc[7])
# check ascii flag
if all(32 <= c < 127 for c in viewc.cast('B')):
print(bytes(viewc).decode('ascii'))

加密分为两个阶段:首先把flag切分为4组,每组两个32位整数,把它们分别进行费斯妥加密(大概)。然后再进行两轮按字节循环加密学术名字叫什么我也不知道。然而,我在后面解密的时候发现,第二轮加密是不可逆的???好在枚举并不复杂,约束条件也比较好找,只要保证最后是ascii字符就可以了。

S3hr0din7er_3_Uni4er3al_D00rs111