文章总结: 本文介绍使用Angr符号执行对付ARM AARCH64平台CFF混淆的技术方法,提供了完整的some_cff_patch.py脚本,通过识别有效块、执行符号执行和修补二进制文件来还原控制流。文章还演示了使用GAMBA工具处理SUB(指令替换)混淆的方法,并给出了针对MBA化简的实际案例。对于类似样本,建议在现有脚本基础上修改,注意识别有效块和修补时避免破坏原有效代码。 综合评分: 94 文章分类: 二进制安全,逆向分析,代码审计,安全工具,漏洞分析
Angr符号执行练习–对付ARM AARCH64 CFF
原创
沈沉舟
青衣十三楼飞花堂
2025年12月23日 07:19 北京
创建: 2025-09-29 10:45
更新: 2025-09-30 10:00
链接: https://scz.617.cn/unix/202509291045.txt
目录:
☆ 背景介绍
☆ some_cff_patch.py
☆ 用GAMBA对付SUB
☆ 背景介绍
参看
OLLVM CFF去平坦化原理 - lyl610abc [2025-8-27]
https://www.52pojie.cn/thread-2056288-1-1.html
上文提供了ARM AARCH64平台的CFF测试用例libmmap_exec.so,作者给了混淆前后的so,据此可检查反CFF结果是否符合预期。我预处理过样本,得到更小的测试用例,foo()、bar()、baz()均有CFF。为什么这么干呢?因为Angr脚本处理原始so非常耗时,具体是proj.analyses.CFG()这一步非常耗时,还会触发许多警告。裁剪后的so,可让我们聚焦被混淆过的目标函数,快速完成符号执行。
IDA的D810插件可对付some_cff_small.so,既对付CFF也对付SUB。本文演示Angr符号执行反AARCH64 CFF。SUB则用GAMBA手工化简之。
完整测试用例打包
https://scz.617.cn/unix/202509291045.txt
https://scz.617.cn/unix/202509291045.7z
☆ some_cff_patch.py
some_cff_patch.py实际源自
https://github.com/cq674350529/deflat/blob/master/flat_control_flow/deflat.py
am_graph模块实际源自
https://github.com/angr/angr-management/blob/master/angrmanagement/utils/graph.py
import sys, struct, collections
import angr, claripy, pyvex
import am_graph
def get_func_from_addr(proj, addr):
    """Return the knowledge-base function associated with ``addr``.

    An exact lookup via ``get_by_addr`` is tried first; if that raises
    ``KeyError`` the result of ``floor_func(addr)`` is returned instead.
    """
    try:
        found = proj.kb.functions.get_by_addr(addr)
    except KeyError:
        found = proj.kb.functions.floor_func(addr)
    return found
def get_insns_of_node(proj, node):
    """Disassemble the bytes covered by ``node`` and return them as a list.

    The raw bytes are read from the loader memory at ``node.addr`` for
    ``node.size`` bytes and decoded with the project's capstone instance.
    """
    raw = proj.loader.memory.load(node.addr, node.size)
    decoder = proj.arch.capstone
    return list(decoder.disasm(raw, node.addr))
def get_cond_jmp(proj, node):
    """Return the last instruction of ``node`` if it is a conditional branch.

    A conditional branch is either a ``B.<cond>`` instruction or one of
    CBZ / CBNZ / TBZ / TBNZ (compared case-insensitively). ``None`` is
    returned for any other trailing instruction.
    """
    last = get_insns_of_node(proj, node)[-1]
    name = last.mnemonic.upper()
    # A mnemonic that starts with "B." can never be plain "B" or "BL", so the
    # prefix test alone identifies the B.<cond> family.
    if name.startswith('B.') or name in ('CBZ', 'CBNZ', 'TBZ', 'TBNZ'):
        return last
    return None
def get_some_nodes_0(supergraph, threshold):
    """Locate the structural nodes of a flattened function by graph degree.

    * prologue: the single node with in-degree 0
    * return node: the single node with out-degree 0 and no ``out_branches``
    * pre-dispatchers: nodes with exactly one successor, exactly one
      out-branch and an in-degree of at least ``threshold``
    * dispatchers: the sole successor of each pre-dispatcher

    Returns ``(prologue_node, retn_node, pre_dispatcher_nodes,
    dispatcher_nodes)`` and prints a summary of what was found. Asserts that
    the prologue and return node are unique and that every category is
    non-empty.
    """
    entry = None
    exit_node = None
    pre_dispatchers = []
    dispatchers = []

    for candidate in supergraph.nodes():
        fan_in = supergraph.in_degree(candidate)
        fan_out = supergraph.out_degree(candidate)

        # The three tests are independent on purpose: one node may satisfy
        # more than one of them.
        if fan_in == 0:
            assert entry is None
            entry = candidate

        if fan_out == 0 and len(candidate.out_branches) == 0:
            assert exit_node is None
            exit_node = candidate

        if fan_out == 1 and len(candidate.out_branches) == 1 and fan_in >= threshold:
            pre_dispatchers.append(candidate)
            dispatchers.append(next(iter(supergraph.successors(candidate))))

    assert entry is not None
    assert exit_node is not None
    assert pre_dispatchers
    assert dispatchers

    print('prologue_node: %#x' % entry.addr)
    print('retn_node: %#x' % exit_node.addr)
    for label, group in (
        ('pre_dispatcher_nodes', pre_dispatchers),
        ('dispatcher_nodes', dispatchers),
    ):
        print(f'{label}[{len(group)}]:')
        for idx, member in enumerate(group):
            print(f'[{idx}] {member.addr:#x} ({member.size})')

    return entry, exit_node, pre_dispatchers, dispatchers
def get_some_nodes_1(proj, supergraph, pre_dispatcher_nodes, prologue_node, retn_node):
    """Split the remaining supergraph nodes into real blocks and filler.

    The prologue, the return node, the pre-dispatchers and any node without
    predecessors are ignored. Every other node is "relevant" when at least one
    predecessor ends in a ``B.EQ`` whose branch target is that node; all
    remaining nodes are collected as blocks to be overwritten with NOPs.

    Returns ``(relevant_nodes, nop_nodes)`` and prints both lists. Each node
    appears in exactly one list and at most once. Asserts that at least one
    relevant node was found.
    """
    skipped_addrs = {node.addr for node in pre_dispatcher_nodes}
    skipped_addrs.add(prologue_node.addr)
    skipped_addrs.add(retn_node.addr)

    relevant_nodes = []
    nop_nodes = []
    for node in supergraph.nodes():
        if node.addr in skipped_addrs:
            continue
        predecessors = list(supergraph.predecessors(node))
        if not predecessors:
            continue

        is_relevant = False
        for predecessor in predecessors:
            insn = get_cond_jmp(proj, predecessor)
            if insn is None or insn.mnemonic.upper() != 'B.EQ':
                continue
            # op_str is expected to look like "#0x510b0": drop the leading
            # character and parse the rest as hex. Comparing against the
            # decoded target avoids relying on the order in which the graph
            # happens to list the predecessor's successors.
            target = int(insn.op_str[1:], 16)
            if node.addr == target:
                is_relevant = True
                break  # one matching predecessor is enough; avoids duplicates

        if is_relevant:
            relevant_nodes.append(node)
        else:
            nop_nodes.append(node)

    assert relevant_nodes

    print(f'nop_nodes[{len(nop_nodes)}]:')
    for i, node in enumerate(nop_nodes):
        print(f'[{i}] {node.addr:#x} - {node.addr+node.size:#x} ({node.size})')
    print(f'relevant_nodes[{len(relevant_nodes)}]:')
    for i, node in enumerate(relevant_nodes):
        print(f'[{i}] {node.addr:#x} - {node.addr+node.size:#x} ({node.size})')
    return relevant_nodes, nop_nodes
def symbolic_execution(proj, keep_blocks, start_addr, hook_addrs, set_value=None):
    """Execute from ``start_addr`` until an address in ``keep_blocks`` is hit.

    proj        -- angr project
    keep_blocks -- container of addresses that terminate the search
    start_addr  -- address the blank state starts at
    hook_addrs  -- addresses of 4-byte instructions to hook (may be empty)
    set_value   -- when not None, the value written into the condition temp of
                   the first ITE expression met at a statement breakpoint

    Returns the address of the first active state found in ``keep_blocks``,
    or ``None`` once no active states remain.
    """

    def skip_insn(state):
        # Installed with length=4 below; the hook removes itself when it fires.
        proj.unhook(state.addr)

    def force_ite_condition(state):
        stmt = state.scratch.irsb.statements[state.inspect.statement]
        exprs = list(stmt.expressions)
        if exprs and isinstance(exprs[0], pyvex.expr.ITE):
            state.scratch.temps[exprs[0].cond.tmp] = set_value
            # Only the first ITE is forced: drop the statement breakpoints.
            state.inspect._breakpoints['statement'] = []

    for hooked in hook_addrs or ():
        proj.hook(hooked, skip_insn, length=4)

    extra_options = {
        angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
        angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS,
        angr.options.BYPASS_UNSUPPORTED_SYSCALL,
    }
    entry_state = proj.factory.blank_state(
        addr=start_addr,
        add_options=extra_options,
        remove_options={angr.options.LAZY_SOLVES},
    )
    if set_value is not None:
        entry_state.inspect.b('statement', when=angr.BP_BEFORE, action=force_ite_condition)

    simgr = proj.factory.simulation_manager(entry_state)
    # Step once before checking, so the starting block itself is never
    # reported even when its address is in keep_blocks.
    simgr.step()
    while simgr.active:
        for live in simgr.active:
            if live.addr in keep_blocks:
                return live.addr
        simgr.step()
    return None
def get_flow(proj, prologue_node, relevant_nodes, retn_node):
    """Recover the real successor(s) of the prologue and every relevant node.

    Each source node is symbolically executed until it reaches another
    relevant node or the return node. A node containing a ``csel`` is run
    twice, once with the forced condition value 1 and once with 0; any other
    node is run once. ``bl``/``blr`` instructions inside the node are passed
    along as hook addresses.

    Returns ``(flow, ins_dict)``: ``flow`` maps a node to the list of reached
    addresses (the condition-1 result comes first), ``ins_dict`` maps a node
    to the first ``csel`` instruction found in it.
    """
    sources = [prologue_node, *relevant_nodes]

    stop_addrs = [node.addr for node in relevant_nodes] + [retn_node.addr]
    print(f'keep_blocks[{len(stop_addrs)}]:')
    for idx, stop in enumerate(stop_addrs):
        print(f'[{idx}] {stop:#x}')
    keep_blocks = set(stop_addrs)

    flow = collections.defaultdict(list)
    ins_dict = {}
    for node in sources:
        block = proj.factory.block(node.addr, size=node.size)
        has_branch = False
        hook_addrs = set()
        for ins in block.capstone.insns:
            if ins.mnemonic.startswith('csel'):
                ins_dict.setdefault(node, ins)  # remember only the first csel
                has_branch = True
            elif ins.mnemonic in {'bl', 'blr'}:
                hook_addrs.add(ins.address)

        if has_branch:
            forced_values = (claripy.BVV(1, 1), claripy.BVV(0, 1))
        else:
            forced_values = (None,)
        for forced in forced_values:
            reached = symbolic_execution(proj, keep_blocks, node.addr, hook_addrs, forced)
            if reached is not None:
                flow[node].append(reached)

    print(f'flow[{len(flow)}]:')
    for idx, (src, dests) in enumerate(flow.items()):
        print(
            '[%d] %#x - %#x (%d) ->' % (idx, src.addr, src.addr+src.size, src.size),
            [hex(child) for child in dests],
        )
    return flow, ins_dict
# Condition mnemonics listed in encoding order: the position in the tuple is
# the value get_j_ins() ORs into the low bits of a conditional branch
# (eq = 0x0 ... le = 0xd).
OPCODES = {
    cond: code
    for code, cond in enumerate(
        ('eq', 'ne', 'hs', 'lo', 'mi', 'pl', 'vs', 'vc', 'hi', 'ls', 'ge', 'lt', 'gt', 'le')
    )
}
# Four-byte pattern that fill_nop() writes over discarded blocks.
OPCODES['nop'] = b'\x1f\x20\x03\xd5'
def fill_nop(proj, buf, addr, size):
    """Overwrite ``size`` bytes of ``buf`` with the NOP pattern.

    ``addr`` is translated to a file offset through the main object. The
    4-byte pattern from ``OPCODES['nop']`` is written once per 4-byte step
    and is byte-reversed first when the architecture reports "Iend_BE".
    """
    pattern = OPCODES['nop']
    if proj.arch.memory_endness == "Iend_BE":
        pattern = pattern[::-1]
    base = proj.loader.main_object.addr_to_offset(addr)
    for word_off in range(0, size, 4):
        for k in range(4):
            buf[base + word_off + k] = pattern[k]
def get_j_ins(f_addr, t_addr, j_type):
    """Encode an AArch64 branch located at ``f_addr`` that jumps to ``t_addr``.

    f_addr -- address of the branch instruction itself
    t_addr -- branch destination
    j_type -- ``'b'`` for an unconditional branch, otherwise a condition
              mnemonic looked up (lower-cased) in ``OPCODES``

    Returns the 4 encoded bytes in little-endian order.

    Raises ``ValueError`` when the displacement is not a multiple of 4, does
    not fit the instruction's offset field, or ``j_type`` does not name a
    condition code. An unknown ``j_type`` raises ``KeyError`` from the
    ``OPCODES`` lookup.
    """
    delta = t_addr - f_addr
    if delta % 4:
        raise ValueError(
            'branch displacement %#x -> %#x is not 4-byte aligned' % (f_addr, t_addr)
        )
    words = delta // 4

    if 'b' == j_type:
        # B: opcode 0x14000000 with a signed 26-bit word offset in bits 0..25.
        if not -(1 << 25) <= words < (1 << 25):
            raise ValueError(
                'target %#x is out of range for B at %#x' % (t_addr, f_addr)
            )
        encoding = 0x14000000 | (words & 0x03ffffff)
    else:
        cond = OPCODES[j_type.lower()]
        if not isinstance(cond, int):
            raise ValueError('%r is not a condition code' % (j_type,))
        # B.cond: opcode 0x54000000, signed 19-bit word offset in bits 5..23,
        # condition code in the low bits.
        if not -(1 << 18) <= words < (1 << 18):
            raise ValueError(
                'target %#x is out of range for B.%s at %#x' % (t_addr, j_type, f_addr)
            )
        encoding = 0x54000000 | ((words & 0x7ffff) << 5) | cond

    return struct.pack('<I', encoding)
def patch_ins(proj, buf, addr, ins):
    """Copy the bytes ``ins`` into ``buf`` at the file offset of ``addr``."""
    start = proj.loader.main_object.addr_to_offset(addr)
    stop = start + len(ins)
    buf[start:stop] = ins
def patch_buf(proj, buf, nop_nodes, flow, ins_dict):
    """Apply the recovered control flow to ``buf``.

    Every node in ``nop_nodes`` is filled with NOPs. Then, for each entry of
    ``flow``:

    * one child  -- an unconditional ``b`` to the child is written over the
      parent's last instruction if that instruction is already ``B``,
      otherwise over the 4 bytes that follow it, so the parent's own last
      instruction is kept;
    * otherwise  -- the ``csel`` recorded in ``ins_dict`` is replaced by a
      conditional branch to the first child, using the csel's last operand as
      the condition, and the following 4 bytes by a ``b`` to the second child.
    """

    def encode(src, dst, kind):
        # Build the branch and byte-swap it when the arch reports "Iend_BE".
        word = get_j_ins(src, dst, kind)
        if proj.arch.memory_endness == "Iend_BE":
            word = word[::-1]
        return word

    for filler in nop_nodes:
        fill_nop(proj, buf, filler.addr, filler.size)

    for parent, children in flow.items():
        if len(children) == 1:
            tail = get_insns_of_node(proj, parent)[-1]
            site = tail.address
            if tail.mnemonic.upper() != 'B':
                site += 4
            jump = encode(site, children[0], 'b')
            print('Patch %#x => %#x' % (site, children[0]))
            patch_ins(proj, buf, site, jump)
            continue

        select = ins_dict[parent]
        first = encode(select.address, children[0], select.op_str.split(',')[-1].strip())
        print('Patch %#x => %#x, %#x' % (select.address, children[0], children[1]))
        patch_ins(proj, buf, select.address, first)
        second = encode(select.address+4, children[1], 'b')
        patch_ins(proj, buf, select.address+4, second)
def dosth(proj, buf, addr):
    """Deflatten the function at ``addr`` and patch the result into ``buf``.

    Runs the pipeline in order: build the supergraph, locate the structural
    nodes (pre-dispatcher in-degree threshold 4), separate relevant from
    filler nodes, recover the flow by symbolic execution, and patch. A blank
    line is printed after each stage.
    """
    print(f'func {addr:#x}')
    target_func = get_func_from_addr(proj, addr)
    supergraph = am_graph.to_supergraph(target_func.transition_graph)

    # The dispatcher list is returned as well, but is not needed afterwards.
    prologue, retn, pre_dispatchers, _dispatchers = get_some_nodes_0(supergraph, 4)
    print("")

    relevant, fillers = get_some_nodes_1(proj, supergraph, pre_dispatchers, prologue, retn)
    print("")

    flow, ins_dict = get_flow(proj, prologue, relevant, retn)
    print("")

    patch_buf(proj, buf, fillers, flow, ins_dict)
    print("")
def main(argv):
    """Command-line entry point.

    argv[1]  -- path of the input binary
    argv[2]  -- path the patched binary is written to
    argv[3:] -- optional function addresses (any ``int(x, 0)`` literal such as
                ``0x51020``); when omitted, the three addresses of the bundled
                sample are used

    Exits with a usage message when fewer than two paths are given. Raises
    ``RuntimeError`` if patching changed the size of the file image.
    """
    default_addrs = (0x51020, 0x51290, 0x514f0)

    if len(argv) < 3:
        prog = argv[0] if argv else 'some_cff_patch.py'
        sys.exit(f'Usage: {prog} <input.so> <output.so> [func_addr ...]')
    in_path, out_path = argv[1], argv[2]
    addrlist = tuple(int(text, 0) for text in argv[3:]) or default_addrs

    base_addr = 0
    proj = angr.Project(
        in_path,
        load_options={
            'auto_load_libs': False,
            'main_opts': {
                'base_addr': base_addr,
            },
        },
    )
    # The CFG object itself is not used; the analysis is run so that
    # proj.kb.functions is populated before get_func_from_addr() queries it.
    proj.analyses.CFG(
        force_smart_scan=False,
        force_complete_scan=True,
        normalize=True,
        resolve_indirect_jumps=True,
        fail_fast=True,
    )

    with open(in_path, 'rb') as f:
        buf = bytearray(f.read())
    origsize = len(buf)

    for addr in addrlist:
        dosth(proj, buf, addr)

    # Explicit check instead of assert so it also runs under ``python -O``.
    if len(buf) != origsize:
        raise RuntimeError(
            f'patched image size changed: {origsize} -> {len(buf)} bytes'
        )

    with open(out_path, 'wb') as f:
        f.write(buf)
# Run main() only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)
暂不清楚some_cff_small.so使用何种CFF工具生成,不能简单套用标准OLLVM CFF的反混淆过程。针对此特例,可检查汇编指令,B.EQ指令的跳转目标即“有效块”,另一分支可舍弃。符号执行与以前的套路相同,x86需要Hook call,ARM需要Hook bl或blr。恢复控制流与以前的套路相同,x86关注cmov,ARM关注csel。Patch时,对len(children)为1的情形,检查parent最后一条指令,若非b指令,需Patch下一条指令,避免破坏原有效代码,这是针对此特例的工程实践踩过的坑。
将来碰上其他ARM AARCH64样本,可在some_cff_patch.py基础上修改,一是寻找识别“有效块”的办法,二是注意Patch时勿破坏原有效代码。细节部分需要具体样本具体分析,整体思路不会有大变化。
some_cff_patch.py含有历史遗迹代码,出于某些个人考虑,未精简。
$ python3 some_cff_patch.py some_cff_small.so some_cff_new.so
输出较多,只展示foo()的信息
func 0x51020
prologue_node: 0x51020
retn_node: 0x51278
pre_dispatcher_nodes[2]:
[0] 0x51288 (4)
[1] 0x511ac (4)
dispatcher_nodes[2]: // 实际未使用
[0] 0x51048 (24)
[1] 0x510e4 (24)
nop_nodes[10]:
[0] 0x51048 - 0x51060 (24)
[1] 0x51060 - 0x51078 (24)
[2] 0x510e4 - 0x510fc (24)
[3] 0x51078 - 0x51090 (24)
[4] 0x510fc - 0x51114 (24)
[5] 0x51090 - 0x510a8 (24)
[6] 0x51114 - 0x5112c (24)
[7] 0x510a8 - 0x510b0 (8)
[8] 0x5112c - 0x51144 (24)
[9] 0x51144 - 0x5114c (8)
relevant_nodes[7]:
[0] 0x510b0 - 0x510e4 (52)
[1] 0x511d8 - 0x5124c (116)
[2] 0x5114c - 0x51174 (40)
[3] 0x51194 - 0x511ac (24)
[4] 0x5124c - 0x51278 (44)
[5] 0x51174 - 0x51194 (32)
[6] 0x511b0 - 0x511d8 (40)
keep_blocks[8]:
[0] 0x510b0
[1] 0x511d8
[2] 0x5114c
[3] 0x51194
[4] 0x5124c
[5] 0x51174
[6] 0x511b0
[7] 0x51278
flow[8]:
[0] 0x51020 - 0x51048 (40) -> ['0x510b0']
[1] 0x510b0 - 0x510e4 (52) -> ['0x5114c']
[2] 0x511d8 - 0x5124c (116) -> ['0x5124c']
[3] 0x5114c - 0x51174 (40) -> ['0x51194', '0x51174']
[4] 0x51194 - 0x511ac (24) -> ['0x511b0']
[5] 0x5124c - 0x51278 (44) -> ['0x510b0']
[6] 0x51174 - 0x51194 (32) -> ['0x511b0']
[7] 0x511b0 - 0x511d8 (40) -> ['0x511d8', '0x51278']
Patch 0x51048 => 0x510b0
Patch 0x510e4 => 0x5114c
Patch 0x51248 => 0x5124c
Patch 0x51168 => 0x51194, 0x51174
Patch 0x511ac => 0x511b0
Patch 0x51274 => 0x510b0
Patch 0x51190 => 0x511b0
Patch 0x511cc => 0x511d8, 0x51278
☆ 用GAMBA对付SUB
用IDA64分析some_cff_new.so
// IDA pseudocode of foo() from some_cff_new.so.
// The stored byte is still written in MBA form; the GAMBA run shown in this
// article reduces an expression of this shape to a plain XOR of its two inputs.
__int64 __fastcall foo(__int64 a1, __int64 a2, char a3)
{
int i;
for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
*(_BYTE *)(a2 + i) = (~*(_BYTE *)(a1 + i) & 0xC2 | *(_BYTE *)(a1 + i) & 0x3D) ^ (~a3 & 0xC2 | a3 & 0x3D);
return 123LL;
}
// IDA pseudocode of bar() from some_cff_new.so.
// Same structure as foo() with masks 0xB3 / 0x4C; the GAMBA run shown in this
// article reduces this expression shape to a plain XOR as well.
__int64 __fastcall bar(__int64 a1, __int64 a2, char a3)
{
int i;
for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
*(_BYTE *)(a2 + i) = (~*(_BYTE *)(a1 + i) & 0xB3 | *(_BYTE *)(a1 + i) & 0x4C) ^ (~a3 & 0xB3 | a3 & 0x4C);
return 610LL;
}
// IDA pseudocode of baz() from some_cff_new.so.
// The second check still contains an MBA expression; the GAMBA run shown in
// this article simplifies ~(~(a2 + 4095) | 0xfff) to (a2 + 4095) & -4096.
__int64 __fastcall baz(__int64 a1, __int64 a2, unsigned int a3, unsigned int a4, unsigned int a5, __int64 a6)
{
if ( a6 < 0 )
return 0LL;
if ( ~(~(a2 + 4095) | 0xFFF) < a2 )
return 0LL;
return _mmap(a1, a2, a3, a4, a5, a6);
}
some_cff_new.so中已经没有CFF了,但有“指令替换”,涉及MBA。
从IDA 8.3起,有内置插件gooMBA,某些情况下可化简MBA。官方blog与众多MBA化简工具比较后,把gooMBA吹得天花乱坠的,但我用some_cff_new.so实测,未能化简MBA,失望。相比之下,D810、GAMBA均成功化简some_cff_new.so中的MBA。
$ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 8 "(~x & 0xc2 | x & 0x3d) ^ (~a3 & 0xc2 | a3 & 0x3d)"
a3^x
$ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 8 "(~x & 0xb3 | x & 0x4c) ^ (~a3 & 0xb3 | a3 & 0x4c)"
a3^x
$ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 64 "~(~(a2 + 4095) | 0xfff)"
-4096&4095+a2
注意: +的优先级高于&,故上式等价于 -4096 & (4095+a2)
作为对比,some_normal_small.so中这几个函数如下
// IDA pseudocode of foo() from some_normal_small.so, listed for comparison:
// each byte of a1 is XORed with a3 and stored at the same index of a2.
__int64 __fastcall foo(__int64 a1, __int64 a2, char a3)
{
int i;
for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
*(_BYTE *)(a2 + i) = *(_BYTE *)(a1 + i) ^ a3;
return 123LL;
}
// IDA pseudocode of bar() from some_normal_small.so, listed for comparison:
// same byte-wise XOR loop as foo(), with a different return value.
__int64 __fastcall bar(__int64 a1, __int64 a2, char a3)
{
int i;
for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
*(_BYTE *)(a2 + i) = *(_BYTE *)(a1 + i) ^ a3;
return 610LL;
}
// IDA pseudocode of baz() from some_normal_small.so, listed for comparison:
// returns 0 when a6 is negative or when (a2 + 4095) with its low 12 bits
// cleared is smaller than a2; otherwise forwards all arguments to _mmap().
__int64 __fastcall baz(__int64 a1, signed __int64 a2, unsigned int a3, unsigned int a4, unsigned int a5, __int64 a6)
{
if ( a6 < 0 )
return 0LL;
if ( (__int64)((a2 + 4095) & 0xFFFFFFFFFFFFF000LL) >= a2 )
return _mmap(a1, a2, a3, a4, a5, a6);
return 0LL;
}
免责声明:
本文所载程序、技术方法仅面向合法合规的安全研究与教学场景,旨在提升网络安全防护能力,具有明确的技术研究属性。
任何单位或个人未经授权,将本文内容用于攻击、破坏等非法用途的,由此引发的全部法律责任、民事赔偿及连带责任,均由行为人独立承担,本站不承担任何连带责任。
本站内容均为技术交流与知识分享目的发布,若存在版权侵权或其他异议,请通过邮件联系处理,具体联系方式可点击页面上方的联系我。
本文转载自:青衣十三楼飞花堂 沈沉舟《Angr符号执行练习–对付ARM AARCH64 CFF》
版权声明
本站仅做备份收录,仅供研究与教学参考之用。
读者将信息用于其他用途的,全部法律及连带责任由读者自行承担,本站不承担任何责任。










评论