Source code for asm_analyser.architectures.arm.processor

'''Provides methods for further processing of parsed ARM assembly.
'''
import re
import copy
from typing import List
from asm_analyser import processor
from asm_analyser.blocks.code_block import CodeBlock
from asm_analyser.blocks.basic_block import BasicBlock

# Two-letter ARM condition-code suffixes that may trail a mnemonic
# (e.g. the 'eq' in 'ldreq').  'hs' and 'lo' are the alternative spellings
# of 'cs' and 'cc'; they must be listed so that a conditional word access
# such as 'ldrhs' / 'strhs' has its suffix stripped before the mnemonic is
# searched for the halfword forms 'ldrh' / 'strh'.
COND_CODES = {
    'eq', 'ne', 'ge', 'gt', 'le', 'lt', 'ls', 'cs',
    'cc', 'hi', 'mi', 'pl', 'al', 'nv', 'vs', 'vc',
    'hs', 'lo',
}


class ArmProcessor(processor.Processor):
    '''Implements the Processor Class for ARM assembly.
    '''

    # Sized/signed single-transfer mnemonics and how they are normalised:
    # (mnemonic searched for, canonical mnemonic, access width in bytes,
    # signed flag).  Entries are tried in order and the first hit wins.
    _ACCESS_VARIANTS = (
        ('ldrb', 'ldr', '1', '0'),
        ('ldrsb', 'ldr', '1', '1'),
        ('ldrh', 'ldr', '2', '0'),
        ('ldrsh', 'ldr', '2', '1'),
        ('ldrd', 'ldr', '8', '0'),
        ('strb', 'str', '1', '0'),
        ('strh', 'str', '2', '0'),
        ('strd', 'str', '8', '0'),
    )

    # Opcode patterns, kept as raw strings so the regex escapes are not
    # interpreted (and warned about) as string escapes.
    _SINGLE_TRANSFER = r'(^ldr.*)|(^str.*)'
    _MULTI_TRANSFER = r'(^ldm.*)|(^stm.*)'
    _BRACKETED = r'\[(.*?)\]'
    _RAW_DATA_DIRECTIVE = r'^\.(word|ascii)$'
    _OPERAND_NOISE = r'[\[\]!\.]'
    _BRANCH = (r'^((b)|(bl)|(bx))'
               r'(?:eq|ne|cs|hs|cc|lo|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al)*$')
    _PC_WRITER = r'(^ldr.*)|(^ldm.*)|(^pop.*)'

    @staticmethod
    def create_ir(blocks: List[CodeBlock]) -> List[CodeBlock]:
        '''Rewrites parsed instructions into the normalised form.

        Every instruction is indexed as ``instr[1]`` (opcode string) and
        ``instr[2]`` (list of operand strings).  ldr/str opcodes get the
        access width, index-update, post-index and signed flags appended,
        ldm/stm opcodes get a write-back flag appended, and the operands
        are cleaned up afterwards.

        Parameters
        ----------
        blocks : List[CodeBlock]
            The parsed code blocks. They are not modified.

        Returns
        -------
        List[CodeBlock]
            Deep copies of the input blocks holding the rewritten
            instructions; blocks without instructions are left out.
        '''
        new_blocks = []

        for block in blocks:
            new_block = copy.deepcopy(block)
            # Rewrite the deep-copied instructions rather than the ones of
            # the input block: the operand lists are edited in place below
            # and that must not leak into the caller's blocks.
            pending = new_block.instructions
            new_block.instructions = []

            for instr in pending:
                if re.match(ArmProcessor._SINGLE_TRANSFER, instr[1]):
                    instr = ArmProcessor._encode_single_transfer(instr)
                elif re.match(ArmProcessor._MULTI_TRANSFER, instr[1]):
                    instr = ArmProcessor._encode_multi_transfer(instr)

                ArmProcessor._clean_operands(instr)
                new_block.instructions.append(instr)

            if new_block.instructions:
                new_blocks.append(new_block)

        return new_blocks

    @staticmethod
    def _encode_single_transfer(instr: tuple) -> tuple:
        '''Returns an ldr/str instruction with its flags folded into the
        opcode as ``<mnemonic><bytes><update><post_index><signed>``.
        '''
        # unify argument length to 3
        if len(instr[2]) == 2:
            instr = (*instr[:2], [*instr[2], '0'])
        operands = instr[2]

        # look for index updates (exclamation mark)
        update = '1' if '!' in operands[2] else '0'

        # look for post-indexed addressing: a closed bracket expression
        # followed by a real third operand
        if re.match(ArmProcessor._BRACKETED, operands[1]) \
                and operands[2] != '0':
            post_index = '1'
        else:
            post_index = '0'

        # The size suffix is searched for in the mnemonic without its
        # condition code, so that e.g. 'ldrhi' is not taken for 'ldrh'.
        opcode = instr[1]
        if opcode[-2:] in COND_CODES:
            opcode = opcode[:-2]

        mnemonic = instr[1]
        byte_amount = '4'
        signed = '0'
        if opcode[-2:] not in COND_CODES:
            for variant, canonical, width, sign in \
                    ArmProcessor._ACCESS_VARIANTS:
                if variant in opcode:
                    mnemonic = mnemonic.replace(variant, canonical)
                    byte_amount = width
                    signed = sign
                    break

        return (
            instr[0],
            f'{mnemonic}{byte_amount}{update}{post_index}{signed}',
            operands)

    @staticmethod
    def _encode_multi_transfer(instr: tuple) -> tuple:
        '''Returns an ldm/stm instruction with 'ldmia'/'stmia' shortened to
        'ldm'/'stm' and a write-back flag ('1' or '0') appended.
        '''
        mnemonic = instr[1]
        if 'ldmia' in mnemonic:
            mnemonic = mnemonic.replace('ldmia', 'ldm')
        elif 'stmia' in mnemonic:
            mnemonic = mnemonic.replace('stmia', 'stm')

        # an exclamation mark on the first operand requests write-back
        writeback = '1' if '!' in instr[2][0] else '0'
        return (instr[0], mnemonic + writeback, instr[2])

    @staticmethod
    def _clean_operands(instr: tuple) -> None:
        '''Strips addressing punctuation from the operand list of `instr`
        and rewrites relocation specifiers. Edits the list in place.
        '''
        operands = instr[2]

        # remove square brackets, exclamation marks and dots, except for
        # the .word and .ascii directives
        if not re.match(ArmProcessor._RAW_DATA_DIRECTIVE, instr[1]):
            for j, operand in enumerate(operands):
                operands[j] = re.sub(
                    ArmProcessor._OPERAND_NOISE, '', operand)

        # replace specifiers like :lower16: and :upper16: and LANCHOR
        for i, op in enumerate(operands):
            if 'LANCHOR' in op:
                operands[i] = operands[i].replace(
                    'ANCHOR', 'C').replace('.', '')
            if ':lower16:' in op:
                val = operands[i].replace(':lower16:', '_asm_analysis_.')
                operands[i] = f'({val} & 0xffff)'
            if ':upper16:' in op:
                val = operands[i].replace(':upper16:', '_asm_analysis_.')
                operands[i] = f'((uint32_t){val} >> 16)'

    @staticmethod
    def get_basic_blocks(blocks: List[CodeBlock]) -> List[BasicBlock]:
        '''Splits the code blocks into basic blocks.

        A basic block is closed after a b/bl/bx instruction (with or
        without condition suffix), after an ldr/ldm/pop instruction that
        has 'pc' among its operands, and at the end of its code block.

        Parameters
        ----------
        blocks : List[CodeBlock]
            The code blocks; those whose `is_code` is falsy are skipped.

        Returns
        -------
        List[BasicBlock]
            The basic blocks in order, each with `parent_block` set to the
            name of the code block it was cut from.
        '''
        basic_blocks = []

        # create one or more basic blocks for each code block
        for code_block in blocks:
            if not code_block.is_code:
                continue

            basic_block = BasicBlock()
            basic_block.parent_block = code_block.name
            last_index = len(code_block.instructions) - 1

            # loop over the instructions and look for separating
            # instructions
            for i, instr in enumerate(code_block.instructions):
                basic_block.instructions.append(instr)

                ends_block = (
                    i == last_index
                    or re.match(ArmProcessor._BRANCH, instr[1])
                    or (re.match(ArmProcessor._PC_WRITER, instr[1])
                        and 'pc' in instr[2]))

                # add basic block to list if branch instruction or end of
                # block occurs
                if ends_block:
                    basic_blocks.append(basic_block)
                    basic_block = BasicBlock()
                    basic_block.parent_block = code_block.name

        return basic_blocks