# Source code for asm_analyser.architectures.arm.instr_translator

'''Responsible for translating the ARM instructions.
'''
import re
from typing import List, Tuple
from asm_analyser.architectures.arm import auxiliary_functions
from asm_analyser.blocks.code_block import CodeBlock


def translate(code_blocks: List[CodeBlock], opcode: str, *args) -> str:
    '''Translates an arm instruction to C using a dictionary.

    Registers are first prefixed/suffixed for the C template and an inline
    shift operand is folded into a single C expression. Memory and branch
    instructions are then delegated to their dedicated translators; every
    other instruction is looked up in ``TRANSLATIONS`` after its status
    suffix and condition code have been split off.

    Parameters
    ----------
    code_blocks: list[CodeBlock]
        The code blocks containing all the instructions
    opcode : str
        Name of the instruction
    args : tuple(str)
        Operands for the instruction

    Returns
    -------
    str
        The translated C code. An empty string is returned (and a message
        is printed) if the opcode is not present in ``TRANSLATIONS``.
    '''
    operands = list(args)
    # keep the untouched operands for the diagnostic message below
    input_args = operands.copy()

    operands = _add_pre_suffix(opcode, operands)
    translation, operands = _translate_shift(opcode, operands)

    # memory instructions (loading and storing) are handled differently
    if opcode.startswith(('ld', 'st', 'push', 'pop')):
        return translation + _translate_mem_acc(opcode, operands)

    # branch instructions as well; every opcode of the "bic" family (bic,
    # bics, biceq, ...) is a data-processing instruction and must not be
    # routed to the branch translator
    if re.match(r'^b(?!ic)', opcode):
        return translation + _translate_branch(opcode, operands, code_blocks)

    # separate the condition code and status flag from the opcode
    complete_opcode = opcode
    opcode, status, condition = _match_instruction(opcode)

    if not opcode:
        print('Opcode is missing in translations:')
        print(f'{complete_opcode} {" ".join(input_args)}\n')
        return ''

    translation += TRANSLATIONS[opcode].format(*operands)

    if status:
        translation += _translate_status(opcode, operands)

    if condition:
        # wrap the whole translation in the matching C if-statement
        return COND_TRANSLATIONS[condition] + translation + '}\n'

    return translation
def _match_instruction(opcode: str) -> Tuple[str, str, str]:
    '''Divides the opcode into the opcode itself, the status bit and the
    condition code.

    Parameters
    ----------
    opcode : str
        Complete opcode in ARM assembly.

    Returns
    -------
    str
        Opcode without suffix (empty if the opcode is unknown).
    str
        Status bit ('s') if used, otherwise an empty string.
    str
        Condition code if used, otherwise an empty string.
    '''
    condition = opcode[-2:]

    # The last two characters look like a condition code. This can be a
    # false positive (e.g. "teq", "mls", "movs"), which is why we fall
    # through to the suffix-less interpretation when nothing matches.
    if len(opcode) > 1 and condition in COND_TRANSLATIONS:
        if opcode[:-2] in TRANSLATIONS:
            return opcode[:-2], '', condition
        if (len(opcode) > 3 and opcode[-3] == 's'
                and opcode[:-3] in TRANSLATIONS):
            return opcode[:-3], 's', condition

    # no condition code: plain opcode, optionally followed by 's'
    if opcode in TRANSLATIONS:
        return opcode, '', ''
    # endswith() keeps an empty opcode from raising an IndexError
    if opcode.endswith('s') and opcode[:-1] in TRANSLATIONS:
        return opcode[:-1], 's', ''

    return '', '', ''


def _register_pointers(registers: List[str]) -> str:
    '''Builds the comma separated C argument list that passes a pointer to
    the integer view of every given register.'''
    joined = '.i, &_asm_analysis_.'.join(registers)
    return f'&_asm_analysis_.{joined}.i'


def _translate_mem_acc(opcode: str, args: List[str]) -> str:
    '''Translates instructions that load from or store into memory.

    These instructions are handled differently as there are a few
    variations for each of these opcode.

    Parameters
    ----------
    opcode : str
        Opcode in ARM assembly.
    args : list[str]
        Arguments passed to the opcode.

    Returns
    -------
    str
        C translation of the ARM instruction.
    '''
    translation = ''
    cond_code = ''

    # split condition code from opcode
    # NOTE(review): ldr/str opcodes apparently carry digits appended by an
    # earlier processing step (the first one is checked against '8' below);
    # the condition code then sits directly in front of the first digit.
    digit_match = None
    if 'push' not in opcode and 'pop' not in opcode:
        digit_match = re.search(r'\d', opcode)

    if digit_match is not None:
        digit_idx = digit_match.start()
        candidate = opcode[digit_idx - 2:digit_idx]
        if candidate in COND_TRANSLATIONS:
            cond_code = candidate
            opcode = opcode[:digit_idx - 2] + opcode[digit_idx:]
    elif opcode[-2:] in COND_TRANSLATIONS:
        # push/pop and opcodes without any digit (e.g. "ldmia"): the
        # condition code can only be a trailing suffix. Previously a
        # digit-less load/store opcode crashed on the missing match.
        cond_code = opcode[-2:]
        opcode = opcode[:-2]

    # translate ldr and str instructions for 1, 2, 4 and 8 bytes
    if opcode.startswith(('ldr', 'str')):
        target = f'&_asm_analysis_.{args[0]}.i'
        # operands starting with "LC" or "L<digit>" are used by name,
        # everything else is treated as a register of the struct
        if re.match(r'^L(C|\d)', args[1]):
            base = f'&{args[1]}'
        else:
            base = f'&_asm_analysis_.{args[1]}.i'

        if opcode[3:4] == '8':
            # 8 byte accesses use a register pair: the second register is
            # the one following args[0]; "r11" is addressed as "fp"
            reg1 = re.sub(r'[0-9]+$',
                          lambda m: str(int(m.group()) + 1), args[0])
            if reg1 == 'r11':
                reg1 = 'fp'
            translation = (f'{opcode}({target}, &_asm_analysis_.{reg1}.i, '
                           f'{base}, {args[2]});\n')
        else:
            translation = f'{opcode}({target}, {base}, {args[2]});\n'
    # translate ldm and stm
    elif opcode.startswith(('ldm', 'stm')):
        registers = args[1:]
        # the decrementing variants receive the registers in reverse order
        if opcode[3:5] in ('da', 'db'):
            registers = list(reversed(registers))
        translation = (f'{opcode}(&_asm_analysis_.{args[0]}.i, '
                       f'{len(args)-1}, {_register_pointers(registers)});\n')
    # translate push and pop
    elif opcode.startswith(('push', 'pop')):
        registers = list(reversed(args)) if 'pop' in opcode else args
        translation = (f'{opcode}({len(args)}, '
                       f'{_register_pointers(registers)});\n')

    # loading into pc leaves the current function
    if opcode.startswith(('ldr', 'ldm', 'pop')) and 'pc' in args:
        if cond_code:
            translation += '//BRANCHTAKEN\n'
            return (f'{COND_TRANSLATIONS[cond_code]}{translation}'
                    'return;\n}\n//BRANCHNOTTAKEN\n')
        translation += 'return;\n'

    if cond_code:
        return COND_TRANSLATIONS[cond_code] + translation + '}\n'
    return translation


def _translate_branch(opcode: str, args: List[str],
                      code_blocks: List['CodeBlock']) -> str:
    '''Translates branch instruction like b or bl.

    Parameters
    ----------
    opcode : str
        Opcode in ARM assembly.
    args : list[str]
        Arguments passed to the opcode.
    code_blocks : list[CodeBlock]
        List containing the labeled code blocks with their instructions.

    Returns
    -------
    str
        C translation of the ARM instruction.
    '''
    translation = ''
    cond_code = ''
    target = args[0]

    if opcode[-2:] in COND_TRANSLATIONS:
        cond_code = opcode[-2:]
        opcode = opcode[:-2]

    # translate library calls using auxiliary functions
    if target in auxiliary_functions.CALL_DICT:
        translation = auxiliary_functions.CALL_DICT[target]
        if opcode == 'b':
            # a plain branch does not come back, so return after the call
            translation += 'return;\n'

    # we cannot use goto for functions
    if opcode == 'b' and any(block.name == target and block.is_function
                             for block in code_blocks):
        translation = f'{target}();\nreturn;\n'

    if not translation:
        translation = TRANSLATIONS[opcode].format(*args)

    if cond_code:
        return (f'{COND_TRANSLATIONS[cond_code]}//BRANCHTAKEN\n'
                f'{translation}}}\n//BRANCHNOTTAKEN\n')

    return translation


def _add_pre_suffix(opcode: str, args: List[str]) -> List[str]:
    '''This function is responsible for adding the .i or .f suffix to the
    registers of the union type in C. It also adds the prefix for the
    struct that is used in the template.

    Parameters
    ----------
    opcode : str
        Opcode in ARM assembly.
    args : list[str]
        Arguments passed to the opcode. The list is modified in place.

    Returns
    -------
    list[str]
        Arguments in which the registers now have the correct suffix and
        prefix for C.
    '''
    # memory and branch instructions ("b" followed by anything but "i")
    # build their operands themselves
    suffix_re = r'(^ld.*)|(^st.*)|(^push.*)|(^pop.*)|(^b$)|(^b[^i].*)'
    reg_re = r'(^r[0-9]{1,2}$)|(^sp$)|(^fp$)|(^lr$)|(^pc$)|(^ip$)'

    if not re.match(suffix_re, opcode) and opcode != 'bl':
        for i, operand in enumerate(args):
            if re.match(reg_re, operand):
                args[i] = f'_asm_analysis_.{operand}.i'
    elif (opcode.startswith(('ldr', 'str')) and len(args) > 2
            and re.match(reg_re, args[2])):
        # only the third operand of ldr/str is rewritten here; the length
        # check avoids an IndexError for shorter operand lists
        args[2] = f'_asm_analysis_.{args[2]}.i'

    return args


def _translate_shift(opcode: str, args: List[str]) -> Tuple[str, List[str]]:
    '''Translates bit shifts which can be used within other instructions
    in ARM assembly.

    Parameters
    ----------
    opcode : str
        Opcode in ARM assembly.
    args : list[str]
        Arguments passed to the opcode.

    Returns
    -------
    str
        C code to update the carry bit if necessary.
    list[str]
        The partially translated parameters of the instruction. If the
        last three operands are "value, shift, amount", they are replaced
        by one C expression performing the shift.
    '''
    if len(args) <= 2 or args[-2] not in SHIFT_TRANSLATIONS:
        return '', args

    value, shift_op, amount = args[-3], args[-2], args[-1]
    translation = ''

    # for some opcodes, we need to update the carry flag
    carry_opcodes = ('movs', 'mvns', 'ands', 'orrs', 'orns', 'eors', 'bics',
                     'teq', 'tst')
    if opcode.startswith(carry_opcodes):
        if shift_op in ('ror', 'lsr', 'asr'):
            translation = (f'_asm_analysis_.c = {value} & '
                           f'(1 << {amount} - 1);\n')
        elif shift_op == 'lsl':
            translation = (f'_asm_analysis_.c = {value} & '
                           f'((uint32_t) 0x80000000 >> {amount} - 1);\n')

    shifted = SHIFT_TRANSLATIONS[shift_op].format(value, amount)
    return translation, [*args[:-3], shifted]


def _translate_status(opcode: str, args: List[str]) -> str:
    '''Translates the use of the suffix bit s in ARM assembly.

    Sometimes the suffix 's' is append in ARM assembly which induces an
    update of the status bits (N, V, C, Z).

    Parameters
    ----------
    opcode : str
        Opcode in ARM assembly.
    args : list[str]
        Arguments passed to the opcode.

    Returns
    -------
    str
        C translation that performs the update of the status bits.
    '''
    dest = args[0]
    lines = [
        f'_asm_analysis_.z = {dest} == 0;\n',
        f'_asm_analysis_.n = {dest} & 0x80000000;\n',
    ]

    # update the carry flag depending on the operation
    if opcode.startswith('ad'):
        lines.append(f'_asm_analysis_.c = ((uint32_t) {dest}) < '
                     f'((uint32_t) {args[1]});\n')
        lines.append(f'_asm_analysis_.v = ({args[1]}&0x80000000) == '
                     f'({args[2]}&0x80000000) '
                     f'&& ({dest}&0x80000000) != ({args[1]}&0x80000000);\n')
    elif opcode.startswith(('sub', 'rsb', 'sbc')):
        # rsb computes "{2} - {1}" (see TRANSLATIONS), so the roles of the
        # two source operands are swapped for the flag computation
        if opcode.startswith('rsb'):
            minuend, subtrahend = args[2], args[1]
        else:
            minuend, subtrahend = args[1], args[2]
        lines.append(f'_asm_analysis_.c = ((uint32_t) {minuend}) >= '
                     f'((uint32_t) {subtrahend});\n')
        lines.append(f'_asm_analysis_.v = ({minuend}&0x80000000) != '
                     f'({subtrahend}&0x80000000) '
                     f'&& ({dest}&0x80000000) != ({minuend}&0x80000000);\n')
    elif opcode.startswith(('ror', 'lsr', 'asr')):
        lines.append(f'_asm_analysis_.c = {args[1]} & '
                     f'(1 << {args[2]} - 1);\n')
    elif opcode.startswith('lsl'):
        lines.append(f'_asm_analysis_.c = {args[1]} & '
                     f'((uint32_t) 0x80000000 >> {args[2]} - 1);\n')

    return ''.join(lines)


# C templates for the supported opcodes; the placeholders are filled with
# the (already prefixed) operands of the instruction in their original order.
TRANSLATIONS = {
    'ctr': '_asm_analysis_.counters[{0}] ++;\n',
    'memctr0': '_asm_analysis_.load_counter ++;\n',
    'memctr1': '_asm_analysis_.store_counter ++;\n',
    'add': '{0} = {1} + ({2});\n',
    # the carry flag lives in the _asm_analysis_ struct like all other flags
    'adc': '{0} = {1} + ({2}) + _asm_analysis_.c;\n',
    'sub': '{0} = {1} - ({2});\n',
    'sbc': '{0} = {1} - ({2}) - !_asm_analysis_.c;\n',
    'mul': '{0} = ({1}) * ({2});\n',
    'mla': '{0} = (({1}) * ({2})) + ({3});\n',
    'mls': '{0} = {3} - (({1}) * ({2}));\n',
    'mov': '{0} = {1};\n',
    'movt': '{0} = {0} | ({1} << 16);\n',
    'movw': '{0} = {1};\n',
    'mvn': '{0} = ~{1};\n',
    'nop': '',
    'b': 'goto {0};\n',
    'bx': 'return;\n',
    'bl': '{0}();\n',
    'cmp': ('_asm_analysis_.tmp = {0} - {1};\n'
            '_asm_analysis_.z = _asm_analysis_.tmp == 0;\n'
            '_asm_analysis_.n = _asm_analysis_.tmp & 0x80000000;\n'
            '_asm_analysis_.c = ((uint32_t) {0}) >= ((uint32_t) {1});\n'
            '_asm_analysis_.v = ({0}&0x80000000) != ({1}&0x80000000) '
            '&& (_asm_analysis_.tmp&0x80000000) != ({0}&0x80000000);\n'),
    'cmn': ('_asm_analysis_.tmp = {0} + {1};\n'
            '_asm_analysis_.z = _asm_analysis_.tmp == 0;\n'
            '_asm_analysis_.n = _asm_analysis_.tmp & 0x80000000;\n'
            '_asm_analysis_.c = ((uint32_t) _asm_analysis_.tmp) < '
            '((uint32_t) {0});\n'
            '_asm_analysis_.v = ({0}&0x80000000) == ({1}&0x80000000) '
            '&& (_asm_analysis_.tmp&0x80000000) != ({0}&0x80000000);\n'),
    'and': '{0} = {1} & {2};\n',
    'bic': '{0} = {1} & ~{2};\n',
    'rsb': '{0} = {2} - {1};\n',
    'ror': '{0} = ((uint32_t){1} >> {2}) | ((uint32_t){1} << (32-{2}));\n',
    'lsr': '{0} = (uint32_t){1} >> {2};\n',
    'asr': '{0} = {1} >> {2};\n',
    'lsl': '{0} = (uint32_t){1} << {2};\n',
    'eor': '{0} = {1} ^ {2};\n',
    'orr': '{0} = {1} | {2};\n',
    'umull': 'umull(&{0}, &{1}, &{2}, &{3});\n',
    'smull': 'smull(&{0}, &{1}, &{2}, &{3});\n',
    'tst': ('_asm_analysis_.tmp = {0} & {1};\n'
            '_asm_analysis_.z = _asm_analysis_.tmp == 0;\n'
            '_asm_analysis_.n = _asm_analysis_.tmp & 0x80000000;\n'),
    # teq has two operands just like tst
    'teq': ('_asm_analysis_.tmp = {0} ^ {1};\n'
            '_asm_analysis_.z = _asm_analysis_.tmp == 0;\n'
            '_asm_analysis_.n = _asm_analysis_.tmp & 0x80000000;\n'),
    'uxtb': '{0} = 0xff & (uint8_t){1};\n',
    'uxth': '{0} = 0xffff & (uint16_t){1};\n',
    'uxtab': '{0} = (0xff & (uint8_t){2}) + {1};\n',
    'uxtah': '{0} = (0xffff & (uint16_t){2}) + {1};\n',
    'sxtb': '{0} = (0xff & {1}) << 24 >> 24;\n',
    'sxth': '{0} = (0xffff & {1}) << 16 >> 16;\n',
    'sxtab': '{0} = ((0xff & {2}) << 24 >> 24) + {1};\n',
    'sxtah': '{0} = ((0xffff & {2}) << 16 >> 16) + {1};\n',
    'ubfx': '{0} = ({1} >> {2}) & ((1 << {3}) - 1);\n',
    'clz': 'clz(&{0}, &{1});\n',
}

# opening line of the C if-statement that guards a conditional instruction
COND_TRANSLATIONS = {
    'eq': 'if (_asm_analysis_.z){\n',
    'ne': 'if (!_asm_analysis_.z){\n',
    'ge': 'if (_asm_analysis_.n == _asm_analysis_.v){\n',
    'gt': 'if (!_asm_analysis_.z && _asm_analysis_.n == _asm_analysis_.v){\n',
    'le': 'if (_asm_analysis_.z || _asm_analysis_.n != _asm_analysis_.v){\n',
    'lt': 'if (_asm_analysis_.n != _asm_analysis_.v){\n',
    'ls': 'if (!_asm_analysis_.c || _asm_analysis_.z){\n',
    'cs': 'if (_asm_analysis_.c){\n',
    'cc': 'if (!_asm_analysis_.c){\n',
    'hi': 'if (_asm_analysis_.c && !_asm_analysis_.z){\n',
    'mi': 'if (_asm_analysis_.n){\n',
    'pl': 'if (!_asm_analysis_.n){\n',
    'al': 'if (true){\n',
    'nv': 'if (false){\n',
    'vs': 'if (_asm_analysis_.v){\n',
    'vc': 'if (!_asm_analysis_.v){\n',
}

# C expressions for shifts that are applied to an operand inline;
# {0} is the shifted value and {1} the shift amount
SHIFT_TRANSLATIONS = {
    'lsr': '((uint32_t){0} >> {1})',
    'asr': '({0} >> {1})',
    'lsl': '((uint32_t){0} << {1})',
    'ror': '(((uint32_t){0} >> {1}) | ((uint32_t){0} << (32-{1})))',
}