Source code for osaca.db_interface

#!/usr/bin/env python3

import math
import os
import sys
import warnings

import ruamel.yaml

from osaca.semantics import MachineModel


[docs]def sanity_check(arch: str, verbose=False, output_file=sys.stdout):
    """
    Checks the database for missing TP/LT values, instructions might missing int the ISA DB and
    duplicate instructions.

    :param arch: micro-arch key to define DB to check
    :type arch: str
    :param verbose: verbose output flag, defaults to `False`
    :type verbose: bool, optional
    :param output_file: output stream specifying where to write output, defaults to :class:`sys.      stdout`
    :type output_file: stream, optional

    """
    # load arch machine model
    arch_mm = MachineModel(arch=arch)
    data = arch_mm['instruction_forms']
    # load isa machine model
    isa = arch_mm.get_ISA()
    isa_mm = MachineModel(arch='isa/{}'.format(isa))
    num_of_instr = len(data)

    # check arch DB entries
    (
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        duplicate_instr_arch,
    ) = _check_sanity_arch_db(arch_mm, isa_mm)
    # check ISA DB entries
    duplicate_instr_isa, only_in_isa = _check_sanity_isa_db(arch_mm, isa_mm)

    report = _get_sanity_report(
        num_of_instr,
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        duplicate_instr_arch,
        duplicate_instr_isa,
        only_in_isa,
        verbose=verbose,
        colors=True if output_file == sys.stdout else False,
    )
    print(report, file=output_file)


[docs]def import_benchmark_output(arch, bench_type, filepath, output=sys.stdout):
    """
    Import benchmark results from micro-benchmarks.

    :param arch: target architecture key
    :type arch: str
    :param bench_type: key for defining type of benchmark output
    :type bench_type: str
    :param filepath: filepath to the output file
    :type filepath: str
    :param output: output stream to dump, defaults to sys.stdout
    :type output: stream
    """
    supported_bench_outputs = ['ibench', 'asmbench']
    assert os.path.exists(filepath)
    if bench_type not in supported_bench_outputs:
        raise ValueError('Benchmark type is not supported.')
    with open(filepath, 'r') as f:
        input_data = f.readlines()
    db_entries = None
    mm = MachineModel(arch)
    if bench_type == 'ibench':
        db_entries = _get_ibench_output(input_data, mm.get_ISA())
    elif bench_type == 'asmbench':
        db_entries = _get_asmbench_output(input_data, mm.get_ISA())
    # write entries to DB
    for entry in db_entries:
        mm.set_instruction_entry(db_entries[entry])
    if output is None:
        print(mm.dump())
    else:
        mm.dump(stream=output)


##################
# HELPERS IBENCH #
##################


def _get_asmbench_output(input_data, isa):
    """
    Parse asmbench output in the format

    1 MNEMONIC[-OP1[_OP2][...]]
    2 Latency: X cycles
    3 Throughput: Y cycles
    4

    and creates per 4 lines in the input_data one entry in the database.

    :param str input_data: content of asmbench output file
    :param str isa: ISA of target architecture (x86, AArch64, ...)
    : return: dictionary with all new db_entries
    """
    db_entries = {}
    for i in range(0, len(input_data), 4):
        if input_data[i + 3].strip() != '':
            print('asmbench output not in the correct format! Format must be: ', file=sys.stderr)
            print(
                '-------------\nMNEMONIC[-OP1[_OP2][...]]\nLatency: X cycles\n'
                'Throughput: Y cycles\n\n-------------',
                file=sys.stderr,
            )
            print(
                'Entry {} and all further entries won\'t be added.'.format((i / 4) + 1),
                file=sys.stderr,
            )
            break
        else:
            i_form = input_data[i].strip()
            mnemonic = i_form.split('-')[0]
            operands = i_form.split('-')[1].split('_')
            operands = [_create_db_operand(op, isa) for op in operands]
            entry = {
                'name': mnemonic,
                'operands': operands,
                'throughput': _validate_measurement(float(input_data[i + 2].split()[1]), 'tp'),
                'latency': _validate_measurement(float(input_data[i + 1].split()[1]), 'lt'),
                'port_pressure': None,
            }
            if not entry['throughput'] or not entry['latency']:
                warnings.warn(
                    'Your measurement for {} looks suspicious'.format(i_form)
                    + ' and was not added. Please inspect your benchmark.'
                )
            db_entries[i_form] = entry
    return db_entries


def _get_ibench_output(input_data, isa):
    """Parse the standard output of ibench and add instructions to DB."""
    db_entries = {}
    for line in input_data:
        if 'Using frequency' in line or len(line) == 0:
            continue
        instruction = line.split(':')[0]
        key = '-'.join(instruction.split('-')[:2])
        if key in db_entries:
            # add only TP/LT value
            entry = db_entries[key]
        else:
            mnemonic = instruction.split('-')[0]
            operands = instruction.split('-')[1].split('_')
            operands = [_create_db_operand(op, isa) for op in operands]
            entry = {
                'name': mnemonic,
                'operands': operands,
                'throughput': None,
                'latency': None,
                'port_pressure': None,
            }
        if 'TP' in instruction:
            entry['throughput'] = _validate_measurement(float(line.split()[1]), 'tp')
            if not entry['throughput']:
                warnings.warn(
                    'Your THROUGHPUT measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        elif 'LT' in instruction:
            entry['latency'] = _validate_measurement(float(line.split()[1]), 'lt')
            if not entry['latency']:
                warnings.warn(
                    'Your LATENCY measurement for {} looks suspicious'.format(key)
                    + ' and was not added. Please inspect your benchmark.'
                )
        db_entries[key] = entry
    return db_entries


def _validate_measurement(measurement, mode):
    """
    Check if latency has a maximum deviation of 0.05% and throughput is a reciprocal of a
    an integer number.
    """
    if mode == 'lt':
        if (
            math.floor(measurement) * 1.05 >= measurement
            or math.ceil(measurement) * 0.95 <= measurement
        ):
            # Value is probably correct, so round it to the estimated value
            return float(round(measurement))
        # Check reciprocal only if it is a throughput value
    elif mode == 'tp':
        reciprocals = [1 / x for x in range(1, 11)]
        for reci in reciprocals:
            if reci * 0.95 <= measurement <= reci * 1.05:
                # Value is probably correct, so round it to the estimated value
                return round(reci, 5)
    # No value close to an integer or its reciprocal found, we assume the
    # measurement is incorrect
    return None


def _create_db_operand(operand, isa):
    """Get DB operand by input string and ISA."""
    if isa == 'aarch64':
        return _create_db_operand_aarch64(operand)
    elif isa == 'x86':
        return _create_db_operand_x86(operand)


def _create_db_operand_aarch64(operand):
    """Get DB operand for AArch64 by operand string."""
    if operand == 'i':
        return {'class': 'immediate', 'imd': 'int'}
    elif operand in 'wxbhsdq':
        return {'class': 'register', 'prefix': operand}
    elif operand.startswith('v'):
        return {
            'class': 'register',
            'prefix': 'v',
            'shape': operand[1:2] if operand[1:2] != '' else 'd',
        }
    elif operand.startswith('m'):
        return {
            'class': 'memory',
            'base': 'x' if 'b' in operand else None,
            'offset': 'imd' if 'o' in operand else None,
            'index': 'gpr' if 'i' in operand else None,
            'scale': 8 if 's' in operand else 1,
            'pre-indexed': True if 'r' in operand else False,
            'post-indexed': True if 'p' in operand else False,
        }
    else:
        raise ValueError('Parameter {} is not a valid operand code'.format(operand))


def _create_db_operand_x86(operand):
    """Get DB operand for AArch64 by operand string."""
    if operand == 'r':
        return {'class': 'register', 'name': 'gpr'}
    elif operand in 'xyz':
        return {'class': 'register', 'name': operand + 'mm'}
    elif operand == 'i':
        return {'class': 'immediate', 'imd': 'int'}
    elif operand.startswith('m'):
        return {
            'class': 'memory',
            'base': 'gpr' if 'b' in operand else None,
            'offset': 'imd' if 'o' in operand else None,
            'index': 'gpr' if 'i' in operand else None,
            'scale': 8 if 's' in operand else 1,
        }
    else:
        raise ValueError('Parameter {} is not a valid operand code'.format(operand))


########################
# HELPERS SANITY CHECK #
########################


def _check_sanity_arch_db(arch_mm, isa_mm):
    """Do sanity check for ArchDB by given ISA."""
    suspicious_prefixes_x86 = ['vfm', 'fm']
    suspicious_prefixes_arm = ['fml', 'ldp', 'stp', 'str']
    if arch_mm.get_ISA().lower() == 'aarch64':
        suspicious_prefixes = suspicious_prefixes_arm
    if arch_mm.get_ISA().lower() == 'x86':
        suspicious_prefixes = suspicious_prefixes_x86

    # returned lists
    missing_throughput = []
    missing_latency = []
    missing_port_pressure = []
    suspicious_instructions = []
    duplicate_instr_arch = []

    for instr_form in arch_mm['instruction_forms']:
        # check value in DB entry
        if instr_form['throughput'] is None:
            missing_throughput.append(instr_form)
        if instr_form['latency'] is None:
            missing_latency.append(instr_form)
        if instr_form['port_pressure'] is None:
            missing_port_pressure.append(instr_form)
        # check entry against ISA DB
        for prefix in suspicious_prefixes:
            if instr_form['name'].lower().startswith(prefix):
                # check if instruction in ISA DB
                if isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
                    # if not, mark them as suspicious and print it on the screen
                    suspicious_instructions.append(instr_form)
        # instr forms with less than 3 operands might need an ISA DB entry due to src_reg operands
        if (
            len(instr_form['operands']) < 3
            and 'mov' not in instr_form['name'].lower()
            and not instr_form['name'].lower().startswith('j')
            and instr_form not in suspicious_instructions
            and isa_mm.get_instruction(instr_form['name'], instr_form['operands']) is None
        ):
            suspicious_instructions.append(instr_form)
        # check for duplicates in DB
        if arch_mm._check_for_duplicate(instr_form['name'], instr_form['operands']):
            duplicate_instr_arch.append(instr_form)
    # every entry exists twice --> uniquify
    tmp_list = []
    for i in range(0, len(duplicate_instr_arch)):
        tmp = duplicate_instr_arch.pop()
        if tmp not in duplicate_instr_arch:
            tmp_list.append(tmp)
    duplicate_instr_arch = tmp_list
    return (
        missing_throughput,
        missing_latency,
        missing_port_pressure,
        suspicious_instructions,
        duplicate_instr_arch,
    )


def _check_sanity_isa_db(arch_mm, isa_mm):
    """Do sanity check for an ISA DB."""
    # returned lists
    duplicate_instr_isa = []
    only_in_isa = []

    for instr_form in isa_mm['instruction_forms']:
        # check if instr is missing in arch DB
        if arch_mm.get_instruction(instr_form['name'], instr_form['operands']) is None:
            only_in_isa.append(instr_form)
        # check for duplicates
        if isa_mm._check_for_duplicate(instr_form['name'], instr_form['operands']):
            duplicate_instr_isa.append(instr_form)
    # every entry exists twice --> uniquify
    tmp_list = []
    for i in range(0, len(duplicate_instr_isa)):
        tmp = duplicate_instr_isa.pop()
        if tmp not in duplicate_instr_isa:
            tmp_list.append(tmp)
    duplicate_instr_isa = tmp_list

    return duplicate_instr_isa, only_in_isa


def _get_sanity_report(
    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, verbose=False, colors=False
):
    """Get sanity summary report."""
    s = ''
    # non-verbose summary
    s += 'SUMMARY\n----------------------\n'
    s += '{}% ({}/{}) of instruction forms have no throughput value.\n'.format(
        round(100 * len(m_tp) / total), len(m_tp), total
    )
    s += '{}% ({}/{}) of instruction forms have no latency value.\n'.format(
        round(100 * len(m_l) / total), len(m_l), total
    )
    s += '{}% ({}/{}) of instruction forms have no port pressure assignment.\n'.format(
        round(100 * len(m_pp) / total), len(m_pp), total
    )
    s += '{}% ({}/{}) of instruction forms might miss an ISA DB entry.\n'.format(
        round(100 * len(suspic_instr) / total), len(suspic_instr), total
    )
    s += '{} duplicate instruction forms in uarch DB.\n'.format(len(dup_arch))
    s += '{} duplicate instruction forms in ISA DB.\n'.format(len(dup_isa))
    s += (
        '{} instruction forms in ISA DB are not referenced by instruction '.format(len(only_isa))
        + 'forms in uarch DB.\n'
    )
    s += '----------------------\n'
    # verbose version
    if verbose:
        s += _get_sanity_report_verbose(
            total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, colors=colors
        )
    return s


def _get_sanity_report_verbose(
    total, m_tp, m_l, m_pp, suspic_instr, dup_arch, dup_isa, only_isa, colors=False
):
    """Get the verbose part of the sanity report with all missing instruction forms."""
    BRIGHT_CYAN = '\033[1;36;1m' if colors else ''
    BRIGHT_BLUE = '\033[1;34;1m' if colors else ''
    BRIGHT_RED = '\033[1;31;1m' if colors else ''
    BRIGHT_MAGENTA = '\033[1;35;1m' if colors else ''
    BRIGHT_YELLOW = '\033[1;33;1m' if colors else ''
    CYAN = '\033[36m' if colors else ''
    YELLOW = '\033[33m' if colors else ''
    WHITE = '\033[0m' if colors else ''

    s = ''
    s += 'Instruction forms without throughput value:\n' if len(m_tp) != 0 else ''
    for instr_form in m_tp:
        s += '{}{}{}\n'.format(BRIGHT_BLUE, _get_full_instruction_name(instr_form), WHITE)
    s += 'Instruction forms without latency value:\n' if len(m_l) != 0 else ''
    for instr_form in m_l:
        s += '{}{}{}\n'.format(BRIGHT_RED, _get_full_instruction_name(instr_form), WHITE)
    s += 'Instruction forms without port pressure assignment:\n' if len(m_pp) != 0 else ''
    for instr_form in m_pp:
        s += '{}{}{}\n'.format(BRIGHT_MAGENTA, _get_full_instruction_name(instr_form), WHITE)
    s += 'Instruction forms which might miss an ISA DB entry:\n' if len(suspic_instr) != 0 else ''
    for instr_form in suspic_instr:
        s += '{}{}{}\n'.format(BRIGHT_CYAN, _get_full_instruction_name(instr_form), WHITE)
    s += 'Duplicate instruction forms in uarch DB:\n' if len(dup_arch) != 0 else ''
    for instr_form in dup_arch:
        s += '{}{}{}\n'.format(YELLOW, _get_full_instruction_name(instr_form), WHITE)
    s += 'Duplicate instruction forms in ISA DB:\n' if len(dup_isa) != 0 else ''
    for instr_form in dup_isa:
        s += '{}{}{}\n'.format(BRIGHT_YELLOW, _get_full_instruction_name(instr_form), WHITE)
    s += (
        'Instruction forms existing in ISA DB but not in uarch DB:\n' if len(only_isa) != 0 else ''
    )
    for instr_form in only_isa:
        s += '{}{}{}\n'.format(CYAN, _get_full_instruction_name(instr_form), WHITE)
    return s


###################
# GENERIC HELPERS #
###################


def _get_full_instruction_name(instruction_form):
    """Get full instruction form name/identifier string out of given instruction form."""
    operands = []
    for op in instruction_form['operands']:
        op_attrs = [
            y + ':' + str(op[y])
            for y in list(filter(lambda x: True if x != 'class' else False, op))
        ]
        operands.append('{}({})'.format(op['class'], ','.join(op_attrs)))
    return '{}  {}'.format(instruction_form['name'], ','.join(operands))


def __represent_none(self, data):
    """Get YAML None representation."""
    return self.represent_scalar(u'tag:yaml.org,2002:null', u'~')


def _create_yaml_object():
    """Create YAML module with None representation."""
    yaml_obj = ruamel.yaml.YAML()
    yaml_obj.representer.add_representer(type(None), __represent_none)
    return yaml_obj


def __dump_data_to_yaml(filepath, data):
    """Dump data to YAML file at given filepath."""
    # first add 'normal' meta data in the right order (no ordered dict yet)
    meta_data = dict(data)
    del meta_data['instruction_forms']
    del meta_data['port_model_scheme']
    with open(filepath, 'w') as f:
        ruamel.yaml.dump(meta_data, f, allow_unicode=True)
    with open(filepath, 'a') as f:
        # now add port model scheme in |-scheme for better readability
        ruamel.yaml.dump(
            {'port_model_scheme': data['port_model_scheme']},
            f,
            allow_unicode=True,
            default_style='|',
        )
        # finally, add instruction forms
        ruamel.yaml.dump({'instruction_forms': data['instruction_forms']}, f, allow_unicode=True)