Parse Ansible Log

This is a quick helper script that parses Ansible logs for analysis.

Original Version

def parse_ansible_log(logfile):
    """Group the lines of an ansible log by the hosts named in its recap.

    The log is split at the last line containing ``RECAP``.  Every line
    after it that has at least six whitespace-separated fields is treated
    as a per-host summary whose sixth field is the host name.  Every line
    before it that contains a host name (plain substring match) is
    attached to that host.

    Args:
        logfile: Path of the ansible log file to read.

    Returns:
        dict mapping host name to a list of stripped lines: the host's
        summary line first, then the matching log lines in file order.
        Empty when the log has no ``RECAP`` line.
    """
    with open(logfile) as deploy:
        data = deploy.readlines()

    # Start from "no recap found" so a truncated log yields an empty
    # result instead of a NameError further down.
    logonly, results = data, []
    for num, line in enumerate(data):
        if "RECAP" in line:
            # No break on purpose: with several recaps the last one wins.
            logonly = data[:num]
            results = data[(num + 1):]

    hosts = {}
    for line in results:
        fields = line.split()
        if len(fields) < 6:
            # Blank or short trailer line: there is no host field to read.
            continue
        hosts[fields[5]] = [line.strip()]

    for host, entries in hosts.items():
        entries.extend(line.strip() for line in logonly if host in line)

    return hosts

# Script body: classify every host found in deploy.log and print one
# "host # reason" line per host.
# Throughout, hosts[host][0] is the host's recap summary line and
# hosts[host][-1] is the last line collected for that host (see
# parse_ansible_log).
if __name__ == "__main__":

    hosts = parse_ansible_log("deploy.log")

    unreachable = []
    failed = []
    success = []

    # First pass: sort hosts into buckets based on the recap counters.
    # These are substring tests, so "failed=1" also matches "failed=10",
    # and counters such as "failed=2" match none of the checks below.
    for host in hosts.keys():
        if "unreachable=1" in hosts[host][0]:
            unreachable.append(host)
        if "failed=1" in hosts[host][0]:
            if "and repo_installed" in hosts[host][-1]:
                # This particular failure is counted as a success.
                success.append(host)
            elif "/bin/python" in hosts[host][-1]:
                # Skips the rest of the loop body: the host is added to
                # neither the failed nor the success list.
                continue
            else:
                failed.append(host)
        if "failed=0" in hosts[host][0] and "unreachable=0" in hosts[host][0]:
            success.append(host)
        # if "/bin/python" in hosts[host][-1]:
        #     print(host + " ansible_python_interpreter=/usr/local/bin/python")

    # Final report tables: host name -> short status text.
    hostsuccess = {}
    hostfails = {}

    # print("__________________SUCCESS________________________")
    # [print(line) for line in success]

    for host in success:
        hostsuccess[host] = "successful"

    # Second pass: turn the last line of each unreachable host into a short
    # reason; the raw line is kept when no known phrase matches.
    # print("__________________UNREACHABLE____________________")
    # [print(line) for line in unreachable]
    for host in unreachable:
        if "uthent" in hosts[host][-1]:
            hostfails[host] = "authentication error"
        elif "Name or service not known" in hosts[host][-1]:
            hostfails[host] = "DNS Error"
        elif "timed out" in hosts[host][-1]:
            hostfails[host] = "timed out"
        elif "SSH protocol" in hosts[host][-1]:
            hostfails[host] = "ssh protocol"
        elif "Unable to connect to port 22" in hosts[host][-1]:
            hostfails[host] = "Unable to connect to port 22"
        elif "Network is unreachable" in hosts[host][-1]:
            hostfails[host] = "Network is unreachable"
        else:
            hostfails[host] = hosts[host][-1]

    # Third pass: same idea for failed hosts. A host that is in both the
    # unreachable and failed lists ends up with the reason assigned here,
    # because this loop runs later and overwrites hostfails[host].
    # print("__________________FAILED_________________________")
    #[print(line) for line in failed]

    for host in failed:
        if "cache_update" in hosts[host][-1]:
            hostfails[host] = "apt error"
        elif "simplejson" in hosts[host][-1].lower():
            hostfails[host] = "simplejson error"
        elif "arch.rc" in hosts[host][-1].lower():
            # Reported under the same label as the simplejson case.
            hostfails[host] = "simplejson error"
        elif "is listed more than once" in hosts[host][-1].lower():
            hostfails[host] = "Repo is listed more than once"
        elif "python2 bindings for rpm" in hosts[host][-1].lower():
            hostfails[host] = "python2 bindings for rpm"
        elif "found available" in hosts[host][-1].lower():
            hostfails[host] = "No package matching"
        elif "repomd.xml" in hosts[host][-1].lower():
            hostfails[host] = "HTTP Error 404"
        elif "baseurl" in hosts[host][-1].lower():
            hostfails[host] = "Cannot find a valid baseurl"
        elif "yum_base" in hosts[host][-1].lower():
            hostfails[host] = "YumBase' object has no attribute 'preconf'"
        else:
            hostfails[host] = hosts[host][-1]

    # Report: successes first, then every unreachable/failed host.
    for key,value in hostsuccess.items():
        print(key,'#',value)

    for key,value in hostfails.items():
        print(key,'#',value)

Biggest Issues

  • Most of the script is buried underneath if __name__ == "__main__":

  • No comments

  • Lots of looping over the same set of data

  • Spaghetti logic

  • Insufficient validation checks

New Version

#!/usr/bin/env python
'''
This script takes the path name to an ansible log, parses
the data, and produces a more digestible form.
'''
import logging
import re
import sys


# Ordered table of patterns used to classify a host's log line.
# Each entry has:
#   'key': compiled regex searched against the line
#   'log': short label reported when the regex matches
#   'lvl': result bucket ('success', 'unreachable' or 'failed')
# Order matters: parse_results stops at the first entry that matches.
# Patterns are raw strings so regex escapes are not read as (invalid)
# string escapes. Entries flagged re.IGNORECASE correspond to the checks
# the original script ran against a lowercased line.
parse_map = [
    # success
    {   'key': re.compile(r'unreachable=0\s+failed=0'),
        'log': 'successful',
        'lvl': 'success'},
    # unreachable
    {   'key': re.compile(r'uthent'),
        'log': 'authentication error',
        'lvl': 'unreachable'},
    {   'key': re.compile(r'Name or service not known'),
        'log': 'DNS Error',
        'lvl': 'unreachable'},
    {   'key': re.compile(r'timed out'),
        'log': 'timed out',
        'lvl': 'unreachable'},
    {   'key': re.compile(r'SSH protocol'),
        'log': 'ssh protocol',
        'lvl': 'unreachable'},
    {   'key': re.compile(r'Unable to connect to port 22'),
        'log': 'Unable to connect to port 22',
        'lvl': 'unreachable'},
    {   'key': re.compile(r'Network is unreachable'),
        'log': 'Network is unreachable',
        'lvl': 'unreachable'},
    # failed
    {   'key': re.compile(r'cache_update'),
        'log': 'apt error',
        'lvl': 'failed'},
    # The needle is 'simplejson'; 'simplejson error' is only the label.
    {   'key': re.compile(r'simplejson', re.IGNORECASE),
        'log': 'simplejson error',
        'lvl': 'failed'},
    {   'key': re.compile(r'arch\.rc', re.IGNORECASE),
        'log': 'simplejson error',
        'lvl': 'failed'},
    {   'key': re.compile(r'is listed more than once', re.IGNORECASE),
        'log': 'Repo is listed more than once',
        'lvl': 'failed'},
    {   'key': re.compile(r'python2 bindings for rpm', re.IGNORECASE),
        'log': 'python2 bindings for rpm',
        'lvl': 'failed'},
    {   'key': re.compile(r'found available', re.IGNORECASE),
        'log': 'No package matching',
        'lvl': 'failed'},
    {   'key': re.compile(r'repomd\.xml', re.IGNORECASE),
        'log': 'HTTP Error 404',
        'lvl': 'failed'},
    {   'key': re.compile(r'baseurl', re.IGNORECASE),
        'log': 'Cannot find a valid baseurl',
        'lvl': 'failed'},
    {   'key': re.compile(r'yum_base', re.IGNORECASE),
        'log': "YumBase' object has no attribute 'preconf'",
        'lvl': 'failed'},
    {   'key': re.compile(r'/bin/python'),
        'log': 'Unsupported python version',
        'lvl': 'failed'},
]


def main():
    '''
    Parse the ansible log named on the command line and print a summary

    Exits with status 1 when the argument count is wrong or when the log
    yields no results.
    '''
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        logging.critical('Exactly one argument (input file) is supported')
        sys.exit(1)

    parsed = parse_log(cli_args[0])
    if parsed:
        print_results(parsed)
        return

    logging.critical('Error reading log data')
    sys.exit(1)


def parse_log(path):
    '''
    Load the log at path and classify the hosts found in it

    Returns the mapping built by parse_results, or None when read_file
    returns nothing (None or an empty list).
    '''
    raw_lines = read_file(path)
    return parse_results(read_hosts(raw_lines)) if raw_lines else None


def read_hosts(log_data):
    '''
    Group log lines by the hosts listed after the RECAP line

    log_data is a list of log lines. It is split at the last line
    containing 'RECAP'. Each later line with at least six
    whitespace-separated fields is taken as a host summary whose sixth
    field is the host name. Each earlier line containing a host name
    (plain substring match) is attached to that host.

    Returns a dict mapping host name to a list of stripped lines: the
    summary line first, then the matching log lines in their original
    order. Returns an empty dict (and logs a warning) when no host summary
    line is found.
    '''
    # Start from "no recap found" so a log without a RECAP line produces an
    # empty result instead of an unbound-variable error.
    logonly, results = log_data, []
    for num, line in enumerate(log_data):
        if 'RECAP' in line:
            # No break on purpose: with several recaps the last one wins.
            logonly = log_data[:num]
            results = log_data[(num + 1):]

    hosts = {}
    for line in results:
        fields = line.split()
        if len(fields) < 6:
            # Blank or short trailer line: there is no host field to read.
            continue
        hosts[fields[5]] = [line.strip()]

    if not hosts:
        logging.warning('No host summary lines found in log data')

    for host, entries in hosts.items():
        entries.extend(line.strip() for line in logonly if host in line)

    return hosts


def parse_results(hosts, patterns=None):
    '''
    Classify each host by the last line recorded for it

    hosts maps host name to a list of log lines. patterns is an ordered
    list of dicts with a compiled regex under 'key', a label under 'log'
    and a bucket name under 'lvl'; it defaults to the module-level
    parse_map.

    Returns a dict of buckets ('unreachable', 'failed', 'unknown',
    'success', plus any other 'lvl' named by patterns), each mapping host
    name to the label of the first matching pattern. Hosts matching no
    pattern go to 'unknown' with the unmatched line in the label.
    '''
    if patterns is None:
        patterns = parse_map

    results = {
        'unreachable': {},
        'failed': {},
        'unknown': {},
        'success': {}}

    for host, log in hosts.items():
        # NOTE(review): only the last line is searched, so a host whose
        # last line is not its recap summary cannot match the 'success'
        # pattern -- confirm this is intended.
        last_line = log[-1] if log else ''
        for pmap in patterns:
            if pmap['key'].search(last_line):
                results.setdefault(pmap['lvl'], {})[host] = pmap['log']
                break
        else:
            # Runs only when no pattern matched; a per-iteration check would
            # also file hosts that match a later pattern under 'unknown'.
            results['unknown'][host] = 'Unknown: {}'.format(last_line)

    return results


def print_results(results):
    '''
    Write the classified hosts and per-status totals to stdout

    Every host outside the 'success' bucket is printed as host#label,
    followed by a 'Counts:' section with the size of each bucket.
    '''
    for status in results:
        if status != 'success':
            for host, res in results[status].items():
                print('{}#{}'.format(host, res))

    print('\nCounts:')
    for status in results:
        padded = status.ljust(14)
        print('  {}: {}'.format(padded, len(results[status])))


def read_file(path):
    '''
    Return the lines of a text file as a list of strings

    Returns None, after logging a warning, when the file cannot be opened,
    read or decoded.
    '''
    try:
        # Text mode: callers match str patterns against these lines, which
        # fails on the bytes objects that 'rb' yields under Python 3.
        with open(path, 'r') as log_file:
            return log_file.readlines()
    except (IOError, UnicodeDecodeError) as err:
        # Not only "missing file": permission and decoding errors land here.
        logging.warning('Unable to read log file %s: %s', path, err)
        return None


# Run the summary only when executed as a script, not when imported.
if __name__ == '__main__':
    main()