Skip to content

CGATcore Logfile Module

Logfile.py - logfile parsing

:Tags: Python

Purpose

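Parse CGAT log files: accumulate the timing statistics reported on "job finished" lines and extract runtime information (script, options, host, job id, start/end dates, timings) for each script invocation recorded in a log.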

Usage

Example::

python cgat_script_template.py --help

Type::

python cgat_script_template.py --help

for command line help.

Documentation

Code

RX_FINISH = re.compile('job finished in (\\S+) seconds at (.*) --\\s+(\\S+)\\s+(\\S+)\\s+(\\S+)\\s+(\\S+) -- (\\S+)') module-attribute

RX_JOB = re.compile('job started at (.*) on (\\S+) -- (\\S+)') module-attribute

RX_START = re.compile('output generated by (\\S+) (.*)') module-attribute

RuntimeInformation = collections.namedtuple('RuntimeInformation', 'script options jobid host has_finished start_date end_date wall utime stime cutime cstime') module-attribute

LogFileData

Source code in cgatcore/logfile.py
class LogFileData:
    """Accumulate runtime statistics from "job finished" log lines.

    Every line passed to :meth:`add` that matches :attr:`mRegex`
    contributes its wall-clock, user, system, child-user and child-system
    times to running totals and increments the chunk counter.
    """

    # Matches the "job finished" comment line. Groups: wall seconds, date,
    # user time, system time, child user time, child system time.
    mRegex = re.compile(
        r"# job finished in (\d+) seconds at (.*) --\s+([.\d]+)\s+([.\d]+)\s+([.\d]+)\s+([.\d]+)")
    # printf-style format applied to each timing column by __str__.
    mFormat = "%6.2f"
    # Each timing is divided by this value before it is formatted.
    mDivider = 1.0

    def __init__(self):
        """Start with all totals and the chunk counter at zero."""
        self.mWall = 0
        self.mUser = 0
        self.mSys = 0
        self.mChildUser = 0
        self.mChildSys = 0
        self.mNChunks = 0

    def add(self, line):
        """Add the timings found in *line* to the running totals.

        Lines that do not match :attr:`mRegex` at their start are ignored.
        """
        # Match once and reuse the result instead of running the regex twice.
        found = self.mRegex.match(line)
        if not found:
            return
        t_wall, _date, t_user, t_sys, t_child_user, t_child_sys = found.groups()

        self.mWall += int(t_wall)
        self.mUser += float(t_user)
        self.mSys += float(t_sys)
        self.mChildUser += float(t_child_user)
        self.mChildSys += float(t_child_sys)
        self.mNChunks += 1

    def __getitem__(self, key):
        """Return the total stored under *key*.

        Valid keys are ``wall``, ``user``, ``sys``, ``cuser``, ``csys`` and
        ``nchunks``.

        Raises:
            ValueError: if *key* is not one of the valid keys.
        """
        if key == "wall":
            return self.mWall
        elif key == "user":
            return self.mUser
        elif key == "sys":
            return self.mSys
        elif key == "cuser":
            return self.mChildUser
        elif key == "csys":
            return self.mChildSys
        elif key == "nchunks":
            return self.mNChunks
        else:
            raise ValueError("key %s not found" % key)

    def __add__(self, other):
        """Fold the totals of *other* into this object and return ``self``.

        NOTE: the left operand is modified in place, i.e. ``a + b`` behaves
        like ``a += b``. This is kept as is because the subclass and
        existing callers build on it.
        """
        self.mWall += other.mWall
        self.mUser += other.mUser
        self.mSys += other.mSys
        self.mChildUser += other.mChildUser
        self.mChildSys += other.mChildSys
        self.mNChunks += other.mNChunks

        return self

    def __str__(self):
        """Return the chunk count and the five timings as one tab-separated row.

        Timings are divided by :attr:`mDivider` and formatted with
        :attr:`mFormat`; column order matches :meth:`getHeader`.
        """
        return "%i\t%s" % (
            self.mNChunks,
            "\t".join([self.mFormat % (float(x) / self.mDivider) for x in (
                self.mWall, self.mUser, self.mSys,
                self.mChildUser, self.mChildSys)]))

    def getHeader(self):
        """Return the tab-separated column names for the row built by ``__str__``."""
        return "\t".join(("chunks", "wall", "user", "sys", "cuser", "csys"))

mChildSys = 0 instance-attribute

mChildUser = 0 instance-attribute

mDivider = 1.0 class-attribute instance-attribute

mFormat = '%6.2f' class-attribute instance-attribute

mNChunks = 0 instance-attribute

mRegex = re.compile('# job finished in (\\d+) seconds at (.*) --\\s+([.\\d]+)\\s+([.\\d]+)\\s+([.\\d]+)\\s+([.\\d]+)') class-attribute instance-attribute

mSys = 0 instance-attribute

mUser = 0 instance-attribute

mWall = 0 instance-attribute

__add__(other)

Source code in cgatcore/logfile.py
def __add__(self, other):
    """Fold the totals of *other* into this object and return ``self``.

    The left operand is updated in place; *other* is only read.
    """
    for field in ("mWall", "mUser", "mSys",
                  "mChildUser", "mChildSys", "mNChunks"):
        setattr(self, field, getattr(self, field) + getattr(other, field))
    return self

__getitem__(key)

Source code in cgatcore/logfile.py
def __getitem__(self, key):
    """Return the total stored under *key*.

    Valid keys are ``wall``, ``user``, ``sys``, ``cuser``, ``csys`` and
    ``nchunks``; any other key raises ``ValueError``.
    """
    # Pairs of (public key, attribute holding the value), checked in order.
    lookup = (
        ("wall", "mWall"),
        ("user", "mUser"),
        ("sys", "mSys"),
        ("cuser", "mChildUser"),
        ("csys", "mChildSys"),
        ("nchunks", "mNChunks"),
    )
    for name, attribute in lookup:
        if key == name:
            return getattr(self, attribute)
    raise ValueError("key %s not found" % key)

__init__()

Source code in cgatcore/logfile.py
def __init__(self):
    """Initialise every accumulated total and the chunk counter to zero."""
    # wall-clock, user and system time totals
    self.mWall = self.mUser = self.mSys = 0
    # user and system time totals of child processes
    self.mChildUser = self.mChildSys = 0
    # number of "job finished" lines added so far
    self.mNChunks = 0

__str__()

Source code in cgatcore/logfile.py
def __str__(self):
    """Return the chunk count and the five timings as one tab-separated row.

    Each timing is divided by ``mDivider`` and rendered with ``mFormat``.
    """
    timings = (self.mWall, self.mUser, self.mSys,
               self.mChildUser, self.mChildSys)
    formatted = []
    for value in timings:
        formatted.append(self.mFormat % (float(value) / self.mDivider))
    joined = "\t".join(formatted)
    return "%i\t%s" % (self.mNChunks, joined)

add(line)

Source code in cgatcore/logfile.py
def add(self, line):
    """Add the timings found in *line* to the running totals.

    Lines that do not match ``self.mRegex`` at their start are ignored.
    """
    # Match once and reuse the result instead of running the regex twice.
    found = self.mRegex.match(line)
    if not found:
        return
    t_wall, _date, t_user, t_sys, t_child_user, t_child_sys = found.groups()

    self.mWall += int(t_wall)
    self.mUser += float(t_user)
    self.mSys += float(t_sys)
    self.mChildUser += float(t_child_user)
    self.mChildSys += float(t_child_sys)
    self.mNChunks += 1

getHeader()

Source code in cgatcore/logfile.py
def getHeader(self):
    """Return the tab-separated column names: chunk count, then the five timings."""
    columns = ["chunks", "wall", "user", "sys", "cuser", "csys"]
    return "\t".join(columns)

LogFileDataLines

Bases: LogFileData

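Record lines: in addition to the timing statistics collected by LogFileData, count every line that does not start with "#".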

Source code in cgatcore/logfile.py
class LogFileDataLines(LogFileData):

    """Timing statistics plus a count of non-comment lines.

    Lines starting with ``#`` are handed to :class:`LogFileData`; every
    other line only increments the line counter.
    """

    def __init__(self):
        """Start with zeroed timing totals and a zero line count."""
        super().__init__()
        self.mNLines = 0

    def add(self, line):
        """Count *line* if it is a data line, else parse it for timings.

        An empty string is counted as a data line rather than raising
        ``IndexError``.
        """
        # Slicing (not indexing) keeps this safe for an empty string.
        if line[:1] != "#":
            self.mNLines += 1
        else:
            return super().add(line)

    def __getitem__(self, key):
        """Return the line count for ``"lines"``; defer other keys to the base class."""
        if key == "lines":
            return self.mNLines
        else:
            return super().__getitem__(key)

    def __add__(self, other):
        """Fold the line count and totals of *other* into ``self`` and return ``self``.

        NOTE: as in the base class, the left operand is modified in place.
        """
        self.mNLines += other.mNLines
        return super().__add__(other)

    def __str__(self):
        """Return the base-class row with the line count appended as last column."""
        return "%s\t%i" % (super().__str__(), self.mNLines)

    def getHeader(self):
        """Return the base-class header with a trailing ``lines`` column."""
        return "%s\t%s" % (super().getHeader(), "lines")

mNLines = 0 instance-attribute

__add__(other)

Source code in cgatcore/logfile.py
def __add__(self, other):
    """Fold the line count and timing totals of *other* into ``self``.

    The left operand is updated in place and returned.
    """
    # Line count first, then let the base class merge the timing totals.
    self.mNLines = self.mNLines + other.mNLines
    merged = LogFileData.__add__(self, other)
    return merged

__getitem__(key)

Source code in cgatcore/logfile.py
def __getitem__(self, key):
    """Return the line count for ``"lines"``; defer other keys to ``LogFileData``."""
    if key != "lines":
        return LogFileData.__getitem__(self, key)
    return self.mNLines

__init__()

Source code in cgatcore/logfile.py
def __init__(self):
    """Initialise the base-class totals and set the line count to zero."""
    LogFileData.__init__(self)
    # number of lines seen by add() that do not start with "#"
    self.mNLines = 0

__str__()

Source code in cgatcore/logfile.py
def __str__(self):
    """Return the ``LogFileData`` row with the line count appended as last column."""
    timing_columns = LogFileData.__str__(self)
    line_column = "%i" % self.mNLines
    return "\t".join((timing_columns, line_column))

add(line)

Source code in cgatcore/logfile.py
def add(self, line):
    """Count *line* if it is a data line, else parse it for timings.

    Lines starting with ``#`` are passed to ``LogFileData.add``; all other
    lines increment ``mNLines``. An empty string is counted as a data line
    rather than raising ``IndexError``.
    """
    # Slicing (not indexing) keeps this safe for an empty string.
    if line[:1] != "#":
        self.mNLines += 1
    else:
        return LogFileData.add(self, line)

getHeader()

Source code in cgatcore/logfile.py
def getHeader(self):
    """Return the ``LogFileData`` header followed by a ``lines`` column."""
    base_header = LogFileData.getHeader(self)
    return "\t".join((base_header, "lines"))

parse(filename)

Source code in cgatcore/logfile.py
def parse(filename):
    """Parse a logfile into one ``RuntimeInformation`` record per section.

    A new section starts at every line matching ``RX_START``. Within a
    section, a line matching ``RX_JOB`` supplies start date, host and job
    id, and a line matching ``RX_FINISH`` supplies the timings and end date.
    Values never seen in the log are reported as ``"?"`` (strings) or ``0``
    (timings).

    Args:
        filename: path of the logfile to read.

    Returns:
        A list of ``RuntimeInformation`` tuples in file order. The section
        being collected when the file ends is always appended, so a file
        without any ``RX_START`` line yields a single record with an empty
        script name.

    Raises:
        AssertionError: if the job id on a "job finished" line differs from
            the job id recorded for the section.
    """

    def new_section(script="", options=""):
        # The same defaults are used for the first and every later section.
        return {
            "script": script, "options": options,
            "jobid": "?", "host": "?",
            "has_finished": False,
            "started": "?", "finished": "?",
            "wall": 0, "utime": 0, "stime": 0, "cutime": 0, "cstime": 0,
        }

    def as_record(section):
        # Timings arrive as regex strings; convert them when the record is built.
        return RuntimeInformation._make((
            section["script"], section["options"],
            section["jobid"], section["host"],
            section["has_finished"],
            section["started"], section["finished"],
            int(section["wall"]),
            float(section["utime"]), float(section["stime"]),
            float(section["cutime"]), float(section["cstime"])))

    results = []
    section = new_section()
    with open(filename) as inf:
        for line in inf:
            x = RX_START.search(line)
            if x:
                # Close the previous section, if any, before opening a new one.
                if section["script"]:
                    results.append(as_record(section))
                section = new_section(*x.groups())
                continue
            x = RX_JOB.search(line)
            if x:
                (section["started"],
                 section["host"],
                 section["jobid"]) = x.groups()
                continue
            x = RX_FINISH.search(line)
            if x:
                (section["wall"], section["finished"],
                 section["utime"], section["stime"],
                 section["cutime"], section["cstime"],
                 _jobid) = x.groups()
                assert _jobid == section["jobid"]
                # Only an explicit "job finished" line marks a section as
                # finished. The previous test (finished != "") was always
                # true because the placeholder value is "?".
                section["has_finished"] = True

    results.append(as_record(section))

    return results