Source code for railroadtracks.unifex

# Copyright 2014-2015 Novartis Institutes for Biomedical Research

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Unified execution layer.

One general way to run things on the command line.
"""

import sys, os, argparse
import collections
import logging
logger = logging.getLogger(__name__)
import railroadtracks
from importlib import import_module
from . import environment
from . import core


LOG_DEBUG = 'DEBUG'
LOGGING_LEVELS = ('INFO', LOG_DEBUG)

class UnifexError(Exception):
    pass



#FIXME: several classes 'Call' in rrt, I think. Delete/rename others
[docs]class Call(object):
    """ Unified call, turning a step + assets + parameters into a task. """
    def __init__(self, step, assets, parameters):
        self._step = step
        self._assets = assets
        self._parameters = parameters

[docs]    def execute(self):
        """ Execute the task. """
        return self._step.run(self._assets, self._parameters)

    @property
    def step(self):
        return self._step
    @property
    def assets(self):
        return self._assets
    @property
    def parameters(self):
        return self._parameters

    def __str__(self):
        res = (super(Call, self).__str__(),
               '  step: %s' % str(self.step),
               '  assets: %s' % str(self.assets),
               '  parameters: %s' % str(self.parameters))
        return os.linesep.join(res)

def _set_logging(args):
    if args.logging_file is not None:
        logging.basicConfig(filename=args.logging_file,
                            level=getattr(logging, args.logging_level))

def _make_stepdict(module):
    """ 
    Make a dict of classes (key=step name, value=class).
    Duplicate step names will raise a ValueError.
    :param classlist: module 
    :rtype: :class:`namespace`
    """
    classlist = core.steplist(module)
    d = dict()
    for cls in classlist:
        if not issubclass(cls, (core.StepAbstract,)):
            raise ValueError("Classes used as step must inherit from core.StepAbstract.")
        stepname = cls._name
        if stepname is None:
            raise ValueError('The step name for class "%s" is not defined.' % cls)
        elif stepname in d:
            raise ValueError('The step name "%s" is defined twice.' % stepname)
        d[stepname] = cls
    return d


def _extract_argdict(arglist):
    """ Build dictionary from list of parameters.
    """
    argdict = collections.defaultdict(list)
    if arglist is not None:
         for src in arglist:
            key, value = src.split('=', 1)
            argdict[key].append(value)
    return argdict

def _extract_arglist(argdict):
    """ Build list of parameters from a mapping of parameters.
    """
    arglist = list()
    if argdict is not None:
        for key, value in argdict.items():
            for v in value:
                src = '='.join((key, v))
                arglist.append(src)
    return tuple(arglist)


#FIXME: moved to unifex.py ?
def _cmdfromuei(uei):
    cmd = [sys.executable, '-m', 'railroadtracks.unifex', 'run', uei.model]
    # In the unified command line, the executable does not have to be specified,
    # and when parsing command line arguments with argparse the associated
    # parameter is set to None.
    # When building a unified command line, we skip the executable if it is
    # set to None.
    if uei.executable is not None:
        cmd.append(uei.executable)
    cmd.append('-s')
    cmd.extend('%s=%s' % (x,y.name) for x,y in zip(uei.source._fields, uei.source))
    cmd.append('-t')
    cmd.extend('%s=%s' % (x,y.name) for x,y in zip(uei.target._fields, uei.target))
    if len(uei.parameters) > 0:
        cmd.append('-p')
        cmd.extend(" '%s'" % x for x in uei.parameters)
    return cmd


#FIXME: needed ?
# def wrapper_run(model_cls, sources, targets,
#                 parameters):
#     executable = model_cls(args.executable, parameters = args.parameters)
#     res = executable.run(sources, targets)
#     return res


def _model_instance(args, steplist):
    """
    :param args: arguments, such as the ones returned by :meth:`argparse.ArgumentParser.parse`.
    
    This should contain 2 attributes:
    - :attr:`model`, the name of the model used for execution,
    - :attr:`executable` the name (or path) to the executable used.

    :param steplist: sequence of known steps
    :rtype: :class:``
    """
    model_cls = steplist[args.model]
    try:
        model_instance = model_cls(args.executable)
    except environment.MissingSoftware:
        raise UnifexError("""The executable '%s' to use with the model class '%s' is not in the ${PATH}.
Use either the full path, or add the executable to your PATH.
        """ % (args.executable, args.model))
    except Exception as e:
        raise UnifexError("""Internal error while creating a %s using %s:
%s
""" % (model_cls,
       args.executable,
       e))
    return model_instance


[docs]def build_AssetSet(AssetSet, 
                   values):
    """
    :param AssetSet:
    :param values: values to create instances in the AssetSet
    """
    # classes expected for the assets
    assetset_cls = getattr(AssetSet, core.AssetMetaReserved.SOURCES.value)
    assets = list()
    for x, val in zip(assetset_cls, values):
        if val is None:
            # if None, whether it is allowed (allownone True/False) is pushed to the construction
            # of the AssetSet below
            #FIXME: is this really the most transparent way to do it ?
            a = val
        else:
            if issubclass(x.cls, core.FileSequence):
                a = x.cls(x.cls._type(z) for z in val)
            else:
                a = x.cls(val)
        assets.append(a)
    assetset = AssetSet(*assets)
    return assetset

[docs]def unified_exec_run(args, steplist, msg=[]):
    """
    Run a command.
    :param args: arguments in a class such as the one returned by :meth:`argparse.ArgumentParser.parse`
    :param steplist: sequence of known steps. The `args` will be matched against this to find the model class.
    :type steplist: sequence of :class:`core.StepAbstract`-inherting instances
    :param msg: list with (eventual) messages
    """
    #FIXME: deprecate msg (use logging)
    _set_logging(args)
    model = _model_instance(args, steplist)
    Assets = model.Assets

    # extract the sources from the args
    sources_dict = _extract_argdict(args.source)
    tmp = ['Source parameters:', ]
    tmp.extend('%s: %s%s' % (k, str(v), os.linesep) for k,v in sources_dict.items())
    logger.debug(os.linesep.join(tmp))
    sources = tuple(sources_dict.get(f) for f in Assets.Source._fields)
    sources_undefined = list()
    for src, assetattr, assetname in zip(sources, Assets.Source._sources, Assets.Source._fields):
        if src is None:
            if not assetattr.allownone:
                sources_undefined.append(assetname)
    if len(sources_undefined) > 0:
        msg.append('The following sources must be defined (and are missing):\n'+\
                   '\n'.join('- %s' % sources_undefined))

    # extract the targets from the args
    targets_dict = _extract_argdict(args.target)
    targets = tuple(targets_dict.get(f) for f in Assets.Target._fields)

    if None in targets:
        msg.append('The following targets must be defined (and are missing):\n'+\
                   '\n'.join('- %s' % (y) for x,y in enumerate(Assets.Target._fields) if targets[x] is None))

    # exit early if missing parameters
    if len(msg) > 0:
        raise ValueError('\n'.join(msg))

    # build the assets
    assets = Assets(build_AssetSet(Assets.Source, sources),
                    build_AssetSet(Assets.Target, targets))
    cleanparameters = tuple(x.lstrip() for x in args.parameters)
    cmd, returncode = model.run(assets, cleanparameters)

    msg = 'return code: %i' % returncode
    logger.debug(msg)
    if returncode != 0:
        sys.stderr.write(msg)
        sys.stderr.write('\n')
    return returncode

def unified_exec_version(args, steplist):
    _set_logging(args)
    executable = _model_instance(args, steplist)
    print(executable.version)
    return 0

def unified_exec_activities(args, steplist):
    _set_logging(args)
    executable = _model_instance(args, steplist)
    print(executable.activities)
    return 0

def unified_exec_model(args, steplist):
    _set_logging(args)
    print(' '.join(str(x) for x in steplist.keys()))
    return 0

def unified_exec():

    def _steplist(module_name):
        module = import_module(module_name)
        steplist = _make_stepdict(module)
        return steplist

    def _unified_exec_run(args):
        msg = []
        try:
            returncode = unified_exec_run(args, _steplist(args.module), msg=msg)
        except ValueError as ve:
            logger.error(msg)
            raise ve
        return returncode

    def _unified_exec_version(args):
        return unified_exec_version(args, _steplist(args.module))
    def _unified_exec_activities(args):
        return unified_exec_activities(args, _steplist(args.module))
    def _unified_exec_model(args):
        return unified_exec_model(args, _steplist(args.module))

    parser = argparse.ArgumentParser(description='Unified model for RNA-Seq steps')
    subparsers = parser.add_subparsers(help='Action to perform. `run` will call the executable specified in the next parameter, `version` will return the version number, `activities` will list the RNA-Seq activities the executable is associated with')

    #FIXME: This means that one will be able to point to different R version, and packages will be coming
    #       from that R version


    # run
    parser_run = subparsers.add_parser('run', help="Run a step")
    #FIXME: This means that one will be able to point to different R version, and packages will be coming
    #       from that R version
    parser_run.add_argument('model',
                            help='Class name used to model that executable. The classes are defined in the model (see -m/--module).')
    parser_run.add_argument('executable',
                            nargs='?', # this is optional
                            help='Name, or full path, for an executable. '
                            'Note that the executable can be R, '
                            'and in that case the class name (argument "model") '
                            'will wrap an R script to be run with that R version.')
    parser_run.add_argument('-m', '--module',
                            default='railroadtracks.rnaseq',
                            help='module defining all models (default: %(default)s)')
    parser_run.add_argument('-s', '--source', nargs='*',
                            help='Source file indicated as <label>=<filename>')
    parser_run.add_argument('-t', '--target', nargs='*',
                            help='target file indicated as <label>=<filename>')
    parser_run.add_argument('-p', '--parameters', nargs='*',
                            default = (),
                            help='Additional parameter(s) for the wrapped executable')
    parser_run.add_argument('--logging-file',
                            help='Name of log file.')
    parser_run.add_argument('--logging-level',
                            choices = LOGGING_LEVELS,
                            default = 'INFO',
                            help='Level of logging information. DEBUG is quite verbose (default: %(default)s).')
    parser_run.set_defaults(func = _unified_exec_run)

    # version
    parser_version = subparsers.add_parser('version',
                                           help="Query version number")
    parser_version.add_argument('model',
                                help='Class name used to model that executable. The classes are defined in the model (see -m/--module).')
    parser_version.add_argument('executable', 
                                nargs='?', # this is optional
                                help='Name, or full path, for an executable. '
                                'Note that the executable can be R, '
                                'and in that case the class name (argument "model") '
                                'will wrap an R script to be run with that R version.')
    parser_version.add_argument('-m', '--module',
                                default='railroadtracks.rnaseq',
                                help='module defining all models (default: %(default)s)')
    parser_version.add_argument('--logging-file',
                                help='Name of log file.')
    parser_version.add_argument('--logging-level',
                                choices = ('INFO', 'DEBUG'),
                                default = 'INFO',
                                help='Level of logging information. DEBUG is quite verbose (default: %(default)s).')

    parser_version.set_defaults(func = _unified_exec_version)

    # activities
    parser_activities = subparsers.add_parser('activities',
                                              help="Query activities")
    parser_activities.add_argument('model',
                                   help='Class name used to model that executable. The classes are defined in the model (see -m/--module).')
    parser_activities.add_argument('-m', '--module',
                                   default='railroadtracks.rnaseq',
                                   help='module defining all models (default: %(default)s)')
    parser_activities.add_argument('executable',
                                   nargs='?', # this is optional
                                   help='Name, or full path, for an executable. '
                                   'Note that the executable can be R, '
                                   'and in that case the class name (argument "model") '
                                   'will wrap an R script to be run with that R version.')
    parser_activities.add_argument('--logging-file',
                                   help='Name of log file.')
    parser_activities.add_argument('--logging-level',
                                   choices = ('INFO', 'DEBUG'),
                                   default = 'INFO',
                                   help='Level of logging information. DEBUG is quite verbose (default: %(default)s).')

    parser_activities.set_defaults(func = _unified_exec_activities)

    # model
    parser_model = subparsers.add_parser('model',
                                         help="Query on a model module")
    parser_model.add_argument('-m', '--module',
                              default='railroadtracks.rnaseq',
                              help='module defining all models (default: %(default)s)')
    parser_model.add_argument('--logging-file',
                              help='Name of log file.')
    parser_model.add_argument('--logging-level',
                              choices = ('INFO', 'DEBUG'),
                              default = 'INFO',
                              help='Level of logging information. DEBUG is quite verbose (default: %(default)s).')

    parser_model.set_defaults(func = _unified_exec_model)
    
    args = parser.parse_args()
    returncode = args.func(args)


if __name__ == '__main__':
    import sys
    returncode = unified_exec()
    sys.exit(returncode)