Source code for kwalitee.kwalitee

# -*- coding: utf-8 -*-
#
# This file is part of kwalitee
# Copyright (C) 2014, 2015 CERN.
#
# kwalitee is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# kwalitee is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with kwalitee; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#
# In applying this licence, CERN does not waive the privileges and immunities
# granted to it by virtue of its status as an Intergovernmental Organization
# or submit itself to any jurisdiction.

"""Kwalitee checks for PEP8, PEP257, PyFlakes and License."""

from __future__ import unicode_literals

import codecs
import os
import re
import tokenize

from datetime import datetime

import pep257

import pep8

import pyflakes
import pyflakes.checker


# File name suffixes handled by ``check_file``: Python sources, templates
# and JavaScript/CSS-like assets.
SUPPORTED_FILES = '.py', '.html', '.tpl', '.js', '.jsx', '.css', '.less'
"""Supported file types."""

# Matches a copyright line such as "Copyright (C) 2014, 2015 CERN." (the
# "(C)" may also be "(c)" or the copyright sign). When several years are
# listed, the named group ``year`` captures the last one.
_re_copyright_year = re.compile(r"^Copyright\s+(?:\([Cc]\)|\xa9)\s+"
                                r"(?:\d{4},\s+)*"
                                r"(?P<year>\d{4})\s+CERN\.?$",
                                re.UNICODE | re.MULTILINE)

# The three patterns below each capture, as ``program``, the program name
# from a different sentence of the GPL notice:
#   "<program> is free software;"
#   "<program> is distributed in"
#   "GNU General Public License along with <program>;" (or ending with ".")
_re_program = re.compile(r"^(?P<program>.*?) is free software;",
                         re.UNICODE | re.MULTILINE)
_re_program_2 = re.compile(r"^(?P<program>.*?) is distributed in",
                           re.UNICODE | re.MULTILINE)
_re_program_3 = re.compile(r"GNU General Public License\s+along\s+with "
                           r"(?P<program>.*?)[;\.]",
                           re.UNICODE | re.MULTILINE)

# Matches a bullet line starting with "* LABEL " where LABEL is 1 to 70
# upper-case ASCII letters; the label is captured as ``label``.
_re_bullet_label = re.compile(r"^\* (?P<label>[A-Z]{1,70}) ", re.UNICODE)

# Commit message error codes mapped to ``str.format`` templates. The
# positional placeholders are filled with the extra items that follow
# (code, line number) in each error tuple.
_messages_codes = {
    # Global
    "M100": "needs more reviewers",
    "M101": "signature is missing",
    "M102": "unrecognized bullet/signature",
    # First line
    "M110": "missing component name",
    "M111": "unrecognized component name: {0}",
    # Dots
    "M120": "missing empty line before bullet",
    "M121": "indentation of two spaces expected",
    "M122": "unrecognized bullet label: {0}",
    # Signatures
    "M130": "no bullets are allowed after signatures",
    # Generic
    "M190": "line is too long ({1} > {0})",
    "M191": "must not end with a dot '.'",
}

# License check error codes mapped to ``str.format`` templates, filled with
# the extra items that follow (line number, code) in each error tuple.
_licenses_codes = {
    "L100": "license is missing",
    "L101": "copyright is missing",
    "L102": "copyright year is outdated, expected {0} but got {1}",
    "L103": "license is not GNU GPLv2",
    "L190": "file cannot be decoded as {0}"
}


def _check_1st_line(line, **kwargs):
    """First line check.

    Check that the first line has a known component name followed by a colon
    and then a short description of the commit.

    :param line: first line
    :type line: str
    :param components: list of known component names
    :type line: list
    :param max_first_line: maximum length of the first line
    :type max_first_line: int
    :return: errors as in (code, line number, *args)
    :rtype: list

    """
    components = kwargs.get("components", ())
    max_first_line = kwargs.get("max_first_line", 50)

    errors = []
    lineno = 1
    if len(line) > max_first_line:
        errors.append(("M190", lineno, max_first_line, len(line)))

    if line.endswith("."):
        errors.append(("M191", lineno))

    if ':' not in line:
        errors.append(("M110", lineno))
    else:
        component, msg = line.split(':', 1)
        if component not in components:
            errors.append(("M111", lineno, component))

    return errors


def _check_bullets(lines, **kwargs):
    """Check that the bullet point list is well formatted.

    Each bullet point shall have one space before and after it. The bullet
    character is the "*" and there is no space before it but one after it
    meaning the next line are starting with two blanks spaces to respect the
    indentation.

    :param lines: all the lines of the message
    :type lines: list
    :param max_lengths: maximum length of any line. (Default 72)
    :return: errors as in (code, line number, *args)
    :rtype: list

    """
    max_length = kwargs.get("max_length", 72)
    labels = {l for l, _ in kwargs.get("commit_msg_labels", tuple())}

    errors = []
    missed_lines = []
    skipped = []

    for (i, line) in enumerate(lines[1:]):
        if line.startswith('*'):
            if len(missed_lines) > 0:
                errors.append(("M130", i + 2))
            if lines[i].strip() != '':
                errors.append(("M120", i + 2))

            label = _re_bullet_label.search(line)
            if label and label.group('label') not in labels:
                errors.append(("M122", i + 2, label.group('label')))

            for (j, indented) in enumerate(lines[i + 2:]):
                if indented.strip() == '':
                    break
                if not re.search(r"^ {2}\S", indented):
                    errors.append(("M121", i + j + 3))
                else:
                    skipped.append(i + j + 1)
        elif i not in skipped and line.strip():
            missed_lines.append((i + 2, line))

        if len(line) > max_length:
            errors.append(("M190", i + 2, max_length, len(line)))

    return errors, missed_lines


def _check_signatures(lines, **kwargs):
    """Check that the signatures are valid.

    There should be at least three signatures. If not, one of them should be a
    trusted developer/reviewer.

    Formatting supported being: [signature] full name <email@address>

    :param lines: lines (lineno, content) to verify.
    :type lines: list
    :param signatures: list of supported signature
    :type signatures: list
    :param alt_signatures: list of alternative signatures, not counted
    :type alt_signatures: list
    :param trusted: list of trusted reviewers, the e-mail address.
    :type trusted: list
    :param min_reviewers: minimal number of reviewers needed. (Default 3)
    :type min_reviewers: int
    :return: errors as in (code, line number, *args)
    :rtype: list

    """
    trusted = kwargs.get("trusted", ())
    signatures = tuple(kwargs.get("signatures", ()))
    alt_signatures = tuple(kwargs.get("alt_signatures", ()))
    min_reviewers = kwargs.get("min_reviewers", 3)

    matching = []
    errors = []
    signatures += alt_signatures

    test_signatures = re.compile("^({0})".format("|".join(signatures)))
    test_alt_signatures = re.compile("^({0})".format("|".join(alt_signatures)))
    for i, line in lines:
        if signatures and test_signatures.search(line):
            if line.endswith("."):
                errors.append(("M191", i))
            if not alt_signatures or not test_alt_signatures.search(line):
                matching.append(line)
        else:
            errors.append(("M102", i))

    if not matching:
        errors.append(("M101", 1))
        errors.append(("M100", 1))
    elif len(matching) < min_reviewers:
        pattern = re.compile('|'.join(map(lambda x: '<' + re.escape(x) + '>',
                                          trusted)))
        trusted_matching = list(filter(None, map(pattern.search, matching)))
        if len(trusted_matching) == 0:
            errors.append(("M100", 1))

    return errors


def check_message(message, **kwargs):
    """Check the message format.

    Rules:

    - the first line must start by a component name
    - and a short description (``max_first_line`` chars, 50 by default),
    - then bullet points are expected
    - and finally signatures.

    :param components: components, e.g. ``('auth', 'utils', 'misc')``
    :type components: `list`
    :param signatures: signatures, e.g. ``('Signed-off-by', 'Reviewed-by')``
    :type signatures: `list`
    :param alt_signatures: alternative signatures, e.g. ``('Tested-by',)``
    :type alt_signatures: `list`
    :param trusted: optional list of reviewers, e.g. ``('john.doe@foo.org',)``
    :type trusted: `list`
    :param max_length: optional maximum line length (by default: 72)
    :type max_length: int
    :param max_first_line: optional maximum first line length (by default: 50)
    :type max_first_line: int
    :param allow_empty: optional way to allow empty message (by default: False)
    :type allow_empty: bool
    :return: formatted errors (``"<line>: <code> <text>"``) sorted by line
        number, then by error code
    :rtype: `list`
    """
    # ``allow_empty`` is consumed here so it is not forwarded to the helpers.
    if kwargs.pop("allow_empty", False):
        if not message or message.isspace():
            return []

    lines = re.split(r"\r\n|\r|\n", message)
    errors = _check_1st_line(lines[0], **kwargs)
    # Body lines that are not part of a bullet are signature candidates.
    bullet_errors, signature_lines = _check_bullets(lines, **kwargs)
    errors += bullet_errors
    errors += _check_signatures(signature_lines, **kwargs)

    # Each error is (code, line number, *args): the line number is item 1,
    # not item 0, so it has to be the primary sort key.
    errors.sort(key=lambda error: (error[1], error[0]))

    return ["{0}: {1} {2}".format(error[1],
                                  error[0],
                                  _messages_codes[error[0]].format(*error[2:]))
            for error in errors]


class _PyFlakesChecker(pyflakes.checker.Checker):

    """pyFlakes checker exposed through the PEP8 check interface.

    The idea is borrowed from flake8.
    """

    name = "pyflakes"
    version = pyflakes.__version__

    def run(self):
        """Yield (line, column, text, message class) for every message."""
        for message in self.messages:
            # Not every message carries a column; fall back to 0.
            column = getattr(message, 'col', 0)
            text = message.tpl % message.message_args
            yield message.lineno, column, text, message.__class__


def _register_pyflakes_check():
    """Register the pyFlakes checker into PEP8 set of checks.

    Also registers the import order and blind except checks. Once the
    registration succeeded, the module level ``_registered_pyflakes_check``
    flag is raised so that callers testing it do not register twice.
    """
    global _registered_pyflakes_check

    from flake8_import_order.flake8_linter import Linter
    from flake8_blind_except import check_blind_except

    # Resolving conflicts between pep8 and pyflakes.
    codes = {
        "UnusedImport": "F401",
        "ImportShadowedByLoopVar": "F402",
        "ImportStarUsed": "F403",
        "LateFutureImport": "F404",
        "Redefined": "F801",
        "RedefinedInListComp": "F812",
        "UndefinedName": "F821",
        "UndefinedExport": "F822",
        "UndefinedLocal": "F823",
        "DuplicateArgument": "F831",
        "UnusedVariable": "F841",
    }

    # Prefix every pyflakes message template with its code; messages with
    # no dedicated code get the generic F999.
    for name, obj in vars(pyflakes.messages).items():
        if name[0].isupper() and obj.message:
            obj.tpl = "{0} {1}".format(codes.get(name, "F999"), obj.message)

    pep8.register_check(_PyFlakesChecker, codes=['F'])
    # FIXME parser hack
    parser = pep8.get_parser('', '')
    Linter.add_options(parser)
    options, args = parser.parse_args([])
    Linter.parse_options(options)
    # end of hack
    pep8.register_check(Linter, codes=['I'])
    pep8.register_check(check_blind_except, codes=['B90'])

    # Only flagged at the very end: a failure above leaves the flag down so
    # that the registration is attempted again on the next call.
    _registered_pyflakes_check = True


# True once _register_pyflakes_check() has completed successfully.
_registered_pyflakes_check = False


class _Report(pep8.BaseReport):

    """Silent reporter collecting the errors.

    Nothing is printed; the errors are stored as sortable tuples in
    ``errors``.
    """

    def __init__(self, options):
        """Set up the base reporter and the list of collected errors."""
        super(_Report, self).__init__(options)
        self.errors = []

    def error(self, line_number, offset, text, check):
        """Record the error unless the base reporter discarded it."""
        code = super(_Report, self).error(line_number, offset, text, check)
        if not code:
            return
        # The offset is stored 1-based.
        entry = (line_number, offset + 1, code, text, check)
        self.errors.append(entry)


def is_file_excluded(filename, excludes):
    """Check if the file should be excluded.

    :param filename: file name
    :param excludes: list of regex to match; empty or ``None`` entries are
        skipped and ``None`` as a whole means "exclude nothing"
    :return: True if the file should be excluded
    """
    # Patterns are anchored at the start of the file name (``re.match``).
    # Stop at the first pattern that matches.
    for exclude in excludes or ():
        if exclude and re.match(exclude, filename) is not None:
            return True
    return False


def check_pep8(filename, **kwargs):
    """Run the PEP8 (and optionally pyFlakes) checks on the given file.

    :param filename: path of file to check.
    :type filename: str
    :param ignore: codes to ignore, e.g. ``('E111', 'E123')``
    :type ignore: `list`
    :param select: codes to explicitly select.
    :type select: `list`
    :param pyflakes: run the pyflakes checks too (default ``True``)
    :type pyflakes: bool
    :return: errors formatted as ``"<line>:<column>: <text>"``, ordered by
        line number
    :rtype: `list`

    .. seealso:: :py:class:`pep8.Checker`

    """
    if kwargs.get("pyflakes", True) and not _registered_pyflakes_check:
        _register_pyflakes_check()

    checker = pep8.Checker(filename,
                           reporter=_Report,
                           ignore=kwargs.get("ignore"),
                           select=kwargs.get("select"))
    checker.check_all()

    # Collected entries are (line, column, code, text, check).
    ordered = sorted(checker.report.errors, key=lambda entry: entry[0])
    return ["{0}:{1}: {3}".format(*entry) for entry in ordered]


def check_pep257(filename, **kwargs):
    """Perform static analysis on the given file docstrings.

    :param filename: path of file to check.
    :type filename: str
    :param ignore: codes to ignore, e.g. ('D400',)
    :type ignore: `list`
    :param match: regex the filename has to match to be checked
    :type match: str
    :param match_dir: regex everydir in path should match to be checked
    :type match_dir: str
    :return: errors
    :rtype: `list`

    .. seealso:: `GreenSteam/pep257 <https://github.com/GreenSteam/pep257/>`_

    """
    ignore = kwargs.get("ignore")
    match = kwargs.get("match", None)
    match_dir = kwargs.get("match_dir", None)

    errors = []

    if match and not re.match(match, os.path.basename(filename)):
        return errors

    if match_dir:
        # FIXME here the full path is checked, be sure, if match_dir doesn't
        # match the path (usually temporary) before the actual application path
        # it may not run the checks when it should have.
        path = os.path.dirname(os.path.abspath(filename))
        while True:
            parent, dirname = os.path.split(path)
            # The root is reached when splitting does not shorten the path
            # anymore. Comparing against a literal "/" would never stop on
            # roots such as a Windows drive or a "//" prefix.
            if parent == path:
                break
            if not re.match(match_dir, dirname):
                return errors
            path = parent

    checker = pep257.PEP257Checker()
    with open(filename) as fp:
        try:
            for error in checker.check_source(fp.read(), filename):
                if ignore is None or error.code not in ignore:
                    # Removing the colon ':' after the error code
                    message = re.sub("(D[0-9]{3}): ?(.*)",
                                     r"\1 \2",
                                     error.message)
                    errors.append("{0}: {1}".format(error.line, message))
        except tokenize.TokenError as e:
            # e.args is (message, (line, column)).
            errors.append("{1}:{2} {0}".format(e.args[0], *e.args[1]))
        except pep257.AllError as e:
            errors.append(str(e))

    return errors


def check_license(filename, **kwargs):
    """Perform a license check on the given file.

    The license format should be commented using # and live at the top of the
    file. Also, the year should be the current one.

    :param filename: path of file to check.
    :type filename: str
    :param year: default current year
    :type year: int
    :param ignore: codes to ignore, e.g. ``('L100', 'L101')``
    :type ignore: `list`
    :param python_style: False for JavaScript or CSS files
    :type python_style: bool
    :return: errors formatted as ``"<line>: <code> <text>"``
    :rtype: `list`

    """
    year = kwargs.pop("year", datetime.now().year)
    python_style = kwargs.pop("python_style", True)
    ignores = kwargs.get("ignore")
    template = "{0}: {1} {2}"

    # ``re_comment`` recognizes the lines belonging to the leading comment
    # block (blank lines included); ``starter`` is the prefix stripped from
    # the lines that contribute to the license text.
    if python_style:
        re_comment = re.compile(r"^#.*|\{#.*|[\r\n]+$")
        starter = "# "
    else:
        re_comment = re.compile(r"^/\*.*| \*.*|[\r\n]+$")
        starter = " *"

    errors = []
    lines = []
    file_is_empty = False
    license_text = ""
    lineno = 0
    try:
        with codecs.open(filename, "r", "utf-8") as fp:
            line = fp.readline()
            blocks = []
            # Consume the header; readline() returns "" at the end of the
            # file, which does not match and stops the loop.
            while re_comment.match(line):
                if line.startswith(starter):
                    line = line[len(starter):].lstrip()
                    blocks.append(line)
                    lines.append((lineno, line.strip()))
                lineno, line = lineno + 1, fp.readline()
            file_is_empty = line == ""
            license_text = "".join(blocks)
    except UnicodeDecodeError:
        errors.append((lineno + 1, "L190", "utf-8"))
        license_text = ""

    # Nothing but an (empty) header: no license is required.
    if file_is_empty and not license_text.strip():
        return errors

    match_year = _re_copyright_year.search(license_text)
    if match_year is None:
        errors.append((lineno + 1, "L101"))
    elif int(match_year.group("year")) != year:
        # Report the error on the copyright line itself when it is found.
        theline = match_year.group(0)
        lno = lineno
        for no, content in lines:
            if theline.strip() == content:
                lno = no
                break
        errors.append((lno + 1, "L102", year, match_year.group("year")))
    else:
        program_match = _re_program.search(license_text)
        program_2_match = _re_program_2.search(license_text)
        program_3_match = _re_program_3.search(license_text)
        if program_match is None:
            errors.append((lineno, "L100"))
        elif program_2_match is None or program_3_match is None:
            errors.append((lineno, "L103"))
        else:
            # The three sentences of the notice must name the same program.
            # A chained ``a != b != c`` only means ``a != b and b != c`` and
            # would accept a notice where two names agree and one differs.
            programs = set(found.group("program").upper()
                           for found in (program_match,
                                         program_2_match,
                                         program_3_match))
            if len(programs) != 1:
                errors.append((lineno, "L103"))

    def _format_error(lineno, code, *args):
        return template.format(lineno, code,
                               _licenses_codes[code].format(*args))

    def _filter_codes(error):
        # Returns the (truthy) error to keep it, None to drop it.
        if not ignores or error[1] not in ignores:
            return error

    return list(map(lambda x: _format_error(*x),
                    filter(_filter_codes, errors)))


def check_file(filename, **kwargs):
    """Perform static analysis on the given file.

    Python files go through the PEP8, PEP257 and license checks (each one
    can be switched off with the ``pep8``, ``pep257`` and ``license``
    options). Templates and JavaScript/CSS-like files only get the license
    check.

    .. seealso::

        - :data:`.SUPPORTED_FILES`
        - :func:`.check_pep8`
        - :func:`.check_pep257`
        - and :func:`.check_license`

    :param filename: path of file to check.
    :type filename: str
    :param excludes: list of regex; a matching file is not checked
    :type excludes: `list`
    :return: errors sorted by line number or None if file is excluded
    :rtype: `list`

    """
    excludes = kwargs.get("excludes", [])
    errors = []

    if is_file_excluded(filename, excludes):
        return None

    if filename.endswith(".py"):
        if kwargs.get("pep8", True):
            errors += check_pep8(filename, **kwargs)
        if kwargs.get("pep257", True):
            errors += check_pep257(filename, **kwargs)
        if kwargs.get("license", True):
            errors += check_license(filename, **kwargs)
    elif filename.endswith((".tpl", ".html")):
        # Templates carry the license in a "#"-style header.
        errors += check_license(filename, **kwargs)
    elif filename.endswith((".js", ".jsx", ".css", ".less")):
        # These use a "/* ... */" header instead.
        errors += check_license(filename, python_style=False, **kwargs)

    def try_to_int(value):
        # Every error starts with "<line>:"; anything else sorts first.
        try:
            return int(value.split(':', 1)[0])
        except ValueError:
            return 0

    return sorted(errors, key=try_to_int)


def get_options(config):
    """Translate the Flask config into keyword options for the checks.

    Settings that are absent (or explicitly ``None``) and have no default
    are left out of the result.
    """
    # (option name, config key, default used when the key is missing)
    spec = (
        ("components", "COMPONENTS", None),
        ("signatures", "SIGNATURES", None),
        ("commit_msg_template", "COMMIT_MSG_TEMPLATE", None),
        ("commit_msg_labels", "COMMIT_MSG_LABELS", None),
        ("alt_signatures", "ALT_SIGNATURES", None),
        ("trusted", "TRUSTED_DEVELOPERS", None),
        ("pep8", "CHECK_PEP8", True),
        ("pep257", "CHECK_PEP257", True),
        ("license", "CHECK_LICENSE", True),
        ("pyflakes", "CHECK_PYFLAKES", True),
        ("ignore", "IGNORE", None),
        ("select", "SELECT", None),
        ("match", "PEP257_MATCH", None),
        ("match_dir", "PEP257_MATCH_DIR", None),
        ("min_reviewers", "MIN_REVIEWERS", None),
        ("colors", "COLORS", True),
        ("excludes", "EXCLUDES", []),
    )

    options = {}
    for option, key, default in spec:
        value = config.get(key, default)
        if value is None:
            continue
        options[option] = value
    return options