Source code for revscoring.datasources.revision_oriented

"""
Implements a set of datasources oriented off of a single revision.  This is
useful for extracting features of edit and article quality.

.. autodata:: revscoring.datasources.revision_oriented.revision

Supporting classes
++++++++++++++++++

.. autoclass:: revscoring.datasources.revision_oriented.Revision
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.Diff
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.Page
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.Namespace
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.User
    :members:
    :member-order: bysource

.. autoclass:: revscoring.datasources.revision_oriented.UserInfo
    :members:
    :member-order: bysource

"""
from ..dependencies import DependentSet
from .datasource import Datasource


[docs]class Revision(DependentSet): """ Represents a revision """ def __init__(self, name, include_parent=True, include_user=True, include_user_info=True, include_user_last_revision=False, include_page=True, include_page_creation=False, include_content=False): super().__init__(name) self.id = Datasource(name + ".id") "`int` : Revision ID" self.timestamp = Datasource(name + ".timestamp") ":class:`mwtypes.Timestamp` : Timestamp the revision was saved" self.comment = Datasource(name + ".comment") "`str` : The comment saved with the revision" self.byte_len = Datasource(name + ".byte_length") "`int` : The length of the revision content in bytes" self.minor = Datasource(name + ".minor") "`bool` : Was the revision flagged as minor?" self.content_model = Datasource(name + ".content_model") "`str` : Describes the format of revision content" if include_content: self.text = Datasource(name + ".text") "`str` : The decoded (Unicode) text of the revision content" if include_parent: self.parent = Revision( name + ".parent", include_parent=False, include_user_info=False, include_page=False, include_content=include_content ) """ :class:`~revscoring.datasources.revision_oriented.Revision` : The parent (aka "previous") revision of the page. """ if include_page: self.page = Page( name + ".page", include_creation=include_page_creation ) """ :class:`~revscoring.datasources.revision_oriented.Page` : The page in which the revision was saved. """ if include_user: self.user = User( name + ".user", include_info=include_user_info, include_last_revision=include_user_last_revision ) """ :class:`~revscoring.datasources.revision_oriented.User` : The user who saved the revision. """ if include_content and include_parent: self.diff = Diff( name + ".diff" ) """ :class:`~revscoring.datasources.revision_oriented.Diff` : The difference between this revision and the parent revision. """
[docs]class User(DependentSet): """ Represents a user's id and name/ip """ def __init__(self, name, include_info=True, include_last_revision=False): super().__init__(name) self.id = Datasource(name + ".id") "`int` : The id of the user who saved the edit. 0 for IPs." self.text = Datasource(name + ".text") "`str` : The user's name or IP address" if include_info: self.info = UserInfo(name + ".info") """ :class:`~revscoring.datasources.revision_oriented.UserInfo` : Information about the user. """ if include_last_revision: self.last_revision = Revision( name + ".last_revision", include_parent=False, include_user=False, include_content=False ) """ :class:`~revscoring.datasources.revision_oriented.Revision` : The last revision the user saved before the revision of reference. """
[docs]class UserInfo(DependentSet): """ Represents a user's information """ def __init__(self, name): super().__init__(name) self.editcount = Datasource(name + ".editcount") "`int` : A count of edits the user has ever saved" self.registration = Datasource(name + ".registration") ":class:`mwtypes.Timestamp` : The date the user registered" self.groups = Datasource(name + ".groups") "`set` ( `str` ) : The groups the user is a member of" self.emailable = Datasource(name + ".emailable") "`bool` : `True` if the users is emailable, `False` otherwise" self.gender = Datasource(name + ".gender") "`str` : A string representing the user's ``gender`` preference."
[docs]class Page(DependentSet): """ Represents a revision's page """ def __init__(self, name, include_creation=False): super().__init__(name) self.id = Datasource(name + ".id") "`int` : The page's ID" self.title = Datasource(name + ".title") "`str` : The page's title (namespace stripped)" self.namespace = Namespace(name + ".namespace") """ :class:`~revscoring.datasources.revision_oriented.Namespace` : The namespace information. """ if include_creation: self.creation = Revision( name + ".creation", include_parent=False, include_page=False, include_content=False, include_user_last_revision=False ) """ :class:`~revscoring.datasources.revision_oriented.Revision` : The first revision to the page. """
[docs]class Namespace(DependentSet): """ Represents a page's namespace """ def __init__(self, name): super().__init__(name) self.id = Datasource(name + ".id") "`int` : The namespace's ID" self.name = Datasource(name + ".name") "`str` : The name of the namespace"
[docs]class Diff(DependentSet): """ Represents the difference between two sequential revisions. """ def __init__(self, name): super().__init__(name)
revision = Revision( "revision", include_page_creation=True, include_content=True, include_user_last_revision=True ) """ Represents the base revision of interest. Implements this structure: * revision: :class:`~revscoring.datasources.revision_oriented.Revision` * diff: :class:`~revscoring.datasources.revision_oriented.Diff` * user: :class:`~revscoring.datasources.revision_oriented.User` * info: :class:`~revscoring.datasources.revision_oriented.UserInfo` * last_revision: * page: :class:`~revscoring.datasources.revision_oriented.Page` * namespace: :class:`~revscoring.datasources.revision_oriented.Namespace` * page: :class:`~revscoring.datasources.revision_oriented.Page` * namespace: :class:`~revscoring.datasources.revision_oriented.Namespace` * creation: :class:`~revscoring.datasources.revision_oriented.Revision` * parent: :class:`~revscoring.datasources.revision_oriented.Revision` * user: :class:`~revscoring.datasources.revision_oriented.User` """ # noqa

Revision Scoring

Navigation