mw.database – MySQL database abstraction
This module contains a set of utilities for interacting with MediaWiki databases.
Here’s an example of a common usage pattern:
from mw import database
db = database.DB.from_params(
host="s1-analytics-slave.eqiad.wmnet",
read_default_file="~/.my.cnf",
user="research",
db="enwiki"
)
revisions = db.revisions.query(user_id=9133062)
for rev_row in revisions:
rev_row['rev_id']
DB
-
class mw.database.DB(connection)
Represents a connection to a MySQL database.
Parameters: |
- connection : oursql.Connection
A connection to a MediaWiki database
|
-
revisions = None
An instance of mw.database.Revisions.
-
archives = None
An instance of mw.database.Archives.
-
all_revisions = None
An instance of mw.database.AllRevisions.
-
pages = None
An instance of mw.database.Pages.
-
users = None
An instance of mw.database.Users.
-
classmethod add_arguments(parser, defaults=None)
Adds the arguments to an argparse.ArgumentParser in order to
create a database connection.
-
classmethod from_arguments(args)
Constructs a DB.
Consumes argparse.ArgumentParser arguments given by
add_arguments() in order to create a DB.
Parameters: |
- args : argparse.Namespace
A collection of argument values returned by argparse.ArgumentParser’s parse_args()
|
-
classmethod from_params(*args, **kwargs)
Constructs a DB. Passes *args and **kwargs
to oursql.connect() and configures the connection.
Parameters: |
- args : positional arguments
Positional arguments passed through to oursql.connect()
- kwargs : keyword arguments
Keyword arguments passed through to oursql.connect() (e.g. host, user, db, read_default_file)
|
Collections
-
class mw.database.Archives(db)
-
get(rev_id)
Gets a single revision by ID. Checks the archive table. This
method throws a KeyError if a revision cannot be found.
Parameters: |
- rev_id : int
Revision ID
|
Returns: | A revision row
|
-
query(page_id=None, user_id=None, user_text=None, before=None, after=None, before_id=None, after_id=None, before_ar_id=None, after_ar_id=None, direction=None, limit=None, include_page=True)
Queries archived revisions (revisions of deleted pages)
Parameters: |
- page_id : int
Page identifier. Filter revisions to this page.
- user_id : int
User identifier. Filter revisions to those made by this user.
- user_text : str
User text (user_name or IP address). Filter revisions to those
made by this user.
- before : mw.Timestamp
Filter revisions to those made before this timestamp.
- after : mw.Timestamp
Filter revisions to those made after this timestamp.
- before_id : int
Filter revisions to those with an ID before this ID
- after_id : int
Filter revisions to those with an ID after this ID
- direction : str
“newer” or “older”
- limit : int
Limit the number of results
- include_page : bool
This field is ignored. It’s only here for compatibility with
mw.database.Revisions.
|
Returns: | An iterator over revision rows.
|
-
class mw.database.AllRevisions(db)
-
get(rev_id, include_page=False)
Gets a single revision by ID. Checks both the revision and
archive tables. This method throws a KeyError if a
revision cannot be found.
Parameters: |
- rev_id : int
Revision ID
- include_page : bool
Join revision returned against page
|
Returns: | A revision row
|
-
query(*args, **kwargs)
Queries revisions (includes revisions to deleted pages)
Parameters: |
- page_id : int
Page identifier. Filter revisions to this page.
- user_id : int
User identifier. Filter revisions to those made by this user.
- user_text : str
User text (user_name or IP address). Filter revisions to those
made by this user.
- before : mw.Timestamp
Filter revisions to those made before this timestamp.
- after : mw.Timestamp
Filter revisions to those made after this timestamp.
- before_id : int
Filter revisions to those with an ID before this ID
- after_id : int
Filter revisions to those with an ID after this ID
- direction : str
“newer” or “older”
- limit : int
Limit the number of results
- include_page : bool
Join revisions returned against page
|
Returns: | An iterator over revision rows.
|
-
class mw.database.Pages(db)
-
get(page_id=None, namespace_title=None, rev_id=None)
Gets a single page based on a legitimate identifier of the page. Note
that namespace_title expects a tuple of namespace ID and title.
Parameters: |
- page_id : int
Page ID
- namespace_title : ( int, str )
the page’s namespace ID and title
- rev_id : int
a revision ID included in the page’s history
|
Returns: | iterator over result rows
|
-
class mw.database.RecentChanges(db)
-
listen(last=None, types=None, max_wait=5)
Listens to the recent changes table. Given no parameters, this function
will return an iterator over the entire recentchanges table and then
continue to “listen” for new changes to come in every 5 seconds.
Parameters: |
- last : dict
a recentchanges row to pick up after
- types : set ( str )
a set of recentchanges types to filter for
- max_wait : float
the maximum number of seconds to wait between repeated queries
|
Returns: | A never-ending iterator over change rows.
|
-
query(before=None, after=None, before_id=None, after_id=None, types=None, direction=None, limit=None)
Queries the recentchanges table. See
https://www.mediawiki.org/wiki/Manual:Recentchanges_table
Parameters: |
- before : mw.Timestamp
The maximum timestamp
- after : mw.Timestamp
The minimum timestamp
- before_id : int
The maximum rc_id
- after_id : int
The minimum rc_id
- types : set ( str )
Which types of changes to return?
- edit – Edits to existing pages
- new – Edits that create new pages
- move – (obsolete)
- log – Log actions (introduced in MediaWiki 1.2)
- move_over_redirect – (obsolete)
- external – An external recent change. Primarily used by Wikidata
- direction : str
“older” or “newer”
- limit : int
limit the number of records returned
|
-
class mw.database.Revisions(db)
-
get(rev_id, include_page=False)
Gets a single revision by ID. Checks the revision table. This
method throws a KeyError if a revision cannot be found.
Parameters: |
- rev_id : int
Revision ID
- include_page : bool
Join revision returned against page
|
Returns: | A revision row
|
-
query(page_id=None, user_id=None, user_text=None, before=None, after=None, before_id=None, after_id=None, direction=None, limit=None, include_page=False)
Queries revisions (excludes revisions to deleted pages)
Parameters: |
- page_id : int
Page identifier. Filter revisions to this page.
- user_id : int
User identifier. Filter revisions to those made by this user.
- user_text : str
User text (user_name or IP address). Filter revisions to those
made by this user.
- before : mw.Timestamp
Filter revisions to those made before this timestamp.
- after : mw.Timestamp
Filter revisions to those made after this timestamp.
- before_id : int
Filter revisions to those with an ID before this ID
- after_id : int
Filter revisions to those with an ID after this ID
- direction : str
“newer” or “older”
- limit : int
Limit the number of results
- include_page : bool
Join revisions returned against page
|
Returns: | An iterator over revision rows.
|
-
class mw.database.Users(db)
-
get(user_id=None, user_name=None)
Gets a single user row from the database. Raises a KeyError
if a user cannot be found.
Parameters: |
- user_id : int
User ID
- user_name : str
User’s name
|
Returns: | A user row.
|
-
query(registered_before=None, registered_after=None, before_id=None, after_id=None, limit=None, direction=None, self_created_only=False)
Queries users based on various filtering parameters.
Parameters: |
- registered_before : mw.Timestamp
A timestamp to search before (inclusive)
- registered_after : mw.Timestamp
A timestamp to search after (inclusive)
- before_id : int
A user_id to search before (inclusive)
- after_id : int
A user_id to search after (inclusive)
- direction : str
“newer” or “older”
- limit : int
Limit the results to at most this number
- self_created_only : bool
Limit results to self-created user accounts
|
Returns: | an iterator over user table rows
|