Source code for mw.lib.persistence.tokenization

import re


def wikitext_split(text):
    """
    Performs the simplest possible split of Latin character-based languages
    and wikitext.

    :Parameters:
        text : str
            Text to split.
    """
    return re.findall(
        r"[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|.",
        text
    )
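
# A brief usage sketch (illustrative, not part of the module source). It shows
# how the regular expression above tokenizes a small piece of wikitext: word
# runs, bold markers ('''), link brackets ([[ ]]), and whitespace each come
# back as separate tokens, and anything else falls through to the final "."
# alternative one character at a time. The sample string is an assumption
# chosen for demonstration.
#
# >>> wikitext_split("'''Foo''' is a [[bar]].")
# ["'''", 'Foo', "'''", ' ', 'is', ' ', 'a', ' ', '[[', 'bar', ']]', '.']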