Source code for revscoring.languages.italian

from .features import Dictionary, RegexMatches, Stemmed, Stopwords

name = "italian"

try:
    import enchant
    dictionary = enchant.Dict("it")
except enchant.errors.DictNotFoundError:
    raise ImportError("No enchant-compatible dictionary found for 'it'.  " +
                      "Consider installing 'myspell-it'.")

dictionary = Dictionary(name + ".dictionary", dictionary.check)
"""
:class:`~revscoring.languages.features.Dictionary` features via
:class:`enchant.Dict` "it". Provided by `myspell-it`
"""

try:
    from nltk.corpus import stopwords as nltk_stopwords
    stopwords = set(nltk_stopwords.words('italian'))
except LookupError:
    raise ImportError("Could not load stopwords for {0}. ".format(__name__) +
                      "You may need to install the nltk 'stopwords' " +
                      "corpora.  See http://www.nltk.org/data.html")

stopwords = Stopwords(name + ".stopwords", stopwords)
"""
:class:`~revscoring.languages.features.Stopwords` features provided by
:func:`nltk.corpus.stopwords` "italian"
"""

try:
    from nltk.stem.snowball import SnowballStemmer
    stemmer = SnowballStemmer("italian")
except ValueError:
    raise ImportError("Could not load stemmer for {0}. ".format(__name__))

stemmed = Stemmed(name + ".stemmed", stemmer.stem)
"""
:class:`~revscoring.languages.features.Stemmed` word features via
:class:`nltk.stem.snowball.SnowballStemmer` "italian"
"""

badword_regexes = [
    r"anale",
    r"ano",
    r"bastard[io]",
    r"buttana",
    r"cac(a|are|ata)",
    r"cacc(a|he|ol[ae])",
    r"caga(re|t[aeo])?",
    r"cavolat[ae]",
    r"cazz(at[ae]|[io]|on[ei])?",
    r"cesso",
    r"ciccione",
    r"ciuccia",
    r"coglion[ei]",
    r"cojone",
    r"cretin[io]",
    r"culo",
    r"deficent[ei]",
    r"fancazzista",
    r"fanculo",
    r"fi[cg](a|he|o)",
    r"fott(e|iti|uto)",
    r"fregna",
    r"froci(o|one)?",
    r"gnocca",
    r"idiota",
    r"incul(are|ato|o)",
    r"kazzo",
    r"maiala",
    r"merd(a|accia|e|os[oa])?",
    r"mignotta",
    r"min(ch|k)ia",
    r"pen[ei]",
    r"pip+[iae]",
    r"pirla",
    r"piscio",
    r"pisell(ino|o|one)",
    r"pompin[io]",
    r"porc[ao]",
    r"porno",
    r"pupu",
    r"putta(n[ea]|nella|niere)?",
    r"puzza(no|te|va)?", "puzz(i|olente|one)",
    r"ricchione",
    r"sb[ou]rr[ao]",
    r"scem[aio]",
    r"schifo(s[ao])?",
    r"scopa(re|t[aeo]|va)",
    r"scor+egg(ia|e)",
    r"seghe",
    r"sfigat[io]",
    r"siffredi",
    r"skifo",
    r"sperma",
    r"stocazzo",
    r"stronza(t[ae])?",
    r"stronz[io]",
    r"stupido",
    r"suca", r"succhia",
    r"suka",
    r"tette",
    r"troi[ae]",
    r"trombare",
    r"vaf+anculo",
    r"znccn[kl]",
    r"zoccola"
]

badwords = RegexMatches(name + ".badwords", badword_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
badword detecting regexes.
"""

informal_regexes = [
    "(ah)+a?",
    "amo",
    "avete",
    "banana",
    "bla",
    "brutt[io]",
    "cavolo",
    "chiamo",
    "ciao+",
    "cmq",
    "consigliamo",
    "corsivo",
    "dovete",
    "fermatevi",
    "frega",
    "grassetto",
    "ha(ha)+",
    "inserisci",
    "intestazione",
    "leggete",
    "lol",
    "mamma",
    "nascondi",
    "perche",
    "potete",
    "raga",
    "sapete",
    "scrivete",
    "scusate",
    "siete",
    "sto",
    "tua",
    "volete"
]

informals = RegexMatches(name + ".informals", informal_regexes)
"""
:class:`~revscoring.languages.features.RegexMatches` features via a list of
informal word detecting regexes.
"""

Revision Scoring

Navigation