revscoring.languages.features
Dictionary
Implements a feature set based off of dictionary lookup.
-
class revscoring.languages.features.Dictionary(name, dictionary_check)
| Parameters: |
- name : str
A name for the collection
- dictionary_check : func
A function that, given a word, performs a dictionary check and
returns True if the word exists.
|
-
revision = None
Revision :
The base revision feature set.
Supporting classes
-
class revscoring.languages.features.dictionary.Revision(name, revision_datasources)
-
dict_words = None
int : A count of the number of dictionary words in the revision
-
non_dict_words = None
int : A count of the number of non-dictionary words in the revision
-
parent = None
Revision : The
parent revision
-
diff = None
Diff : The
diff between the parent and current revision.
-
class revscoring.languages.features.dictionary.Diff(name, diff_datasources)
-
dict_words_added = None
int : A count of the number of dictionary words added
-
dict_words_removed = None
int : A count of the number of dictionary words removed
-
non_dict_words_added = None
int : A count of the number of non-dictionary words added
-
non_dict_words_removed = None
int : A count of the number of non-dictionary words removed
-
dict_word_delta_sum = None
int : The sum of word frequency deltas for dictionary words
-
dict_word_delta_increase = None
int : The sum of word frequency delta increases for dictionary words
-
dict_word_delta_decrease = None
int : The sum of word frequency delta decreases for dictionary
words
-
non_dict_word_delta_sum = None
int : The sum of word frequency deltas for non-dictionary words
-
non_dict_word_delta_increase = None
int : The sum of word frequency delta increases for non-dictionary
words
-
non_dict_word_delta_decrease = None
int : The sum of word frequency delta decreases for non-dictionary
words
-
dict_word_prop_delta_sum = None
float : The sum of word frequency proportional delta for
dictionary words
-
dict_word_prop_delta_increase = None
float : The sum of word frequency proportional delta increases for
dictionary words
-
dict_word_prop_delta_decrease = None
float : The sum of word frequency proportional delta decreases for
dictionary words
-
non_dict_word_prop_delta_sum = None
float : The sum of word frequency proportional delta for
non-dictionary words
-
non_dict_word_prop_delta_increase = None
float : The sum of word frequency proportional delta increases for
non-dictionary words
-
non_dict_word_prop_delta_decrease = None
float : The sum of word frequency proportional delta decreases for
non-dictionary words
RegexMatches
Implements a feature set based off of a set of regexes applied to strings.
-
class revscoring.languages.features.RegexMatches(name, regexes)
| Parameters: |
- name : str
A name for the collection
- regexes : list ( str )
A list of regex patterns to match.
|
-
revision = None
Revision :
The base revision feature set.
Supporting classes
-
class revscoring.languages.features.regex_matches.Revision(name, regexes, revision_datasources)
-
matches = None
int : A count of the number of matches found in the text
-
parent = None
Revision : The
parent revision
-
diff = None
Diff : The
difference made by this revision
-
class revscoring.languages.features.regex_matches.Diff(name, regexes, diff_datasources)
-
matches_added = None
int : The number of matches added in the edit
-
matches_removed = None
int : The number of matches removed in the edit
-
match_delta_sum = None
int : The sum of frequency delta for matched strings
-
match_delta_increase = None
int : The sum of frequency delta increases for matched strings
-
match_delta_decrease = None
int : The sum of frequency delta decreases for matched strings
-
match_prop_delta_sum = None
float : The sum of proportional frequency deltas for matched
strings
-
match_prop_delta_increase = None
float : The sum of proportional frequency delta increases for matched
strings
-
match_prop_delta_decrease = None
float : The sum of proportional frequency delta decreases for matched
strings
Stopwords
Implements a feature set based off of filtering words for stopwords.
-
class revscoring.languages.features.Stopwords(name, stopword_set)
| Parameters: |
- name : str
A name for the collection
- stopword_set : set ( str )
A set of stopwords
|
-
revision = None
Revision :
The base revision feature set.
Supporting classes
-
class revscoring.languages.features.stopwords.Revision(name, revision_datasources)
-
stopwords = None
int : A count of the number of stopwords in the content
-
non_stopwords = None
int : A count of the number of non-stopwords in the content
-
parent = None
Revision : The
parent revision
-
diff = None
Diff : The
diff between the parent and current revision.
-
class revscoring.languages.features.stopwords.Diff(name, diff_datasources)
-
stopwords_added = None
int : A count of stopwords added
-
stopwords_removed = None
int : A count of stopwords removed
-
non_stopwords_added = None
int : A count of non-stopwords added
-
non_stopwords_removed = None
int : A count of non-stopwords removed
-
stopword_delta_sum = None
int : The sum of word frequency deltas for stopwords
-
stopword_delta_increase = None
int : The sum of word frequency delta increases for stopwords
-
stopword_delta_decrease = None
int : The sum of word frequency delta decreases for stopwords
-
non_stopword_delta_sum = None
int : The sum of word frequency deltas for non-stopwords
-
non_stopword_delta_increase = None
int : The sum of word frequency delta increases for non-stopwords
-
non_stopword_delta_decrease = None
int : The sum of word frequency delta decreases for non-stopwords
-
stopword_prop_delta_sum = None
float : The sum of proportional word frequency deltas for stopwords
-
stopword_prop_delta_increase = None
float : The sum of proportional word frequency delta increases for
stopwords
-
stopword_prop_delta_decrease = None
float : The sum of proportional word frequency delta decreases for
stopwords
-
non_stopword_prop_delta_sum = None
float : The sum of proportional word frequency deltas for
non-stopwords
-
non_stopword_prop_delta_increase = None
float : The sum of proportional word frequency delta increases for
non-stopwords
-
non_stopword_prop_delta_decrease = None
float : The sum of proportional word frequency delta decreases for
non-stopwords
Stemmed
Implements a feature set based off of a stemmer applied to words.
-
class revscoring.languages.features.Stemmed(name, stem_word)
| Parameters: |
- name : str
A name for the collection
- stem_word : func
A function that, given a word, will return a stemmed version of that
word
|
-
revision = None
Revision :
The base revision feature set.
Supporting classes
-
class revscoring.languages.features.stemmed.Revision(name, revision_datasources)
-
unique_stems = None
int : A count of unique stemmed words.
-
stem_chars = None
int : A count of characters in stemmed words.
-
parent = None
Revision : The
parent revision
-
diff = None
Diff : The
diff between the parent and current revision.
-
class revscoring.languages.features.stemmed.Diff(name, diff_datasources)
-
stem_delta_sum = None
int : The sum of frequency deltas for stemmed words
-
stem_delta_increase = None
int : The sum of frequency delta increases for stemmed words
-
stem_delta_decrease = None
int : The sum of frequency delta decreases for stemmed words
-
stem_prop_delta_sum = None
float : The sum of proportional frequency deltas for stemmed words
-
stem_prop_delta_increase = None
float : The sum of proportional frequency delta increases for stemmed
words
-
stem_prop_delta_decrease = None
float : The sum of proportional frequency delta decreases for stemmed
words