Source code for verificator

# -*- coding: utf-8 -*-
"""
.. module:: verificator
   :platform: Unix, Windows
   :synopsis: Comparison of user's input and subtitles content

.. moduleauthor:: Anton Konyshev <anton.konyshev@gmail.com>

"""
# License: wxWidgets (wxWindows Library Licence) 3.1

import re
from datetime import datetime
from difflib import ndiff

from pysrt.srtitem import SubRipItem
from pysrt.srttime import SubRipTime


class Verificator(object):
    """Checks user answers in the learning process, prepares the subtitle
    text.

    """

    # Text type of the running interpreter: ``unicode`` on Python 2 and
    # ``str`` on Python 3. Deriving it from a literal avoids referencing the
    # ``unicode`` builtin, which does not exist on Python 3.
    _text_type = type(u'')
    _string_types = (str, _text_type)

    def __init__(self, player):
        """:class:`Verificator` serves to check the correctness of user input.

        :param player: Player frame instance
        :type player: :class:`player.Player`

        """
        self.player = player
        self._subtitle = None
        self._complete = []
        self._created_at = datetime.now()
        self._hint_counter = 0
        self._mistakes_counter = 0
        # Lazily computed values; declared here so that every instance
        # attribute is visible in one place.
        self._etalon = None
        self._prepared_origin_words = None
        self._last_word = None

    def set_subtitle(self, subtitle, replace=True, force=False):
        """Sets the current subtitle which will be used for comparison with
        user's answer.

        :param subtitle: Current subtitle
        :type subtitle: :class:`pysrt.srtitem.SubRipItem`
        :param replace: Overwrite previously set subtitle
        :param force: Set the subtitle even if it was completed previously
        :returns: Execution status
        :rtype: boolean

        """
        if not isinstance(subtitle, SubRipItem):
            return False
        if self.has_subtitle() and not replace:
            return False
        if self.whether_complete(subtitle) and not force:
            return False
        # Drop the caches of the previous subtitle without marking it as
        # completed.
        self.clear_subtitle(complete=False)
        self._subtitle = subtitle
        return True

    def get_subtitle(self):
        """Getter for the current subtitle attribute.

        :returns: Current subtitle or None if there isn't
        :rtype: :class:`pysrt.srtitem.SubRipItem` or None

        """
        return self._subtitle

    def clear_subtitle(self, complete=True):
        """Clears the current subtitle and related private attributes.

        :param complete: Mark the current subtitle as completed

        """
        if complete and self.has_subtitle():
            self._complete.append(self._subtitle)
        self._subtitle = None
        self._etalon = None
        self._prepared_origin_words = None

    def whether_complete(self, subtitle):
        """Checks whether the given subtitle was previously completed or not.
        Subtitles are compared by their text.

        :param subtitle: Suspicious subtitle
        :type subtitle: :class:`pysrt.srtitem.SubRipItem`
        :returns: State of completion
        :rtype: boolean

        """
        text = subtitle.text
        return any(text == done.text for done in self._complete)

    def has_subtitle(self):
        """Checks the existence of current subtitle.

        :returns: Existence of the current subtitle
        :rtype: boolean

        """
        return isinstance(self._subtitle, SubRipItem)

    def is_passed(self, position):
        """Checks for the ending of the current subtitle.

        :param int position: Position in media stream (in seconds)
        :returns: Achievement of the subtitle ending
        :rtype: boolean

        """
        if not self.has_subtitle():
            return False
        return bool(SubRipTime(seconds=int(position)) >= self._subtitle.end)

    def _clean_includes(self):
        """Returns a collection of symbols which may pass the filter.

        :returns: "Safe" symbols
        :rtype: tuple

        """
        return (u' ', u"'",)

    def _accept_filters(self, src):
        """Applies regex filters to a text. The filters are taken from
        ``self.player.get_filters()`` as a mapping of pattern to replacement.

        :param src: Source grubby text
        :type src: str or unicode
        :returns: Filtered tidy text or None if `src` isn't a text
        :rtype: unicode or None

        """
        if isinstance(src, (int, float)):
            src = self._text_type(src)
        if not isinstance(src, self._string_types):
            return None
        result = src
        # ``items()`` instead of ``iteritems()``: same pairs on Python 2,
        # and available on Python 3 as well.
        for pattern, replacement in self.player.get_filters().items():
            result = re.sub(pattern, replacement, result)
        return result

    def _clean_text(self, src):
        """Removes unwanted characters from a text. The text is lowercased and
        only letters, digits and the symbols from :meth:`_clean_includes` are
        kept.

        :param src: Source text
        :type src: str or unicode
        :returns: Clean text or None if `src` isn't a text
        :rtype: unicode or None

        """
        if isinstance(src, (int, float)):
            src = self._text_type(src)
        if not isinstance(src, self._string_types):
            return None
        allowed = self._clean_includes()
        return u''.join(
            symbol for symbol in src.lower()
            if symbol.isalpha() or symbol.isdigit() or symbol in allowed)

    def get_etalon(self):
        """Prepares a filtered etalon phrase from the subtitle text. The
        result is cached until the subtitle is cleared.

        :returns: Filtered subtitle text or None if there is no current
            subtitle
        :rtype: unicode or None

        """
        if getattr(self, '_etalon', None) is None:
            subtitle = self.get_subtitle()
            if subtitle is None:
                # Nothing to prepare; previously this raised AttributeError.
                return None
            text = self._accept_filters(subtitle.text)
            self._etalon = self._clean_text(text)
        return self._etalon

    def get_etalon_words(self):
        """Returns clean words prepared from the subtitle text

        :returns: Clean words or None if there isn't source for words
        :rtype: list of unicode or None

        """
        etalon = self.get_etalon()
        if isinstance(etalon, self._string_types):
            return etalon.split()
        return None

    def _prepare_answer_words(self, text):
        """Cleans user's answer and splits it into words.

        :param text: User's answer
        :type text: str or unicode
        :returns: Cleaned words or empty list
        :rtype: list of unicode

        """
        cleaned = self._clean_text(text)
        if cleaned is None:
            return []
        return cleaned.split()

    def _prepare_origin_words(self):
        """Prepares the original subtitle text divided into words. Only the
        regex filters are applied, so case and punctuation are preserved;
        words without any letter or digit are dropped. The result is cached
        until the subtitle is cleared.

        :returns: Original subtitle text like a list of words (empty if there
            is no current subtitle)
        :rtype: list of unicode

        """
        if getattr(self, '_prepared_origin_words', None) is None:
            subtitle = self.get_subtitle()
            if subtitle is None:
                return []
            filtered = self._accept_filters(subtitle.text)
            # A list comprehension keeps the result a list on Python 3,
            # where ``filter()`` would return a one-shot iterator.
            self._prepared_origin_words = [
                word for word in filtered.split()
                if any(char.isalpha() or char.isdigit() for char in word)]
        return self._prepared_origin_words

    def get_last_word(self):
        """Returns last full word from user's answer, i.e. the etalon word
        most recently matched by :meth:`verify_answer`.

        :returns: Last word from answer or None if no word was matched yet
        :rtype: unicode or None

        """
        return getattr(self, '_last_word', None)

    def verify_answer(self, answer):
        """Checks the correctness of every symbol in user's answer.

        Each mistake found increases the internal mistakes counter.

        :param answer: User's answer
        :type answer: str or unicode
        :returns: Estimation of the answer as a list of pairs where first
            value is a symbol and second is an estimation of its correctness
        :rtype: list of tuples (list of pairs)

        """
        result = []
        if not self.has_subtitle():
            return result
        origin_words = self._prepare_origin_words()
        etalon_words = self.get_etalon_words() or []
        answer_words = self._prepare_answer_words(answer)
        for idx, answer_word in enumerate(answer_words):
            try:
                etalon_word = etalon_words[idx]
                if answer_word == etalon_word:
                    # A correct word is echoed in its original form
                    # (with case and punctuation) followed by a space.
                    result.extend(
                        [(sym, True) for sym in origin_words[idx]])
                    result.append((u' ', True))
                    self._last_word = etalon_word
                else:
                    # Walk the character diff up to the first mistake.
                    all_next_incorrect = False
                    for cursor in ndiff(answer_word, etalon_word):
                        if cursor[0] == u' ':
                            if all_next_incorrect:
                                result.append((cursor[2], False))
                                self._mistakes_counter += 1
                                break
                            else:
                                result.append((cursor[2], True))
                        elif cursor[0] == u'-':
                            # Symbol typed by the user but absent in the
                            # etalon word.
                            result.append((cursor[2], False))
                            self._mistakes_counter += 1
                            break
                        elif cursor[0] == u'+':
                            # Symbol missing from the answer: any symbol
                            # that follows is out of place.
                            all_next_incorrect = True
            except IndexError:
                # The answer has more words than the subtitle.
                if answer_word:
                    result.append((answer_word[0], False))
                    self._mistakes_counter += 1
        return result

    def hint(self, answer):
        """Prompt next word to the user. If user began to write next word it
        will complete this, else it will add full next word.

        Each call made while a subtitle is set increases the hints counter.

        :param answer: User's input in actual state
        :type answer: str or unicode
        :returns: Received input with completed or added next word
        :rtype: unicode

        """
        if not self.has_subtitle():
            return u''
        self._hint_counter += 1
        etalon_words = self.get_etalon_words() or []
        answer_words = self._prepare_answer_words(answer)
        if not answer_words:
            return etalon_words[0] if etalon_words else u''
        position = len(answer_words) - 1
        try:
            if etalon_words[position] != answer_words[-1]:
                # Last word is unfinished or wrong: replace it.
                answer_words[-1] = etalon_words[position]
            else:
                # Last word is right: append the following one.
                answer_words.append(etalon_words[position + 1])
        except IndexError:
            # No more etalon words to suggest; return the answer as is.
            pass
        return u' '.join(answer_words)

    def is_complete(self, answer):
        """Checks completeness of current subtitles fragment.

        :param answer: User's answer in actual state
        :type answer: unicode or str
        :returns: Whether the current fragment is finished or not (always
            False if there are no etalon words to compare with)
        :rtype: boolean

        """
        etalon_words = self.get_etalon_words()
        if etalon_words is None:
            return False
        # Words never contain whitespace, so comparing the lists is
        # equivalent to comparing the space-joined phrases.
        return self._prepare_answer_words(answer) == etalon_words

    def is_empty(self):
        """Checks the content existence in current subtitles fragment.
        Sometimes text ends after an application of filters.

        :returns: True if the fragment has no words (or there is no current
            subtitle), False otherwise
        :rtype: boolean

        """
        return not self.get_etalon_words()

    def fragment_length(self):
        """Calculates the length of the subtitles fragment in symbols.

        :returns: Length of the fragment (in characters)
        :rtype: int

        """
        if not self.has_subtitle():
            return 0
        return len(u' '.join(self._prepare_origin_words()))

    def get_statistics(self):
        """Returns a statistics information about learning process.

        :returns: Learning statistics
        :rtype: dict

        """
        elapsed = datetime.now() - self._created_at
        return {
            u'completed_fragments': len(self._complete),
            u'learning_time': elapsed.total_seconds(),
            u'hint_used': self._hint_counter,
            u'mistakes': self._mistakes_counter,
            u'total_chars': sum(len(sub.text) for sub in self._complete),
        }