Package concurrent_tree_crawler :: Package html_multipage_navigator :: Module abstract_page_analyzer
[hide private]
[frames | no frames]

Source Code for Module concurrent_tree_crawler.html_multipage_navigator.abstract_page_analyzer

 1  from concurrent_tree_crawler.html_multipage_navigator.web_browser import \ 
 2          AbstractWebBrowser 
 3   
17   
class AbstractPageAnalyzer:
    """
    Base class for objects that analyze a single page of the explored
    tree.
    """

    def process(self, tree_path, page_file):
        """
        Process the node (normally, this method is called once for every
        node).

        This base implementation does nothing and returns C{None}.

        @param tree_path: path to the tree node the navigator is currently
            in, i.e. subsequent node names from the tree root to the
            current node. This might be e.g. C{["root"]} for a path to the
            root node or
            C{["root", "magazine-2011-09-18", "article_23"]} for some other
            node inside the tree hierarchy.
        @type tree_path: list of strings
        @param page_file: file-like structure to be processed
        """
        pass
32
44
class Level:
    """
    A named level paired with the page analyzer used for pages of that
    level.
    """

    def __init__(self, name, page_analyzer):
        """
        @param name: Name of the level.
            Example names: "books", "chapters", "sections".
        @type name: string
        @param page_analyzer: object used for analyzing pages of the given
            level.
        @type page_analyzer: L{AbstractPageAnalyzer}
        """
        #: Name of the level. Example names: "books", "chapters",
        #: "sections".
        self.name = name

        #: L{AbstractPageAnalyzer} object used for analyzing pages of the
        #: given level.
        self.page_analyzer = page_analyzer
58
class AbstractLevelsCreator:
    """
    A class responsible for creating a list of C{Level}s which
    describe structure of the explored web site.
    """

    def create(self):
        """
        Create list of L{Level}s. The first element is a level
        corresponding to the root node, the last one corresponds to a leaf.

        This base implementation only raises; concrete creators are
        expected to override it.

        @raise NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
71