1 from concurrent_tree_crawler.html_multipage_navigator.web_browser import \
2 AbstractWebBrowser
3
def __init__(self, children, next_page_link):
    """
    Store the links extracted from a single page of a tree node.

    @param children: list of (name of child, link to the child) pairs.
    @param next_page_link: link to the next page corresponding to the
        same node, or C{None} if there is no such link on the page.
    """
    self.children = children
    """A list of (name of child, child link) pairs."""

    self.next_page_link = next_page_link
    """Link to the next page corresponding to the same node.
    C{None}, if there is no such link on the page."""
17
def process(self, tree_path, page_file):
    """
    Process the node (normally, this method is called once for every node).

    This default implementation does nothing.

    @param tree_path: path to the tree node the navigator is currently in
        i.e. subsequent node names from the tree root to the current node.
        This might be e.g. C{["root"]} for a path to the root node or
        C{["root", "magazine-2011-09-18", "article_23"]} for some other
        node inside the tree hierarchy.
    @type tree_path: list of strings
    @param page_file: file-like structure to be processed
    """
    pass
32
def get_links(self, page_file, child_links_retrieved_so_far_count):
    """
    Return information about the links found on the given page.

    The given default implementation is made for a leaf node
    (a page with no children): it reports no child links and no
    next-page link, and ignores both arguments.

    @param page_file: file-like structure to be analyzed
    @param child_links_retrieved_so_far_count: number of child links
        retrieved so far in current node (from previous pages)
    @return: information about links on the given page.
    @rtype: L{PageLinks}
    """
    return PageLinks([], None)
44
def __init__(self, name, page_analyzer):
    """
    Bind a level name to the analyzer used for pages of that level.

    @param name: Name of the level.
        Example names: "books", "chapters", "sections".
    @param page_analyzer: object used for analyzing pages of the
        given level.
    @type page_analyzer: L{AbstractPageAnalyzer}
    """
    self.name = name
    """Name of the level. Example names: "books", "chapters", "sections"."""

    self.page_analyzer = page_analyzer
    """L{AbstractPageAnalyzer} object used for analyzing pages of the
    given level."""
58
60 """
61 A class responsible for creating a list of C{Level}s which
62 describe structure of the explored web site.
63 """
64
66 """
67 Create list of L{Level}s. The first element is a level
68 corresponding to the root node, the last one corresponds to a leaf.
69 """
70 raise NotImplementedError()
71