1 from concurrent_tree_crawler.html_multipage_navigator.web_browser import \
2 MechanizeBrowserCreator
3 from concurrent_tree_crawler.html_multipage_navigator.throttled_web_browser \
4 import ThrottledWebBrowserCreator
5 from concurrent_tree_crawler.common.threads.token_bucket import \
6 TokenBucketFiller, StandardTokenBucket
7 from concurrent_tree_crawler.html_multipage_navigator.tree_navigator import \
8 HTMLMultipageNavigator
9 from concurrent_tree_crawler.html_multipage_navigator.sample_page_analyzer \
10 import LevelsCreator
11 from concurrent_tree_crawler.abstract_cmdln_navigators_creator import \
12 AbstractCmdLnNavigatorsCreator
13 from concurrent_tree_crawler.html_multipage_navigator.cmdln.abstract_levels_creator \
14 import AbstractCmdLnLevelsCreator
15
18 """@type levels_creator: L{AbstractCmdLnLevelsCreator}"""
# No token filler exists at this point; the cleanup code further down in
# this file stops the filler only when this attribute is not None.
19 self.__token_filler = None
# Keep the levels creator: it is later asked to add its own arguments to
# the parser and is notified through on_exit() during cleanup.
20 self.__levels_creator = levels_creator
21
# Positional argument: the address of the web site to crawl.
23 parser.add_argument("source_address",
24 help="the address of the web site to crawl.")
# Optional rate limit, parsed as a float. No default is passed here; the
# help text states that no limit is imposed when the option is omitted.
25 parser.add_argument("--max_pages_per_second", type=float,
26 help="Maximal number of web pages downloads per second "\
27 "(a real number). By default no limit is imposed.")
# Let the levels creator register its own arguments on the same parser.
28 self.__levels_creator.fill_parser(parser)
29
30 - def create(self, args, navigators_count):
40
55
57 """
58 It is possible to override this function to use a different
59 C{AbstractWebBrowserCreator}.
60
61 @rtype: C{AbstractWebBrowserCreator}
62 """
63 return MechanizeBrowserCreator()
64
# The token filler is initialised to None, so stop it only if one was
# actually created.
66 if self.__token_filler is not None:
67 self.__token_filler.stop()
# Pass the exit notification on to the levels creator.
# NOTE(review): indentation is lost in this view; presumably this call runs
# unconditionally rather than inside the `if` above. Confirm.
68 self.__levels_creator.on_exit()
69