Package concurrent_tree_crawler :: Package html_multipage_navigator :: Package cmdln :: Module navigators_creator
[hide private]
[frames | no frames]

Source Code for Module concurrent_tree_crawler.html_multipage_navigator.cmdln.navigators_creator

 1  from concurrent_tree_crawler.html_multipage_navigator.web_browser import \ 
 2          MechanizeBrowserCreator 
 3  from concurrent_tree_crawler.html_multipage_navigator.throttled_web_browser \ 
 4          import ThrottledWebBrowserCreator 
 5  from concurrent_tree_crawler.common.threads.token_bucket import \ 
 6          TokenBucketFiller, StandardTokenBucket 
 7  from concurrent_tree_crawler.html_multipage_navigator.tree_navigator import \ 
 8          HTMLMultipageNavigator 
 9  from concurrent_tree_crawler.html_multipage_navigator.sample_page_analyzer \ 
10          import LevelsCreator 
11  from concurrent_tree_crawler.abstract_cmdln_navigators_creator import \ 
12          AbstractCmdLnNavigatorsCreator 
13  from concurrent_tree_crawler.html_multipage_navigator.cmdln.abstract_levels_creator \ 
14          import AbstractCmdLnLevelsCreator 
15   
class CmdLnNavigatorsCreator(AbstractCmdLnNavigatorsCreator):
    """
    Builds L{HTMLMultipageNavigator} objects from parsed command-line
    arguments.

    When the C{--max_pages_per_second} option is given, the browser creator
    handed to the navigators is wrapped in a L{ThrottledWebBrowserCreator}
    that shares a L{StandardTokenBucket} with a L{TokenBucketFiller}, and
    that filler is started. L{on_exit} stops it again.
    """

    def __init__(self, levels_creator):
        """
        @param levels_creator: object used to register its own command-line
            options and to create the levels passed to every navigator.
        @type levels_creator: L{AbstractCmdLnLevelsCreator}
        """
        # Non-None only after create() was called with a download limit.
        self.__token_filler = None
        self.__levels_creator = levels_creator

    def fill_parser(self, parser):
        """
        Register the command-line arguments read by L{create}.

        Adds the positional C{source_address} and the optional
        C{--max_pages_per_second} (parsed as C{float}), then lets the levels
        creator add its own arguments.

        @param parser: parser exposing C{add_argument}; presumably an
            C{argparse.ArgumentParser} -- confirm against the caller.
        """
        parser.add_argument(
            "source_address",
            help="the address of the web site to crawl.")
        parser.add_argument(
            "--max_pages_per_second",
            type=float,
            help=("Maximal number of web pages downloads per second "
                  "(a real number). By default no limit is imposed."))
        self.__levels_creator.fill_parser(parser)

    def create(self, args, navigators_count):
        """
        Create C{navigators_count} navigators.

        Every navigator receives C{args.source_address}, its own result of
        the levels creator's C{create(args)} and a single browser creator
        shared by all of them. If C{args.max_pages_per_second} is not
        C{None}, a token filler is started as a side effect; call
        L{on_exit} to stop it.

        @param args: parsed command-line arguments; must provide the
            attributes C{source_address} and C{max_pages_per_second}.
        @param navigators_count: number of navigators to create.
        @type navigators_count: C{int}
        @return: the newly created navigators.
        @rtype: C{list} of L{HTMLMultipageNavigator}
        """
        browser_creator = self.__get_browser_creator_and_start_token_filler(
            args.max_pages_per_second)
        return [
            HTMLMultipageNavigator(
                args.source_address,
                self.__levels_creator.create(args),
                browser_creator)
            for _ in range(navigators_count)]

    def __get_browser_creator_and_start_token_filler(self,
            max_pages_per_second):
        """
        Return the browser creator to be shared by the navigators.

        Without a limit this is the plain creator returned by
        L{_create_browser_creator}. With a limit, that creator is wrapped in
        a L{ThrottledWebBrowserCreator} together with a new
        L{StandardTokenBucket}, and a L{TokenBucketFiller} for the same
        bucket is stored on the instance and started.

        @param max_pages_per_second: download limit, or C{None} for no
            limit.
        @return: the browser creator, throttled when a limit is given.
        """
        # NOTE(review): a filler started by an earlier call is dropped here
        # without being stopped. Presumably create() runs once per instance
        # -- confirm before relying on repeated calls.
        self.__token_filler = None
        if max_pages_per_second is None:
            return self._create_browser_creator()

        token_bucket = StandardTokenBucket(max_pages_per_second)
        throttled_creator = ThrottledWebBrowserCreator(
            self._create_browser_creator(), token_bucket)
        # NOTE(review): the meaning of the C{1} and rate arguments is
        # defined by TokenBucketFiller, which is not visible here.
        self.__token_filler = TokenBucketFiller(
            token_bucket, 1, max_pages_per_second)
        self.__token_filler.start()
        return throttled_creator

    def _create_browser_creator(self):
        """
        It is possible to override this function to use a different
        C{AbstractWebBrowserCreator}.

        @return: a new L{MechanizeBrowserCreator} by default.
        @rtype: C{AbstractWebBrowserCreator}
        """
        return MechanizeBrowserCreator()

    def on_exit(self):
        """
        Stop the token filler, if one was started, and forward the exit
        notification to the levels creator.

        The levels creator is notified even when stopping the filler
        raises; the exception still propagates to the caller afterwards.
        """
        try:
            if self.__token_filler is not None:
                self.__token_filler.stop()
        finally:
            # Must not be skipped because of a failure in the filler,
            # otherwise the levels creator never gets to clean up.
            self.__levels_creator.on_exit()
69