Package usufy :: Module usufy
[hide private]
[frames | no frames]

Source Code for Module usufy.usufy

  1  # !/usr/bin/python 
  2  # -*- coding: cp1252 -*- 
  3   
  4  """ 
  5  usufy.py Copyright (C) F. Brezo and Y. Rubio (i3visio) 2014 
  6  This program comes with ABSOLUTELY NO WARRANTY. 
  7  This is free software, and you are welcome to redistribute it under certain conditions. 
  8  For details, run: 
  9          python usufy.py --license""" 
 10  # 
 11  ################################################################################## 
 12  # 
 13  #       This program is free software: you can redistribute it and/or modify 
 14  #       it under the terms of the GNU General Public License as published by 
 15  #       the Free Software Foundation, either version 3 of the License, or 
 16  #       (at your option) any later version. 
 17  # 
 18  #       This program is distributed in the hope that it will be useful, 
 19  #       but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  #       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 21  #       GNU General Public License for more details. 
 22  # 
 23  #       You should have received a copy of the GNU General Public License 
 24  #       along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 25  # 
 26  ################################################################################## 
 27   
 28  import argparse 
 29  import urllib2 
 30  import os 
 31   
 32  from multiprocessing import Process, Queue 
 33  import time 
 34   
 35  import config_usufy as config 
 36   
def resultsToCSV(res):
    """
        Method to generate the text to be appended to a CSV file.

        List of parameters that the method receives:
        res:    a dictionary where the key is the nick and the value is another
                dictionary mapping each platform to the URL of the profile.

        Return values:
            csvText as the string to be written in a CSV file. Every profile
            is rendered as one "nick;platform;url" line.
    """
    print("Generating .csv...")
    rows = []
    for nick, profiles in res.items():
        for platform, url in profiles.items():
            # One semicolon-separated record per (nick, platform) pair.
            rows.append(str(nick) + ";" + str(platform) + ";" + url + "\n")
    return "".join(rows)
49
def _withStringKeys(data):
    """
        Private helper that returns a copy of data in which every dictionary
        key that json cannot serialise natively is replaced by str(key).

        Nested dictionaries, lists and tuples are processed recursively; any
        other value is returned untouched.
    """
    # Key types that json.dumps already accepts (None is checked separately).
    nativeKeyTypes = (type(""), type(u""), int, float)

    if isinstance(data, dict):
        converted = {}
        for key, value in data.items():
            if not (key is None or isinstance(key, nativeKeyTypes)):
                key = str(key)
            converted[key] = _withStringKeys(value)
        return converted
    if isinstance(data, (list, tuple)):
        return [_withStringKeys(item) for item in data]
    return data


def resultsToJson(profiles):
    """
        Method to generate the text to be appended to a Json file.

        List of parameters that the method receives:
        profiles:   a dictionary with the information of the profiles. Keys
                    that are not valid Json keys (e.g. platform objects) are
                    converted with str(), as it is done for the CSV output.

        Return values:
            jsonText as the string to be written in a Json file.
    """
    print("Generating .json...")
    import json
    try:
        # Fast path: data that is already serialisable keeps its exact output.
        jsonText = json.dumps(profiles)
    except TypeError:
        # json.dumps rejects non-primitive dictionary keys; retry with the
        # keys stringified. A TypeError caused by a non-serialisable *value*
        # is raised again by this second call.
        jsonText = json.dumps(_withStringKeys(profiles))
    return jsonText
64
def getPageWrapper(p, nick, rutaDescarga, avoidProcessing, outQueue=None):
    """
        Method that wraps the call to the getUserPage.

        List of parameters that the method receives:
        p:              platform where the information is stored.
        nick:           nick to be searched.
        rutaDescarga:   local file where saving the obtained information.
        avoidProcessing:boolean var that defines whether the profiles will NOT be processed (stored in this version).
        outQueue:       Queue where the information will be stored.

        Return values:
            None if a queue is provided. Note that the values will be stored in the outQueue
            None if the profile was not found.
            Else (p, url).
    """
    print("\tLooking for profiles in " + str(p) + "...")
    url = p.getUserPage(nick, rutaDescarga, avoidProcessing)

    # Identity checks: None is a singleton and must not be compared with != / ==.
    if url is None:
        print("\t" + str(p) + " - User profile not found...")
        return None

    if outQueue is None:
        # If no queue was given, return the value normally
        return (p, url)

    print("\t" + str(p) + " - User profile found:\t" + url)
    # Storing in the output queue the values
    outQueue.put((p, url))
    return None
92
def _collectProfiles(processes, outQueue):
    """
        Private helper that drains outQueue while the given worker processes
        are running and returns a dictionary {platform: url}.

        The queue is read BEFORE the workers are joined: a process that has
        put data in a multiprocessing Queue does not terminate until that
        data has been flushed, so joining first may block forever.
    """
    try:
        from queue import Empty  # Python 3
    except ImportError:
        from Queue import Empty  # Python 2

    profiles = {}
    alive = list(processes)
    while True:
        try:
            platform, url = outQueue.get(timeout=0.1)
        except Empty:
            if not alive:
                # Every worker had already finished before this (empty) read,
                # so nothing else can arrive.
                break
            alive = [proc for proc in alive if proc.is_alive()]
        else:
            profiles[platform] = url
    return profiles


def processNickList(nicks, platforms=None, rutaDescarga=None, avoidProcessing=True):
    """
        Method that receives as a parameter a series of nicks and verifies whether those nicks have a profile associated in different social networks.

        List of parameters that the method receives:
        nicks:          list of nicks to process.
        platforms:      list of <Platform> objects to be processed. If None, config.getPlatforms() is used.
        rutaDescarga:   local file where saving the obtained information.
        avoidProcessing:boolean var that defines whether the profiles will NOT be processed (stored in this version).

        Return values:
            Returns a dictionary where the key is the nick and the value another dictionary where the keys are the social networks and the value is the corresponding URL.
    """
    if platforms is None:
        platforms = config.getPlatforms()

    res = {}

    # Processing the whole list of terms...
    for nick in nicks:
        print("Processing " + nick + "...")
        # Defining the Queue where the results will be stored
        outQueue = Queue()

        # One process per platform, all of them sharing the output queue
        processes = []
        for platform in platforms:
            proc = Process(target=getPageWrapper, args=(platform, nick, rutaDescarga, avoidProcessing, outQueue))
            processes.append(proc)
            # Starting the computing of the process...
            proc.start()

        # Recovering all results and generating the dictionary
        res[nick] = _collectProfiles(processes, outQueue)

        # The queue is drained, so joining cannot block on pending data
        for proc in processes:
            proc.join()
    return res
if __name__ == "__main__":
    # Command line entry point: parses the arguments, optionally shows the
    # license or the platform/tag listings, and otherwise looks for the given
    # nicks in the selected platforms, printing and (optionally) storing the
    # results.
    print("usufy.py Copyright (C) F. Brezo and Y. Rubio (i3visio) 2014")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print("This is free software, and you are welcome to redistribute it under certain conditions.")
    print("For details, run:")
    print("\tpython usufy.py --license")
    print("")

    parser = argparse.ArgumentParser(
        description='usufy.py - Piece of software that checks the existence of a profile for a given user in a bunch of different platforms.',
        prog='usufy.py',
        epilog='Check the README.md file for further details on the usage of this program.',
        add_help=False)
    parser._optionals.title = "Input options (one required)"

    # Defining the mutually exclusive group for the main options.
    # The group is NOT marked as required so that "--license" can be used on
    # its own (as the banner suggests); the requirement is enforced manually
    # after parsing.
    general = parser.add_mutually_exclusive_group(required=False)
    # Adding the main options
    general.add_argument('--info', metavar='<action>', choices=['list_platforms', 'list_tags'], action='store',
        help='select the action to be performed amongst the following: list_platforms (list the details of the selected platforms) or list_tags (list the tags of the selected platforms).')
    general.add_argument('-l', '--list', metavar='<path_to_nick_list>', action='store', type=argparse.FileType('r'),
        help='path to the file where the list of nicks to verify is stored (one per line).')
    general.add_argument('-n', '--nicks', metavar='<nick>', nargs='+', action='store',
        help='the list of nicks to process (at least one is required).')

    # Selecting the platforms where performing the search
    groupPlatforms = parser.add_argument_group('Platform selection arguments', 'Criteria for selecting the platforms where performing the search.')
    groupPlatforms.add_argument('-p', '--platforms', metavar='<platform>',
        choices=['all', 'badoo', 'blip', 'dailymotion', 'delicious', 'douban', 'ebay', 'facebook', 'foursquare', 'github', 'googleplus', 'hi5', 'instagram', 'karmacracy', 'klout', 'myspace', 'pastebin', 'scribd', 'slideshare', 'pinterest', 'qq', 'tumblr', 'twitter', 'vk', 'youtube'],
        default=[], nargs='+', required=False, action='store',
        help='select the platforms where you want to perform the search amongst the following: all, badoo, blip, dailymotion, delicious, douban, ebay, facebook, foursquare, github, googleplus, hi5, instagram, karmacracy, klout, myspace, pastebin, pinterest, qq, scribd, slideshare, tumblr, twitter, vk, youtube. More than one option can be selected.')
    groupPlatforms.add_argument('-t', '--tags', metavar='<tag>', default=[], nargs='+', required=False, action='store',
        help='select the list of tags that fit the platforms in which you want to perform the search. More than one option can be selected.')

    # Configuring the processing options
    groupProcessing = parser.add_argument_group('Processing arguments', 'Configuring the way in which usufy will process the identified profiles.')
    groupProcessing.add_argument('-a', '--avoid_processing', required=False, action='store_true', default=False,
        help='argument to force usufy NOT to process the downloaded valid profiles.')
    groupProcessing.add_argument('-e', '--extension', metavar='<sum_ext>', nargs='+', choices=['csv', 'json'], required=False, action='store',
        help='output extension for the summary files (at least one is required).')
    groupProcessing.add_argument('-o', '--output_folder', metavar='<path_to_output_folder>', required=False, action='store',
        help='output folder for the generated documents. While if the paths does not exist, usufy.py will try to create; if this argument is not provided, usufy will NOT write any down any data. Check permissions if something goes wrong.')

    # About options
    groupAbout = parser.add_argument_group('About arguments', 'Showing additional information about this program.')
    groupAbout.add_argument('-h', '--help', action='help', help='shows this help and exits.')
    groupAbout.add_argument('--license', required=False, action='store_true', default=False, help='shows the GPLv3 license.')
    groupAbout.add_argument('--version', action='version', version='%(prog)s 0.1', help='shows the version of the program and exits.')

    args = parser.parse_args()

    # Manual replacement of the "required" check of the input group, so that
    # "--license" alone is accepted.
    if not (args.license or args.info or args.list or args.nicks):
        parser.error("one of the arguments --info -l/--list -n/--nicks is required")

    if args.license:
        print("Looking for the license...")
        # Showing the license
        try:
            with open("COPYING", "r") as iF:
                contenido = iF.read().splitlines()
            for linea in contenido:
                print(linea)
        except (IOError, OSError):
            print("ERROR: there has been an error when opening the COPYING file.")
            print("\tThe file contains the terms of the GPLv3 under which this software is distributed.")
            print("\tIn case of doubts, verify the integrity of the files or contact contacto@i3visio.com.")

    else:
        # Recovering the list of platforms to be launched
        listPlatforms = config.getPlatforms(args.platforms, args.tags)

        # Executing the corresponding process...
        if not args.info:
            # The summary files are written inside the output folder, so fail
            # early (before any search is launched) if it was not provided.
            if args.extension and args.output_folder is None:
                parser.error("-e/--extension requires -o/--output_folder to know where to store the summary files")

            # Defining the list of users to monitor
            nicks = []

            if args.nicks:
                nicks = args.nicks
            else:
                # Reading the nick file (already opened by argparse)
                try:
                    # Blank lines are skipped: an empty nick is not a valid search term
                    nicks = [line.strip() for line in args.list.read().splitlines() if line.strip()]
                except (IOError, OSError):
                    print("ERROR: there has been an error when opening the file that stores the nicks.")
                    print("\tPlease, check the existence of this file.")
                finally:
                    args.list.close()

            if args.output_folder is not None:
                # An output folder was selected: creating it if needed
                print("Creating the output folder...")
                if not os.path.exists(args.output_folder):
                    os.makedirs(args.output_folder)
                # Launching the process...
                res = processNickList(nicks, listPlatforms, args.output_folder, args.avoid_processing)
            else:
                res = processNickList(nicks, listPlatforms)

            # Generating summary files for each requested extension
            # (args.output_folder exists at this point, see the checks above)
            if args.extension:
                if "csv" in args.extension:
                    with open(os.path.join(args.output_folder, "results.csv"), "w") as oF:
                        oF.write(resultsToCSV(res) + "\n")
                if "json" in args.extension:
                    with open(os.path.join(args.output_folder, "results.json"), "w") as oF:
                        oF.write(resultsToJson(res) + "\n")

            if res:
                print("Summing up details...")
                for nick in res:
                    print(nick + ":")
                    print("\tPlatforms where the nick '" + nick + "' has been found...")
                    for plat in res[nick]:
                        print("\t\t" + str(plat) + ":\t" + res[nick][plat])

        # Information actions...
        elif args.info == 'list_platforms':
            print("List of platforms:")
            print("------------------")
            for p in listPlatforms:
                print("\t" + str(p) + ": " + str(p.tags))
        elif args.info == 'list_tags':
            print("List of tags:")
            print("-------------")
            tags = {}
            # Going through all the selected platforms to count their tags
            for p in listPlatforms:
                for t in p.tags:
                    tags[t] = tags.get(t, 0) + 1
            # Displaying the results in a sorted list.
            # sorted() is used because list.sort() works in place and returns None.
            for t in sorted(tags):
                print("\t" + t + ": " + str(tags[t]) + " times")