1
2
3
4 """
5 usufy.py Copyright (C) F. Brezo and Y. Rubio (i3visio) 2014
6 This program comes with ABSOLUTELY NO WARRANTY.
7 This is free software, and you are welcome to redistribute it under certain conditions.
8 For details, run:
9 python usufy.py --license"""
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 import argparse
29 import urllib2
30 import os
31
32 from multiprocessing import Process, Queue
33 import time
34
35 import config_usufy as config
36
def resultsToCSV(res):
    """
    Method to generate the text to be appended to a CSV file.

    List of parameters that the method receives:
    res:    a dictionary where the key is the nick and the value is
            another dictionary whose keys are the platforms and whose
            values are the URLs found for that nick.

    Return values:
    csvText as the string to be written in a CSV file: one
    "<nick>;<platform>;<url>" line per profile, each one terminated by
    a newline. An empty dictionary yields an empty string.
    """
    # print(...) with a single argument behaves exactly like the Python 2
    # print statement, so the output is unchanged.
    print("Generating .csv...")
    rows = []
    for nick in res:
        platforms = res[nick]
        for platform in platforms:
            # Fields are separated by ';'. No escaping is performed on them.
            rows.append(str(nick) + ";" + str(platform) + ";" + platforms[platform] + "\n")
    # A single join avoids re-copying the growing string on every row.
    return "".join(rows)
49
def _jsonSafeKeys(value):
    """Return a copy of value where every dictionary key is accepted by json.dumps."""
    if not isinstance(value, dict):
        return value
    safe = {}
    for key in value:
        # json.dumps only accepts basic types as keys; anything else (for
        # instance an object describing a platform) is replaced by str(key).
        if key is None or isinstance(key, (str, type(u""), int, float)):
            safeKey = key
        else:
            safeKey = str(key)
        safe[safeKey] = _jsonSafeKeys(value[key])
    return safe


def resultsToJson(profiles):
    """
    Method to generate the text to be appended to a Json file.

    List of parameters that the method receives:
    profiles:   a dictionary with the information of the profiles.
                Nested dictionaries are allowed; keys that json.dumps
                would reject are serialized using their str() form.

    Return values:
    jsonText as the string to be written in a Json file.
    """
    # print(...) with a single argument behaves exactly like the Python 2
    # print statement, so the output is unchanged.
    print("Generating .json...")
    import json
    jsonText = json.dumps(_jsonSafeKeys(profiles))
    return jsonText
64
def getPageWrapper(p, nick, rutaDescarga, avoidProcessing, outQueue=None):
    """
    Method that wraps the call to the getUserPage.

    List of parameters that the method receives:
    p:                  platform where the information is stored.
    nick:               nick to be searched.
    rutaDescarga:       local file where saving the obtained information.
    avoidProcessing:    boolean var that defines whether the profiles will
                        NOT be processed (stored in this version).
    outQueue:           Queue where the information will be stored.

    Return values:
    None if a queue is provided (the (p, url) tuple is put in outQueue
    when a profile is found) or if no profile is found.
    Else (p, url).
    """
    # print(...) with a single argument behaves exactly like the Python 2
    # print statement, so the output is unchanged.
    print("\tLooking for profiles in " + str(p) + "...")
    url = p.getUserPage(nick, rutaDescarga, avoidProcessing)

    if url is None:
        print("\t" + str(p) + " - User profile not found...")
        return None

    if outQueue is None:
        # If no queue was given, return the value normally.
        return (p, url)

    print("\t" + str(p) + " - User profile found:\t" + url)
    # Storing the values in the output queue.
    outQueue.put((p, url))
    return None
92
def _collectProfiles(outQueue, profiles):
    """Move every (platform, url) pair currently waiting in outQueue into profiles."""
    while not outQueue.empty():
        platform, url = outQueue.get()
        profiles[platform] = url


def processNickList(nicks, platforms=None, rutaDescarga=None, avoidProcessing=True):
    """
    Method that receives as a parameter a series of nicks and verifies whether
    those nicks have a profile associated in different social networks.

    List of parameters that the method receives:
    nicks:              list of nicks to process.
    platforms:          list of <Platform> objects to be processed. If None,
                        the list returned by config.getPlatforms() is used.
    rutaDescarga:       local file where saving the obtained information.
    avoidProcessing:    boolean var that defines whether the profiles will
                        NOT be processed (stored in this version).

    Return values:
    Returns a dictionary where the key is the nick and the value another
    dictionary where the keys are the social networks and the value is the
    corresponding URL.
    """
    if platforms is None:
        platforms = config.getPlatforms()

    res = {}

    for nick in nicks:
        # print(...) with a single argument behaves exactly like the Python 2
        # print statement, so the output is unchanged.
        print("Processing " + nick + "...")

        # Queue where the worker processes leave the profiles they find.
        outQueue = Queue()

        # One process per platform, all of them started before waiting.
        processes = []
        for platform in platforms:
            proc = Process(target=getPageWrapper, args=(platform, nick, rutaDescarga, avoidProcessing, outQueue))
            processes.append(proc)
            proc.start()

        profiles = {}

        # The queue is emptied while the workers are still running: joining
        # a process that has put items on a queue before those items are
        # consumed may block forever (see the multiprocessing guidelines).
        while any(proc.is_alive() for proc in processes):
            _collectProfiles(outQueue, profiles)
            time.sleep(0.05)

        for proc in processes:
            proc.join()

        # Picking up whatever was put between the last poll and the exit
        # of the workers.
        _collectProfiles(outQueue, profiles)

        res[nick] = profiles
    return res
142
if __name__ == "__main__":
    # Command line entry point: prints the banner, parses the arguments and
    # either shows the license, lists platforms/tags or searches the nicks.
    # print(...) with a single argument behaves exactly like the Python 2
    # print statement, so the output is unchanged.
    print("usufy.py Copyright (C) F. Brezo and Y. Rubio (i3visio) 2014")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print("This is free software, and you are welcome to redistribute it under certain conditions.")
    print("For details, run:")
    print("\tpython usufy.py --license")
    print("")

    parser = argparse.ArgumentParser(description='usufy.py - Piece of software that checks the existence of a profile for a given user in a bunch of different platforms.', prog='usufy.py', epilog='Check the README.md file for further details on the usage of this program.', add_help=False)
    parser._optionals.title = "Input options (one required)"

    # Defining the mutually exclusive group for the main options. The group
    # is NOT marked as required because that would make argparse reject
    # "--license" when used alone; the check is performed by hand after
    # parsing instead.
    general = parser.add_mutually_exclusive_group(required=False)
    # Adding the main options
    general.add_argument('--info', metavar='<action>', choices=['list_platforms', 'list_tags'], action='store', help='select the action to be performed amongst the following: list_platforms (list the details of the selected platforms) or list_tags (list the tags of the selected platforms).')
    general.add_argument('-l', '--list', metavar='<path_to_nick_list>', action='store', type=argparse.FileType('r'), help='path to the file where the list of nicks to verify is stored (one per line).')
    general.add_argument('-n', '--nicks', metavar='<nick>', nargs='+', action='store', help = 'the list of nicks to process (at least one is required).')

    # Selecting the platforms where performing the search
    groupPlatforms = parser.add_argument_group('Platform selection arguments', 'Criteria for selecting the platforms where performing the search.')
    groupPlatforms.add_argument('-p', '--platforms', metavar='<platform>', choices=['all', 'badoo', 'blip', 'dailymotion', 'delicious', 'douban','ebay', 'facebook', 'foursquare', 'github', 'googleplus', 'hi5', 'instagram', 'karmacracy', 'klout', 'myspace', 'pastebin', 'scribd', 'slideshare', 'pinterest', 'qq', 'tumblr', 'twitter', 'vk', 'youtube'], default = [], nargs='+', required=False, action='store', help='select the platforms where you want to perform the search amongst the following: all, badoo, blip, dailymotion, delicious, douban, ebay, facebook, foursquare, github, googleplus, hi5, instagram, karmacracy, klout, myspace, pastebin, pinterest, qq, scribd, slideshare, tumblr, twitter, vk, youtube. More than one option can be selected.')
    groupPlatforms.add_argument('-t', '--tags', metavar='<tag>', default = [], nargs='+', required=False, action='store', help='select the list of tags that fit the platforms in which you want to perform the search. More than one option can be selected.')

    # Configuring the processing options
    groupProcessing = parser.add_argument_group('Processing arguments', 'Configuring the way in which usufy will process the identified profiles.')
    groupProcessing.add_argument('-a', '--avoid_processing', required=False, action='store_true', default=False, help='argument to force usufy NOT to process the downloaded valid profiles.')
    groupProcessing.add_argument('-e', '--extension', metavar='<sum_ext>', nargs='+', choices=['csv', 'json'], required=False, action='store', help='output extension for the summary files (at least one is required).')
    groupProcessing.add_argument('-o', '--output_folder', metavar='<path_to_output_folder>', required=False, action='store', help='output folder for the generated documents. While if the paths does not exist, usufy.py will try to create; if this argument is not provided, usufy will NOT write any down any data. Check permissions if something goes wrong.')

    # About options
    groupAbout = parser.add_argument_group('About arguments', 'Showing additional information about this program.')
    # The help text of -h used to describe the --version option.
    groupAbout.add_argument('-h', '--help', action='help', help='shows this help and exits.')
    groupAbout.add_argument('--license', required=False, action='store_true', default=False, help='shows the GPLv3 license.')
    groupAbout.add_argument('--version', action='version', version='%(prog)s 0.1', help='shows the version of the program and exits.')

    args = parser.parse_args()

    # Manual replacement of required=True for the input group: one of the
    # input options is mandatory unless the license has been requested.
    if not args.license and args.info is None and args.list is None and not args.nicks:
        parser.error("one of the arguments --info -l/--list -n/--nicks is required")

    if args.license:
        print("Looking for the license...")
        # Showing the contents of the COPYING file, line by line
        try:
            with open("COPYING", "r") as iF:
                for linea in iF.read().splitlines():
                    print(linea)
        except (IOError, OSError):
            print("ERROR: there has been an error when opening the COPYING file.")
            print("\tThe file contains the terms of the GPLv3 under which this software is distributed.")
            print("\tIn case of doubts, verify the integrity of the files or contact contacto@i3visio.com.")
    else:
        # Recovering the list of platforms to be processed
        listPlatforms = config.getPlatforms(args.platforms, args.tags)

        if not args.info:
            # Recovering the nicks, either from the command line or from
            # the file already opened by argparse.FileType
            nicks = []
            if args.nicks:
                nicks = args.nicks
            else:
                try:
                    nicks = args.list.read().splitlines()
                except (IOError, OSError):
                    print("ERROR: there has been an error when opening the file that stores the nicks.")
                    print("\tPlease, check the existence of this file.")
                finally:
                    args.list.close()

            # processNickList receives (nicks, platforms, rutaDescarga,
            # avoidProcessing): the parser defines no "threads" option, so
            # nothing else is passed.
            if args.output_folder is not None:
                print("Creating the output folder...")
                if not os.path.exists(args.output_folder):
                    os.makedirs(args.output_folder)
                res = processNickList(nicks, listPlatforms, args.output_folder, args.avoid_processing)
            else:
                res = processNickList(nicks, listPlatforms)

            # Generating the summary files
            if args.extension:
                if args.output_folder is None:
                    # Without an output folder no data is written down, as
                    # stated in the help of -o.
                    print("WARNING: no output folder was provided (-o), so the summary files will NOT be written.")
                else:
                    # The folder already exists: it was created just above.
                    if "csv" in args.extension:
                        with open(os.path.join(args.output_folder, "results.csv"), "w") as oF:
                            oF.write(resultsToCSV(res) + "\n")
                    if "json" in args.extension:
                        with open(os.path.join(args.output_folder, "results.json"), "w") as oF:
                            oF.write(resultsToJson(res) + "\n")

            # Showing the results in the terminal
            if res:
                print("Summing up details...")
                for nick in res:
                    print(nick + ":")
                    print("\tPlatforms where the nick '" + nick + "' has been found...")
                    for plat in res[nick]:
                        print("\t\t" + str(plat) + ":\t" + res[nick][plat])

        elif args.info == 'list_platforms':
            print("List of platforms:")
            print("------------------")
            for p in listPlatforms:
                print("\t" + str(p) + ": " + str(p.tags))
        elif args.info == 'list_tags':
            print("List of tags:")
            print("-------------")
            # Counting how many of the selected platforms use each tag
            tags = {}
            for p in listPlatforms:
                for t in p.tags:
                    tags[t] = tags.get(t, 0) + 1
            # list.sort() returns None, so sorted() is needed to iterate
            # over the tags in order.
            for t in sorted(tags):
                print("\t" + t + ": " + str(tags[t]) + " times")
263