Package pygeoip
[hide private]
[frames | no frames]

Source Code for Package pygeoip

  1  """
 
  2  Pure Python GeoIP API. The API is based off of U{MaxMind's C-based Python API<http://www.maxmind.com/app/python>},
 
  3  but the code itself is based on the U{pure PHP5 API<http://pear.php.net/package/Net_GeoIP/>}
 
  4  by Jim Winstead and Hans Lellelid.
 
  5  
 
  6  It is mostly a drop-in replacement, except the
 
  7  C{new} and C{open} methods are gone. You should instantiate the L{GeoIP} class yourself:
 
  8  
 
  9  C{gi = GeoIP('/path/to/GeoIP.dat', pygeoip.MEMORY_CACHE)}
 
 10  
 
 11  @author: Jennifer Ennis <zaylea at gmail dot com>
 
 12  
 
 13  @license:
 
 14  Copyright(C) 2004 MaxMind LLC
 
 15  
 
 16  This program is free software: you can redistribute it and/or modify
 
 17  it under the terms of the GNU Lesser General Public License as published by
 
 18  the Free Software Foundation, either version 3 of the License, or
 
 19  (at your option) any later version.
 
 20  
 
 21  This program is distributed in the hope that it will be useful,
 
 22  but WITHOUT ANY WARRANTY; without even the implied warranty of
 
 23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
 24  GNU Lesser General Public License for more details.
 
 25  
 
 26  You should have received a copy of the GNU Lesser General Public License
 
 27  along with this program.  If not, see <http://www.gnu.org/licenses/lgpl.txt>.
 
 28  """ 
 29  
 
 30  from __future__ import with_statement 
 31  import os 
 32  import math 
 33  import socket 
 34  import mmap 
 35  
 
 36  from const import * 
 37  from util import ip2long 
 38  
 
class GeoIPError(Exception):
    """
    Error raised by this module for GeoIP-specific failures, such as calling
    a lookup method on the wrong kind of database or passing something that
    is not an IP address to a C{*_by_addr} method.
    """
41
class GeoIPMetaclass(type):
    """
    Metaclass that hands out one shared instance per database filename.

    Calling a class that uses this metaclass (e.g. C{GeoIP('/path/GeoIP.dat')})
    returns the instance previously built for the same class and filename, so
    the database is not reparsed. Flags are ignored for this, i.e. if you
    initialize one with the STANDARD flag (default) and later ask for the same
    file with MEMORY_CACHE, you still get the STANDARD instance back.

    The caching has to live in C{__call__}: a metaclass C{__new__} runs when a
    I{class} is created (its first argument is the class name), not when an
    instance is requested, so it never sees the filename.
    """

    # Maps (class, filename) -> the instance created for that pair. Keying on
    # the class as well keeps subclasses from sharing each other's instances.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Return the cached instance for the requested filename, creating it on
        first use.

        @param args: positional constructor arguments; the first one, if
            present, is taken to be the database filename
        @param kwargs: keyword constructor arguments; C{filename} is used when
            no positional argument was given
        @return: the shared instance of C{cls} for that filename
        """
        if len(args) > 0:
            filename = args[0]
        elif 'filename' in kwargs:
            filename = kwargs['filename']
        else:
            # Nothing to key the cache on: construct normally so the class's
            # own __init__ reports any missing argument.
            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        key = (cls, filename)
        cache = GeoIPMetaclass._instances

        if key not in cache:
            cache[key] = super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        return cache[key]


# Calling the metaclass directly creates the base class without relying on
# any version-specific class-statement syntax for declaring a metaclass.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
class GeoIP(GeoIPBase):
    """
    Reader for a GeoIP binary database file.

    The database edition is detected when the object is created. The public
    C{*_by_addr} methods take an IP address string; the C{*_by_name} methods
    first resolve a hostname with C{socket.gethostbyname} and then delegate
    to the matching C{*_by_addr} method.

    All C{*_by_addr} methods raise L{GeoIPError} when the database is of the
    wrong edition for the call or when the address is rejected.
    """

    # Message used when a *_by_addr method is handed something that is not a
    # usable IP address.
    _NOT_AN_IP = ('*_by_addr methods only accept IP addresses. '
                  'Use *_by_name for hostnames. (Address: %s)')

    def __init__(self, filename, flags=0):
        """
        Open the database and detect its edition.

        @param filename: path to a geoip database
        @type filename: str
        @param flags: flags that affect how the database is processed.
            Currently the only supported flags are STANDARD, MEMORY_CACHE, and
            MMAP_CACHE. If MMAP_CACHE is set the file is memory-mapped and
            MEMORY_CACHE is not applied.
        @type flags: int
        """
        self._filename = filename
        self._flags = flags
        # Stays None unless the whole file is read into memory below;
        # _seek_country checks this rather than the flags, so every flag
        # combination leaves the object usable.
        self._memoryBuffer = None

        if self._flags & MMAP_CACHE:
            # The mapping stays valid after the file object is closed.
            with open(filename, 'rb') as f:
                self._filehandle = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        else:
            # Kept open for the lifetime of the object: the record lookups
            # seek and read on it.
            self._filehandle = open(filename, 'rb')

            if self._flags & MEMORY_CACHE:
                self._memoryBuffer = self._filehandle.read()

        self._setup_segments()

    @staticmethod
    def _le_to_int(buf, start, length):
        """
        Decode C{length} bytes of C{buf}, beginning at C{start}, as a
        little-endian unsigned integer.

        @raise IndexError: if C{buf} is shorter than C{start + length}
        """
        value = 0
        for j in range(length):
            value += ord(buf[start + j]) << (j * 8)
        return value

    @staticmethod
    def _read_cstring(buf, start):
        """
        Read the NUL-terminated string that begins at C{buf[start]}.

        @return: tuple of (string without the terminator, index of the first
            byte after the terminator)
        @raise IndexError: if no terminator is found before the end of C{buf}
        """
        end = start
        while ord(buf[end]) != 0:
            end += 1
        return buf[start:end], end + 1

    @staticmethod
    def _region_letters(code):
        """
        Turn a region number into its two-letter form: the quotient and the
        remainder of division by 26 each select a letter, starting at 'A'.
        """
        return chr(code // 26 + 65) + chr(code % 26 + 65)

    @staticmethod
    def _to_ipnum(addr):
        """
        Convert an IP address string with C{ip2long}.

        @raise ValueError: if C{ip2long} returns a falsy value for C{addr}
        """
        ipnum = ip2long(addr)

        if not ipnum:
            raise ValueError("Invalid IP address: %s" % addr)

        return ipnum

    def _setup_segments(self):
        """
        Parses the database file to determine what kind of database is being
        used and sets up the record length and segment boundary that the
        seek/lookup methods use later.

        The file is scanned backwards from its end for a marker of three 0xFF
        bytes; the byte after the marker is the database type. If no marker
        is found within STRUCTURE_INFO_MAX_SIZE steps the database is treated
        as a Country edition. The file position is restored before returning.
        """
        self._databaseType = COUNTRY_EDITION
        self._recordLength = STANDARD_RECORD_LENGTH

        filepos = self._filehandle.tell()
        self._filehandle.seek(-3, os.SEEK_END)

        for _ in range(STRUCTURE_INFO_MAX_SIZE):
            delim = self._filehandle.read(3)

            if delim != (chr(255) * 3):
                # Not the marker: step back over the 3 bytes just read plus
                # one more, so the next read starts one byte earlier.
                self._filehandle.seek(-4, os.SEEK_CUR)
                continue

            self._databaseType = ord(self._filehandle.read(1))

            if self._databaseType >= 106:
                # backwards compatibility with databases from April 2003 and earlier
                self._databaseType -= 105

            if self._databaseType == REGION_EDITION_REV0:
                self._databaseSegments = STATE_BEGIN_REV0

            elif self._databaseType == REGION_EDITION_REV1:
                self._databaseSegments = STATE_BEGIN_REV1

            elif self._databaseType in (CITY_EDITION_REV0,
                                        CITY_EDITION_REV1,
                                        ORG_EDITION,
                                        ISP_EDITION,
                                        ASNUM_EDITION):
                # These editions store the segment boundary in the file,
                # right after the type byte.
                buf = self._filehandle.read(SEGMENT_RECORD_LENGTH)
                self._databaseSegments = self._le_to_int(buf, 0, SEGMENT_RECORD_LENGTH)

                if self._databaseType in (ORG_EDITION, ISP_EDITION):
                    self._recordLength = ORG_RECORD_LENGTH

            break

        if self._databaseType == COUNTRY_EDITION:
            self._databaseSegments = COUNTRY_BEGIN

        self._filehandle.seek(filepos, os.SEEK_SET)

    def _lookup_country_id(self, addr):
        """
        Get the country index.

        This method is called by L{country_code_by_addr} and
        L{country_name_by_addr}. It looks up the index ('id') for the country,
        which is the key for the code and name tables.

        @param addr: The IP address
        @type addr: str
        @return: index into COUNTRY_CODES / COUNTRY_NAMES
        @rtype: int
        @raise ValueError: if the address is rejected
        @raise GeoIPError: if this is not a Country database
        """
        ipnum = self._to_ipnum(addr)

        if self._databaseType != COUNTRY_EDITION:
            raise GeoIPError('Invalid database type; country_* methods expect '
                             'Country database')

        return self._seek_country(ipnum) - COUNTRY_BEGIN

    def _seek_country(self, ipnum):
        """
        Using the record length and appropriate start points, seek to the
        country that corresponds to the converted IP address integer.

        Starting at node 0, each of the 32 address bits (most significant
        first) selects one of the two records stored in the current node. A
        record value at or above the segment boundary ends the search; any
        other value is the index of the next node.

        @param ipnum: result of ip2long conversion
        @type ipnum: int
        @return: offset of start of record
        @rtype: int
        @raise GeoIPError: if all 32 bits are consumed without reaching a
            terminal record
        """
        record_length = self._recordLength
        node_size = 2 * record_length
        offset = 0

        for depth in range(31, -1, -1):

            if self._memoryBuffer is not None:
                start = node_size * offset
                buf = self._memoryBuffer[start:start + node_size]
            else:
                self._filehandle.seek(node_size * offset, os.SEEK_SET)
                buf = self._filehandle.read(node_size)

            # A set bit selects the second record of the node, a clear bit
            # the first.
            if ipnum & (1 << depth):
                branch = self._le_to_int(buf, record_length, record_length)
            else:
                branch = self._le_to_int(buf, 0, record_length)

            if branch >= self._databaseSegments:
                return branch

            offset = branch

        raise GeoIPError('Error traversing database - perhaps it is corrupt?')

    def _get_org(self, ipnum):
        """
        Seek and return organization (or ISP) name for converted IP addr.

        @param ipnum: Converted IP address
        @type ipnum: int
        @return: org/isp name, or None if the database has no entry
        @rtype: str
        """
        seek_org = self._seek_country(ipnum)
        if seek_org == self._databaseSegments:
            return None

        record_pointer = seek_org + (2 * self._recordLength - 1) * self._databaseSegments

        self._filehandle.seek(record_pointer, os.SEEK_SET)
        org_buf = self._filehandle.read(MAX_ORG_RECORD_LENGTH)

        # Cut at the first NUL. If there is none, return everything that was
        # read instead of raising ValueError, which the public wrapper would
        # misreport as a bad IP address.
        return org_buf.split(chr(0), 1)[0]

    def _get_region(self, ipnum):
        """
        Seek and return the region info (dict containing country_code and
        region_name). Either value is the empty string when it is unknown.

        @param ipnum: converted IP address
        @type ipnum: int
        @return: dict containing country_code and region_name
        @rtype: dict
        """
        country_code = ''
        region = ''

        if self._databaseType == REGION_EDITION_REV0:
            seek_region = self._seek_country(ipnum) - STATE_BEGIN_REV0

            if seek_region >= 1000:
                # Values from 1000 upwards are US regions, numbered from 1000.
                country_code = 'US'
                region = self._region_letters(seek_region - 1000)
            else:
                country_code = COUNTRY_CODES[seek_region]

        elif self._databaseType == REGION_EDITION_REV1:
            seek_region = self._seek_country(ipnum) - STATE_BEGIN_REV1

            if seek_region < US_OFFSET:
                # Nothing known; leave both fields empty.
                pass
            elif seek_region < CANADA_OFFSET:
                country_code = 'US'
                region = self._region_letters(seek_region - US_OFFSET)
            elif seek_region < WORLD_OFFSET:
                country_code = 'CA'
                region = self._region_letters(seek_region - CANADA_OFFSET)
            else:
                index = (seek_region - WORLD_OFFSET) // FIPS_RANGE
                # Look the index up directly rather than testing
                # "index in COUNTRY_CODES", which would compare the integer
                # with the table's values if the table is a sequence.
                try:
                    country_code = COUNTRY_CODES[index]
                except (IndexError, KeyError):
                    country_code = ''

        elif self._databaseType in (CITY_EDITION_REV0, CITY_EDITION_REV1):
            rec = self._get_record(ipnum)

            # _get_record returns None when there is no entry, and omits
            # 'region_name' when the stored region string is empty.
            if rec is not None:
                country_code = rec['country_code']
                region = rec.get('region_name', '')

        return {'country_code': country_code, 'region_name': region}

    def _get_record(self, ipnum):
        """
        Populate location dict for converted IP.

        @param ipnum: converted IP address
        @type ipnum: int
        @return: None if the database has no entry; otherwise a dict with
            country_code, country_code3, country_name, postal_code (None when
            empty), latitude, longitude, dma_code and area_code (both 0
            unless this is a City rev1 database and the country is 'US'),
            plus region_name and city when those strings are non-empty
        @rtype: dict
        """
        seek_country = self._seek_country(ipnum)
        if seek_country == self._databaseSegments:
            return None

        record_pointer = seek_country + (2 * self._recordLength - 1) * self._databaseSegments

        self._filehandle.seek(record_pointer, os.SEEK_SET)
        record_buf = self._filehandle.read(FULL_RECORD_LENGTH)

        record = {}

        # First byte: index into the country tables.
        country_index = ord(record_buf[0])
        record['country_code'] = COUNTRY_CODES[country_index]
        record['country_code3'] = COUNTRY_CODES3[country_index]
        record['country_name'] = COUNTRY_NAMES[country_index]
        pos = 1

        # Then three NUL-terminated strings: region, city, postal code.
        region_name, pos = self._read_cstring(record_buf, pos)
        if region_name:
            record['region_name'] = region_name

        city, pos = self._read_cstring(record_buf, pos)
        if city:
            record['city'] = city

        postal_code, pos = self._read_cstring(record_buf, pos)
        if postal_code:
            record['postal_code'] = postal_code
        else:
            record['postal_code'] = None

        # Latitude and longitude are 3-byte little-endian integers holding
        # (degrees + 180) * 10000.
        record['latitude'] = (self._le_to_int(record_buf, pos, 3) / 10000.0) - 180.0
        pos += 3

        record['longitude'] = (self._le_to_int(record_buf, pos, 3) / 10000.0) - 180.0
        pos += 3

        # Always provide both keys so callers need not check the edition.
        record['dma_code'] = 0
        record['area_code'] = 0

        if self._databaseType == CITY_EDITION_REV1 and record['country_code'] == 'US':
            # One more 3-byte integer: dma_code * 1000 + area_code.
            dmaarea_combo = self._le_to_int(record_buf, pos, 3)
            record['dma_code'] = dmaarea_combo // 1000
            record['area_code'] = dmaarea_combo % 1000

        return record

    def country_code_by_addr(self, addr):
        """
        Returns 2-letter country code (e.g. 'US') for specified IP address.
        Use this method if you have a Country, Region, or City database.

        @param addr: IP address
        @type addr: str
        @return: 2-letter country code
        @rtype: str
        @raise GeoIPError: on an unsupported database edition or a rejected
            address
        """
        try:
            if self._databaseType == COUNTRY_EDITION:
                return COUNTRY_CODES[self._lookup_country_id(addr)]

            if self._databaseType in (REGION_EDITION_REV0, REGION_EDITION_REV1,
                                      CITY_EDITION_REV0, CITY_EDITION_REV1):
                return self.region_by_addr(addr)['country_code']

            raise GeoIPError('Invalid database type; country_* methods expect '
                             'Country, City, or Region database')

        except ValueError:
            raise GeoIPError(self._NOT_AN_IP % addr)

    def country_code_by_name(self, hostname):
        """
        Returns 2-letter country code (e.g. 'US') for specified hostname.
        Use this method if you have a Country, Region, or City database.

        @param hostname: host name
        @type hostname: str
        @return: 2-letter country code
        @rtype: str
        """
        return self.country_code_by_addr(socket.gethostbyname(hostname))

    def country_name_by_addr(self, addr):
        """
        Returns full country name for specified IP address.
        Use this method if you have a Country or City database.

        @param addr: IP address
        @type addr: str
        @return: country name; the empty string if a City database has no
            entry for the address
        @rtype: str
        @raise GeoIPError: on an unsupported database edition or a rejected
            address
        """
        try:
            if self._databaseType == COUNTRY_EDITION:
                return COUNTRY_NAMES[self._lookup_country_id(addr)]

            if self._databaseType in (CITY_EDITION_REV0, CITY_EDITION_REV1):
                rec = self.record_by_addr(addr)
                if rec is None:
                    # No entry: report "unknown" instead of failing on
                    # None['country_name'].
                    return ''
                return rec['country_name']

            raise GeoIPError('Invalid database type; country_* methods expect '
                             'Country or City database')

        except ValueError:
            raise GeoIPError(self._NOT_AN_IP % addr)

    def country_name_by_name(self, hostname):
        """
        Returns full country name for specified hostname.
        Use this method if you have a Country or City database.

        @param hostname: host name
        @type hostname: str
        @return: country name
        @rtype: str
        """
        return self.country_name_by_addr(socket.gethostbyname(hostname))

    def org_by_addr(self, addr):
        """
        Lookup the organization (or ISP) for given IP address.
        Use this method if you have an Organization/ISP database.

        @param addr: IP address
        @type addr: str
        @return: organization or ISP name, or None if there is no entry
        @rtype: str
        @raise GeoIPError: on an unsupported database edition or a rejected
            address
        """
        try:
            ipnum = self._to_ipnum(addr)

            if self._databaseType not in (ORG_EDITION, ISP_EDITION):
                raise GeoIPError('Invalid database type; org_* methods expect '
                                 'Org/ISP database')

            return self._get_org(ipnum)

        except ValueError:
            raise GeoIPError(self._NOT_AN_IP % addr)

    def org_by_name(self, hostname):
        """
        Lookup the organization (or ISP) for hostname.
        Use this method if you have an Organization/ISP database.

        @param hostname: host name
        @type hostname: str
        @return: organization or ISP name
        @rtype: str
        """
        return self.org_by_addr(socket.gethostbyname(hostname))

    def record_by_addr(self, addr):
        """
        Look up the record for a given IP address.
        Use this method if you have a City database.

        @param addr: IP address
        @type addr: str
        @return: None if there is no entry; otherwise a dict with
            country_code, country_code3, country_name, postal_code, latitude,
            longitude, dma_code and area_code, plus region_name and city when
            they are non-empty
        @rtype: dict
        @raise GeoIPError: on an unsupported database edition or a rejected
            address
        """
        try:
            ipnum = self._to_ipnum(addr)

            if self._databaseType not in (CITY_EDITION_REV0, CITY_EDITION_REV1):
                raise GeoIPError('Invalid database type; record_* methods expect City database')

            return self._get_record(ipnum)

        except ValueError:
            raise GeoIPError(self._NOT_AN_IP % addr)

    def record_by_name(self, hostname):
        """
        Look up the record for a given hostname.
        Use this method if you have a City database.

        @param hostname: host name
        @type hostname: str
        @return: same as L{record_by_addr}
        @rtype: dict
        """
        return self.record_by_addr(socket.gethostbyname(hostname))

    def region_by_addr(self, addr):
        """
        Lookup the region for given IP address.
        Use this method if you have a Region or City database.

        @param addr: IP address
        @type addr: str
        @return: dict containing country_code and region_name
        @rtype: dict
        @raise GeoIPError: on an unsupported database edition or a rejected
            address
        """
        try:
            ipnum = self._to_ipnum(addr)

            if self._databaseType not in (REGION_EDITION_REV0, REGION_EDITION_REV1,
                                          CITY_EDITION_REV0, CITY_EDITION_REV1):
                raise GeoIPError('Invalid database type; region_* methods expect '
                                 'Region or City database')

            return self._get_region(ipnum)

        except ValueError:
            raise GeoIPError(self._NOT_AN_IP % addr)

    def region_by_name(self, hostname):
        """
        Lookup the region for given hostname.
        Use this method if you have a Region or City database.

        @param hostname: host name
        @type hostname: str
        @return: dict containing country_code and region_name
        @rtype: dict
        """
        return self.region_by_addr(socket.gethostbyname(hostname))
577