Package cloudfiles :: Module storage_object
[frames] | no frames]

Source Code for Module cloudfiles.storage_object

  1  """ 
  2  Object operations 
  3   
  4  An Object is analogous to a file on a conventional filesystem. You can 
  5  read data from, or write data to your Objects. You can also associate 
  6  arbitrary metadata with them. 
  7   
  8  See COPYING for license information. 
  9  """ 
 10   
 11  try: 
 12      from hashlib import md5 
 13  except ImportError: 
 14      from md5 import md5 
 15  import StringIO 
 16  import mimetypes 
 17  import os 
 18   
 19  from urllib  import quote 
 20  from errors  import ResponseError, NoSuchObject, \ 
 21                      InvalidObjectName, IncompleteSend, \ 
 22                      InvalidMetaName, InvalidMetaValue 
 23   
 24  from socket  import timeout 
 25  import consts 
 26  from utils   import requires_name 
27 28 # Because HTTPResponse objects *have* to have read() called on them 29 # before they can be used again ... 30 # pylint: disable-msg=W0612 31 32 33 -class Object(object):
34 """ 35 Storage data representing an object, (metadata and data). 36 37 @undocumented: _make_headers 38 @undocumented: _name_check 39 @undocumented: _initialize 40 @undocumented: compute_md5sum 41 @undocumented: __get_conn_for_write 42 @ivar name: the object's name (generally treat as read-only) 43 @type name: str 44 @ivar content_type: the object's content-type (set or read) 45 @type content_type: str 46 @ivar metadata: metadata associated with the object (set or read) 47 @type metadata: dict 48 @ivar size: the object's size (cached) 49 @type size: number 50 @ivar last_modified: date and time of last file modification (cached) 51 @type last_modified: str 52 @ivar container: the object's container (generally treat as read-only) 53 @type container: L{Container} 54 """ 55 # R/O support of the legacy objsum attr. 56 objsum = property(lambda self: self._etag) 57
def __set_etag(self, value):
    """
    Setter backing the C{etag} property.

    Records the supplied checksum and flags it as manually assigned so
    that code checking C{_etag_override} can tell it apart from a value
    obtained elsewhere.

    @param value: checksum to store as the object's etag
    @type value: str
    """
    self._etag_override = True
    self._etag = value

# Read/write access to the checksum; assignment goes through __set_etag.
etag = property(fset=__set_etag, fget=lambda self: self._etag)

64 - def __init__(self, container, name=None, 65 force_exists=False, object_record=None):
66 """ 67 Storage objects rarely if ever need to be instantiated directly by the 68 user. 69 70 Instead, use the L{create_object<Container.create_object>}, 71 L{get_object<Container.get_object>}, 72 L{list_objects<Container.list_objects>} and other 73 methods on its parent L{Container} object. 74 """ 75 self.container = container 76 self.last_modified = None 77 self.metadata = {} 78 if object_record: 79 self.name = object_record['name'] 80 self.content_type = object_record['content_type'] 81 self.size = object_record['bytes'] 82 self.last_modified = object_record['last_modified'] 83 self._etag = object_record['hash'] 84 self._etag_override = False 85 else: 86 self.name = name 87 self.content_type = None 88 self.size = None 89 self._etag = None 90 self._etag_override = False 91 if not self._initialize() and force_exists: 92 raise NoSuchObject(self.name)
93 94 @requires_name(InvalidObjectName)
def read(self, size=-1, offset=0, hdrs=None, buffer=None, callback=None):
    """
    Read the content from the remote storage object.

    By default this method will buffer the response in memory and
    return it as a string. However, if a file-like object is passed
    in using the buffer keyword, the response will be written to it
    instead.

    A callback can be passed in for reporting on the progress of
    the download. The callback should accept two integers, the first
    will be for the amount of data written so far, the second for
    the total size of the transfer. Note: This option is only
    applicable when used in conjunction with the buffer option.

    A Range header is only sent when size is positive; offset on its
    own is not applied.

    >>> test_object.write('hello')
    >>> test_object.read()
    'hello'

    @param size: combined with offset, defines the length of data to be
                 read
    @type size: number
    @param offset: combined with size, defines the start location to be
                   read
    @type offset: number
    @param hdrs: an optional dict of headers to send with the request;
                 the dict itself is never modified
    @type hdrs: dictionary
    @param buffer: an optional file-like object to write the content to
    @type buffer: file-like object
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    @rtype: str or None
    @return: a string of all data in the object, or None if a buffer is
             used
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()
    if size > 0:
        byte_range = 'bytes=%d-%d' % (offset, (offset + size) - 1)
        # Copy before adding Range so the caller's dict is left untouched
        # and can safely be reused for other requests.
        hdrs = dict(hdrs or {})
        hdrs['Range'] = byte_range
    response = self.container.conn.make_request('GET',
        path=[self.container.name, self.name], hdrs=hdrs)
    if (response.status < 200) or (response.status > 299):
        # The body has to be drained before the connection is reusable.
        response.read()
        raise ResponseError(response.status, response.reason)

    if hasattr(buffer, 'write'):
        transferred = 0
        scratch = response.read(8192)
        while len(scratch) > 0:
            buffer.write(scratch)
            transferred += len(scratch)
            if callable(callback):
                callback(transferred, self.size)
            scratch = response.read(8192)
        return None
    else:
        return response.read()

156
def save_to_filename(self, filename, callback=None):
    """
    Download the object and store its contents in a local file.

    The file is opened in binary write mode and is always closed again,
    even when the download fails part-way through.

    >>> container = connection['container1']
    >>> obj = container.get_object('backup_file')
    >>> obj.save_to_filename('./backup_file')

    @param filename: name of the file
    @type filename: str
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    """
    destination = open(filename, 'wb')
    try:
        self.read(callback=callback, buffer=destination)
    finally:
        destination.close()

175 176 @requires_name(InvalidObjectName)
def stream(self, chunksize=8192, hdrs=None):
    """
    Return a generator of the remote storage object's data.

    Warning: The HTTP response is only complete after this generator
    has raised a StopIteration. No other methods can be called until
    this has occurred. Because this is a generator, the request itself
    is only issued once iteration starts.

    >>> test_object.write('hello')
    >>> test_object.stream()
    <generator object at 0xb77939cc>
    >>> '-'.join(test_object.stream(chunksize=1))
    'h-e-l-l-o'

    @param chunksize: size in bytes yielded by the generator
    @type chunksize: number
    @param hdrs: an optional dict of headers to send in the request
    @type hdrs: dict
    @rtype: str generator
    @return: a generator which yields strings as the object is downloaded
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()
    response = self.container.conn.make_request(
        'GET', path=[self.container.name, self.name], hdrs=hdrs)
    if not (200 <= response.status <= 299):
        # Drain the error body before giving up on the response.
        response.read()
        raise ResponseError(response.status, response.reason)

    while True:
        piece = response.read(chunksize)
        if len(piece) == 0:
            break
        yield piece

    # One last unbounded read() so httplib treats the response as finished.
    response.read()

210 211 @requires_name(InvalidObjectName)
def sync_metadata(self):
    """
    Commits the metadata to the remote storage system.

    Nothing is sent when the metadata dict is empty. Otherwise a POST
    with an empty body is issued, and any status other than 202 is
    treated as a failure.

    >>> test_object = container['paradise_lost.pdf']
    >>> test_object.metadata = {'author': 'John Milton'}
    >>> test_object.sync_metadata()

    Object metadata can be set and retrieved through the object's
    .metadata attribute.

    @raise ResponseError: if the server does not answer with 202
    """
    self._name_check()
    if not self.metadata:
        return

    headers = self._make_headers()
    # The POST carries no body, whatever size the object itself has.
    headers['Content-Length'] = "0"
    response = self.container.conn.make_request(
        'POST', [self.container.name, self.name], hdrs=headers, data='')
    response.read()
    if response.status != 202:
        raise ResponseError(response.status, response.reason)

233
234 - def __get_conn_for_write(self):
235 headers = self._make_headers() 236 237 headers['X-Auth-Token'] = self.container.conn.token 238 239 path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'), \ 240 quote(self.container.name), quote(self.name)) 241 242 # Requests are handled a little differently for writes ... 243 http = self.container.conn.connection 244 245 # TODO: more/better exception handling please 246 http.putrequest('PUT', path) 247 for hdr in headers: 248 http.putheader(hdr, headers[hdr]) 249 http.putheader('User-Agent', self.container.conn.user_agent) 250 http.endheaders() 251 return http
252 253 # pylint: disable-msg=W0622 254 @requires_name(InvalidObjectName)
def write(self, data='', verify=True, callback=None):
    """
    Write data to the remote storage system.

    With verify=True (the default) and no manually assigned etag, an md5
    checksum of the uploaded bytes is computed locally and stored in the
    etag attribute once the upload has completed. With verify=False the
    etag attribute is instead set to the value returned by the server.
    When disabling verification, there is no guarantee that what you
    think was uploaded matches what was actually stored. Use this
    option carefully. You have been warned.

    A callback can be passed in for reporting on the progress of
    the upload. The callback should accept two integers, the first
    will be for the amount of data written so far, the second for
    the total size of the transfer. It is invoked after every chunk
    that has been handed to the connection.

    >>> test_object = container.create_object('file.txt')
    >>> test_object.content_type = 'text/plain'
    >>> fp = open('./file.txt')
    >>> test_object.write(fp)

    @param data: the data to be written
    @type data: str or file
    @param verify: enable/disable checksum calculation for the upload
    @type verify: boolean
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()
    if isinstance(data, file):
        # pylint: disable-msg=E1101
        try:
            data.flush()
        except IOError:
            pass  # If the file descriptor is read-only this will fail
        self.size = int(os.fstat(data.fileno())[6])
    else:
        data = StringIO.StringIO(data)
        self.size = data.len

    # If override is set (and _etag is not None), then the etag has
    # been manually assigned and we will not calculate our own.
    if not self._etag_override:
        self._etag = None

    if not self.content_type:
        # pylint: disable-msg=E1101
        guessed = None
        if hasattr(data, 'name'):
            guessed = mimetypes.guess_type(data.name)[0]
        self.content_type = guessed or 'application/octet-stream'

    http = self.__get_conn_for_write()

    # NOTE(review): the request headers are already sent at this point,
    # so a checksum computed below cannot travel with this upload; it
    # only ends up in self._etag. Confirm whether a comparison against
    # the server's ETag is intended.
    checksum_locally = verify and not self._etag_override

    response = None
    transferred = 0
    running_checksum = md5()

    buff = data.read(4096)
    try:
        while len(buff) > 0:
            http.send(buff)
            if checksum_locally:
                running_checksum.update(buff)
            # Count the chunk that was just sent *before* fetching the
            # next one, so progress reflects bytes actually written.
            transferred += len(buff)
            if callable(callback):
                callback(transferred, self.size)
            buff = data.read(4096)
        response = http.getresponse()
        response.read()
    except timeout:
        if response:
            # pylint: disable-msg=E1101
            response.read()
        # Bare raise keeps the original traceback intact.
        raise

    if checksum_locally:
        self._etag = running_checksum.hexdigest()

    # ----------------------------------------------------------------

    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    # If verification has been disabled for this write, then set the
    # instances etag attribute to what the server returns to us.
    if not verify:
        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]

347 348 @requires_name(InvalidObjectName)
def send(self, iterable):
    """
    Write potentially transient data to the remote storage system using a
    generator or stream.

    If the object's size is not set, chunked transfer encoding will be
    used to upload the file. Empty chunks produced by the iterable are
    skipped, because a zero-length chunk marks the end of a chunked body.

    If the object's size attribute is set, it will be used as the
    Content-Length. If the generator raises StopIteration prior to
    yielding the right number of bytes, an IncompleteSend exception is
    raised.

    If the content_type attribute is not set then a value of
    application/octet-stream will be used.

    Server-side verification will be performed if an md5 checksum is
    assigned to the etag property before calling this method,
    otherwise no verification will be performed, (verification
    can be performed afterward though by using the etag attribute
    which is set to the value returned by the server).

    >>> test_object = container.create_object('backup.tar.gz')
    >>> pfd = os.popen('tar -czvf - ./data/', 'r')
    >>> test_object.send(pfd)

    @param iterable: stream or generator which yields the content to upload
    @type iterable: generator or stream
    @raise IncompleteSend: if size is set and fewer bytes were produced
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()

    if hasattr(iterable, 'read'):

        def file_iterator(fobj):
            # The generator simply ends when the stream is exhausted; an
            # explicit StopIteration is unnecessary (and an error under
            # PEP 479).
            chunk = fobj.read(4095)
            while chunk:
                yield chunk
                chunk = fobj.read(4095)

        iterable = file_iterator(iterable)

    # This method implicitly disables verification
    if not self._etag_override:
        self._etag = None

    if not self.content_type:
        self.content_type = 'application/octet-stream'

    path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                          quote(self.container.name), quote(self.name))
    headers = self._make_headers()
    chunked = self.size is None
    if chunked:
        # Unknown length: drop Content-Length and stream in chunks.
        headers.pop('Content-Length', None)
        headers['Transfer-Encoding'] = 'chunked'
    headers['X-Auth-Token'] = self.container.conn.token
    headers['User-Agent'] = self.container.conn.user_agent
    http = self.container.conn.connection
    http.putrequest('PUT', path)
    for key, value in headers.items():
        http.putheader(key, value)
    http.endheaders()

    response = None
    transferred = 0
    try:
        for chunk in iterable:
            if not chunk:
                # A zero-length chunk would be written as "0\r\n\r\n",
                # which terminates a chunked body prematurely.
                continue
            if chunked:
                http.send("%X\r\n" % len(chunk))
                http.send(chunk)
                http.send("\r\n")
            else:
                http.send(chunk)
            transferred += len(chunk)
        if chunked:
            http.send("0\r\n\r\n")
        # If the generator didn't yield enough data, stop, drop, and roll.
        elif transferred < self.size:
            raise IncompleteSend()
        response = http.getresponse()
        response.read()
    except timeout:
        if response:
            # pylint: disable-msg=E1101
            response.read()
        # Bare raise keeps the original traceback intact.
        raise

    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    for hdr in response.getheaders():
        if hdr[0].lower() == 'etag':
            self._etag = hdr[1]

441
def load_from_filename(self, filename, verify=True, callback=None):
    """
    Put the contents of the named file into remote storage.

    The file is opened in binary read mode and is closed again even if
    the upload raises.

    >>> test_object = container.create_object('file.txt')
    >>> test_object.content_type = 'text/plain'
    >>> test_object.load_from_filename('./my_file.txt')

    @param filename: path to the file
    @type filename: str
    @param verify: enable/disable server-side checksum verification
    @type verify: boolean
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    """
    fobj = open(filename, 'rb')
    try:
        self.write(fobj, verify=verify, callback=callback)
    finally:
        # Always release the handle, also when write() fails.
        fobj.close()

460
461 - def _initialize(self):
462 """ 463 Initialize the Object with values from the remote service (if any). 464 """ 465 if not self.name: 466 return False 467 468 response = self.container.conn.make_request( 469 'HEAD', [self.container.name, self.name]) 470 response.read() 471 if response.status == 404: 472 return False 473 if (response.status < 200) or (response.status > 299): 474 raise ResponseError(response.status, response.reason) 475 for hdr in response.getheaders(): 476 if hdr[0].lower() == 'content-type': 477 self.content_type = hdr[1] 478 if hdr[0].lower().startswith('x-object-meta-'): 479 self.metadata[hdr[0][14:]] = hdr[1] 480 if hdr[0].lower() == 'etag': 481 self._etag = hdr[1] 482 self._etag_override = False 483 if hdr[0].lower() == 'content-length': 484 self.size = int(hdr[1]) 485 if hdr[0].lower() == 'last-modified': 486 self.last_modified = hdr[1] 487 return True
488
489 - def __str__(self):
490 return self.name
491
def _name_check(self):
    """
    Validate the length of the object's name.

    @raise InvalidObjectName: if the name is longer than
        C{consts.object_name_limit}
    """
    name_length = len(self.name)
    if name_length > consts.object_name_limit:
        raise InvalidObjectName(self.name)

495
496 - def _make_headers(self):
497 """ 498 Returns a dictionary representing http headers based on the 499 respective instance attributes. 500 """ 501 headers = {} 502 headers['Content-Length'] = (str(self.size) \ 503 and str(self.size) != "0") \ 504 and str(self.size) or "0" 505 if self._etag: 506 headers['ETag'] = self._etag 507 508 if self.content_type: 509 headers['Content-Type'] = self.content_type 510 else: 511 headers['Content-Type'] = 'application/octet-stream' 512 513 for key in self.metadata: 514 if len(key) > consts.meta_name_limit: 515 raise(InvalidMetaName(key)) 516 if len(self.metadata[key]) > consts.meta_value_limit: 517 raise(InvalidMetaValue(self.metadata[key])) 518 headers['X-Object-Meta-' + key] = self.metadata[key] 519 return headers
520 521 @classmethod
def compute_md5sum(cls, fobj):
    """
    Compute the md5 hexdigest of an open file object's remaining data.

    The file is read in 4096-byte pieces from its current position and
    rewound to the beginning afterwards.

    @param fobj: open, seekable file object to digest
    @type fobj: file
    @rtype: str
    @return: the hexadecimal md5 digest of the data that was read
    """
    digest = md5()
    while True:
        block = fobj.read(4096)
        if not block:
            break
        digest.update(block)
    fobj.seek(0)
    return digest.hexdigest()

533
def public_uri(self):
    """
    Retrieve the URI for this object, if its container is public.

    The result is the container's public URI (without a trailing slash)
    joined to the URL-quoted object name.

    >>> container1 = connection['container1']
    >>> container1.make_public()
    >>> container1.create_object('file.txt').write('testing')
    >>> container1['file.txt'].public_uri()
    'http://c00061.cdn.cloudfiles.rackspacecloud.com/file.txt'

    @return: the public URI for this object
    @rtype: str
    """
    base = self.container.public_uri().rstrip('/')
    return "%s/%s" % (base, quote(self.name))

549
class ObjectResults(object):
    """
    An iterable results set object for Objects.

    This class implements dictionary- and list-like interfaces: items
    are retrieved by position (or slice) as L{Object} instances, while
    membership tests, index() and count() work on object names.

    Records without a 'name' key (pseudo-objects returned by a delimiter
    query) are dropped when the result set is built.
    """

    def __init__(self, container, objects=None):
        """
        @param container: the container the records belong to
        @type container: L{Container}
        @param objects: listing records, each a dict describing an object
        @type objects: list of dict
        """
        if objects is None:
            objects = []
        self._names = []
        self._objects = []
        for obj in objects:
            try:
                self._names.append(obj['name'])
            except KeyError:
                # pseudo-objects from a delimiter query don't have names
                continue
            else:
                self._objects.append(obj)
        self.container = container

    def __getitem__(self, key):
        """
        Return the L{Object} at position key, or a list of them when key
        is a slice.
        """
        if isinstance(key, slice):
            # Interpreters without __getslice__ pass slices here.
            return [Object(self.container, object_record=record)
                    for record in self._objects[key]]
        return Object(self.container, object_record=self._objects[key])

    def __getslice__(self, i, j):
        """
        Return a list of L{Object} instances for positions i to j.
        """
        return [Object(self.container, object_record=record)
                for record in self._objects[i:j]]

    def __contains__(self, item):
        """
        Report whether item is the name of a listed object, or one of
        the raw listing records.
        """
        # Names are what index() and count() operate on; raw records are
        # still accepted so existing record-based checks keep working.
        return item in self._names or item in self._objects

    def __len__(self):
        return len(self._objects)

    def __repr__(self):
        return 'ObjectResults: %s objects' % len(self._objects)
    __str__ = __repr__

    def index(self, value, *args):
        """
        returns an integer for the first index of value
        """
        return self._names.index(value, *args)

    def count(self, value):
        """
        returns the number of occurrences of value
        """
        return self._names.count(value)

# vim:set ai sw=4 ts=4 tw=0 expandtab:
600 601 # vim:set ai sw=4 ts=4 tw=0 expandtab: 602