1 """
2 Object operations
3
4 An Object is analogous to a file on a conventional filesystem. You can
5 read data from, or write data to your Objects. You can also associate
6 arbitrary metadata with them.
7
8 See COPYING for license information.
9 """
10
11 try:
12 from hashlib import md5
13 except ImportError:
14 from md5 import md5
15 import StringIO
16 import mimetypes
17 import os
18
19 from urllib import quote
20 from errors import ResponseError, NoSuchObject, \
21 InvalidObjectName, IncompleteSend, \
22 InvalidMetaName, InvalidMetaValue
23
24 from socket import timeout
25 import consts
26 from utils import requires_name
27
28
29
30
31
32
33 -class Object(object):
34 """
35 Storage data representing an object, (metadata and data).
36
37 @undocumented: _make_headers
38 @undocumented: _name_check
39 @undocumented: _initialize
40 @undocumented: compute_md5sum
41 @undocumented: __get_conn_for_write
42 @ivar name: the object's name (generally treat as read-only)
43 @type name: str
44 @ivar content_type: the object's content-type (set or read)
45 @type content_type: str
46 @ivar metadata: metadata associated with the object (set or read)
47 @type metadata: dict
48 @ivar size: the object's size (cached)
49 @type size: number
50 @ivar last_modified: date and time of last file modification (cached)
51 @type last_modified: str
52 @ivar container: the object's container (generally treat as read-only)
53 @type container: L{Container}
54 """
55
# Read-only view of the stored checksum.
objsum = property(lambda self: self._etag)

def __set_etag(self, value):
    """
    Store a caller-supplied checksum and mark it as an explicit override.

    While the override flag is set, write() and send() keep this value
    instead of clearing it before an upload.

    @param value: checksum to remember for this object
    @type value: str
    """
    self._etag = value
    self._etag_override = True

# Readable and assignable checksum; assignment goes through __set_etag.
etag = property(lambda self: self._etag, __set_etag)
63
64 - def __init__(self, container, name=None,
65 force_exists=False, object_record=None):
66 """
67 Storage objects rarely if ever need to be instantiated directly by the
68 user.
69
70 Instead, use the L{create_object<Container.create_object>},
71 L{get_object<Container.get_object>},
72 L{list_objects<Container.list_objects>} and other
73 methods on its parent L{Container} object.
74 """
75 self.container = container
76 self.last_modified = None
77 self.metadata = {}
78 if object_record:
79 self.name = object_record['name']
80 self.content_type = object_record['content_type']
81 self.size = object_record['bytes']
82 self.last_modified = object_record['last_modified']
83 self._etag = object_record['hash']
84 self._etag_override = False
85 else:
86 self.name = name
87 self.content_type = None
88 self.size = None
89 self._etag = None
90 self._etag_override = False
91 if not self._initialize() and force_exists:
92 raise NoSuchObject(self.name)
93
@requires_name(InvalidObjectName)
def read(self, size=-1, offset=0, hdrs=None, buffer=None, callback=None):
    """
    Read the content from the remote storage object.

    By default this method will buffer the response in memory and
    return it as a string. However, if a file-like object is passed
    in using the buffer keyword, the response will be written to it
    instead.

    A callback can be passed in for reporting on the progress of
    the download. The callback should accept two integers, the first
    will be for the amount of data written so far, the second for
    the total size of the transfer. Note: This option is only
    applicable when used in conjunction with the buffer option.

    A Range header is only sent when size is greater than zero; offset
    on its own has no effect. The caller's hdrs dict is never modified.

    >>> test_object.write('hello')
    >>> test_object.read()
    'hello'

    @param size: combined with offset, defines the length of data to be
                 read
    @type size: number
    @param offset: combined with size, defines the start location to be
                   read
    @type offset: number
    @param hdrs: an optional dict of headers to send with the request
    @type hdrs: dictionary
    @param buffer: an optional file-like object to write the content to
    @type buffer: file-like object
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    @rtype: str or None
    @return: a string of all data in the object, or None if a buffer is
             used
    @raise ResponseError: if the response status is outside 200-299
    """
    self._name_check()
    if size > 0:
        # Copy before adding Range so the caller's dict stays untouched.
        hdrs = dict(hdrs or {})
        hdrs['Range'] = 'bytes=%d-%d' % (offset, (offset + size) - 1)
    response = self.container.conn.make_request('GET',
        path=[self.container.name, self.name], hdrs=hdrs)
    if (response.status < 200) or (response.status > 299):
        # Consume the error body before raising.
        response.read()
        raise ResponseError(response.status, response.reason)

    if not hasattr(buffer, 'write'):
        return response.read()

    report = callable(callback)
    transferred = 0
    scratch = response.read(8192)
    while scratch:
        buffer.write(scratch)
        transferred += len(scratch)
        if report:
            callback(transferred, self.size)
        scratch = response.read(8192)
    return None
156
def save_to_filename(self, filename, callback=None):
    """
    Save the contents of the object to filename.

    The file is opened in binary write mode and is closed again even
    when the download raises.

    >>> container = connection['container1']
    >>> obj = container.get_object('backup_file')
    >>> obj.save_to_filename('./backup_file')

    @param filename: name of the file
    @type filename: str
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    """
    fobj = open(filename, 'wb')
    try:
        self.read(buffer=fobj, callback=callback)
    finally:
        fobj.close()
175
@requires_name(InvalidObjectName)
def stream(self, chunksize=8192, hdrs=None):
    """
    Return a generator of the remote storage object's data.

    Because this is a generator, the request itself is only issued once
    the generator is first advanced.

    Warning: The HTTP response is only complete after this generator
    has raised a StopIteration. No other methods can be called until
    this has occurred.

    >>> test_object.write('hello')
    >>> test_object.stream()
    <generator object at 0xb77939cc>
    >>> '-'.join(test_object.stream(chunksize=1))
    'h-e-l-l-o'

    @param chunksize: size in bytes yielded by the generator
    @type chunksize: number
    @param hdrs: an optional dict of headers to send in the request
    @type hdrs: dict
    @rtype: str generator
    @return: a generator which yields strings as the object is downloaded
    @raise ResponseError: if the response status is outside 200-299
    """
    self._name_check()
    response = self.container.conn.make_request('GET',
        path=[self.container.name, self.name], hdrs=hdrs)
    if not (200 <= response.status <= 299):
        # Consume the error body before raising.
        response.read()
        raise ResponseError(response.status, response.reason)

    while True:
        piece = response.read(chunksize)
        if len(piece) == 0:
            break
        yield piece

    # One last read after the final chunk, as the response must be
    # fully consumed before other methods are called.
    response.read()
210
211 @requires_name(InvalidObjectName)
233
def __get_conn_for_write(self):
    """
    Open a PUT request for this object and return the connection.

    The request line and all headers (object headers, auth token and
    user agent) have been sent when this returns; the caller is expected
    to send the body and fetch the response.

    @return: the connection object taken from self.container.conn
    """
    headers = self._make_headers()
    headers['X-Auth-Token'] = self.container.conn.token

    path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                          quote(self.container.name), quote(self.name))

    http = self.container.conn.connection
    http.putrequest('PUT', path)
    for hdr in headers:
        http.putheader(hdr, headers[hdr])
    http.putheader('User-Agent', self.container.conn.user_agent)
    http.endheaders()
    return http
252
253
@requires_name(InvalidObjectName)
def write(self, data='', verify=True, callback=None):
    """
    Write data to the remote storage system.

    By default, server-side verification is enabled, (verify=True), and
    end-to-end verification is performed using an md5 checksum. When
    verification is disabled, (verify=False), the etag attribute will
    be set to the value returned by the server, not one calculated
    locally. When disabling verification, there is no guarantee that
    what you think was uploaded matches what was actually stored. Use
    this option carefully. You have been warned.

    A callback can be passed in for reporting on the progress of
    the upload. The callback should accept two integers, the first
    will be for the amount of data written so far, the second for
    the total size of the transfer. It is invoked once after every
    chunk that has been sent.

    >>> test_object = container.create_object('file.txt')
    >>> test_object.content_type = 'text/plain'
    >>> fp = open('./file.txt')
    >>> test_object.write(fp)

    @param data: the data to be written
    @type data: str or file
    @param verify: enable/disable server-side checksum verification
    @type verify: boolean
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    @raise ResponseError: if the response status is outside 200-299
    """
    self._name_check()
    if isinstance(data, file):
        # Best-effort flush so fstat sees everything written so far; an
        # IOError here is ignored on purpose.
        try:
            data.flush()
        except IOError:
            pass
        self.size = int(os.fstat(data.fileno()).st_size)
    else:
        data = StringIO.StringIO(data)
        self.size = data.len

    # A checksum assigned through the etag property is kept; otherwise
    # any previously stored value is stale and gets dropped.
    if not self._etag_override:
        self._etag = None

    if not self.content_type:
        guessed = None
        if hasattr(data, 'name'):
            guessed = mimetypes.guess_type(data.name)[0]
        self.content_type = guessed or 'application/octet-stream'

    http = self.__get_conn_for_write()

    response = None
    transferred = 0
    running_checksum = md5()
    compute_etag = verify and not self._etag_override
    report = callable(callback)

    buff = data.read(4096)
    try:
        while len(buff) > 0:
            http.send(buff)
            if compute_etag:
                running_checksum.update(buff)
            # Count the chunk that was just sent, before fetching the
            # next one, so the callback sees the true running total.
            transferred += len(buff)
            if report:
                callback(transferred, self.size)
            buff = data.read(4096)
        response = http.getresponse()
        response.read()
    except timeout:
        if response:
            # A response was obtained before the timeout; read it again
            # before propagating.
            response.read()
        raise
    else:
        if compute_etag:
            self._etag = running_checksum.hexdigest()

    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    # Without verification, trust the checksum reported by the server.
    if not verify:
        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]
347
@requires_name(InvalidObjectName)
def send(self, iterable):
    """
    Write potentially transient data to the remote storage system using a
    generator or stream.

    If the object's size is not set, chunked transfer encoding will be
    used to upload the file.

    If the object's size attribute is set, it will be used as the
    Content-Length. If the generator raises StopIteration prior to
    yielding the right number of bytes, an IncompleteSend exception is
    raised.

    If the content_type attribute is not set then a value of
    application/octet-stream will be used.

    Server-side verification will be performed if an md5 checksum is
    assigned to the etag property before calling this method,
    otherwise no verification will be performed, (verification
    can be performed afterward though by using the etag attribute
    which is set to the value returned by the server).

    Empty chunks yielded by the iterable are skipped.

    >>> test_object = container.create_object('backup.tar.gz')
    >>> pfd = os.popen('tar -czvf - ./data/', 'r')
    >>> test_object.send(pfd)

    @param iterable: stream or generator which yields the content to upload
    @type iterable: generator or stream
    @raise IncompleteSend: if size is set and fewer bytes were yielded
    @raise ResponseError: if the response status is outside 200-299
    """
    self._name_check()

    if hasattr(iterable, 'read'):
        # Wrap a stream so it can be consumed like a generator. The
        # generator simply returns at EOF.
        def file_iterator(fobj):
            chunk = fobj.read(4095)
            while chunk:
                yield chunk
                chunk = fobj.read(4095)
        iterable = file_iterator(iterable)

    # A checksum assigned through the etag property is kept; otherwise
    # any previously stored value is dropped.
    if not self._etag_override:
        self._etag = None

    if not self.content_type:
        self.content_type = 'application/octet-stream'

    chunked = self.size is None

    path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                          quote(self.container.name), quote(self.name))
    headers = self._make_headers()
    if chunked:
        del headers['Content-Length']
        headers['Transfer-Encoding'] = 'chunked'
    headers['X-Auth-Token'] = self.container.conn.token
    headers['User-Agent'] = self.container.conn.user_agent
    http = self.container.conn.connection
    http.putrequest('PUT', path)
    for key, value in headers.iteritems():
        http.putheader(key, value)
    http.endheaders()

    response = None
    transferred = 0
    try:
        for chunk in iterable:
            if not chunk:
                # A zero-length chunk would be framed as "0\r\n\r\n",
                # which is the chunked-encoding terminator, and would
                # cut the upload short.
                continue
            if chunked:
                http.send("%X\r\n" % len(chunk))
                http.send(chunk)
                http.send("\r\n")
            else:
                http.send(chunk)
            transferred += len(chunk)
        if chunked:
            # Terminating zero-size chunk.
            http.send("0\r\n\r\n")
        elif transferred < self.size:
            raise IncompleteSend()
        response = http.getresponse()
        response.read()
    except timeout:
        if response:
            # A response was obtained before the timeout; read it again
            # before propagating.
            response.read()
        raise

    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    for hdr in response.getheaders():
        if hdr[0].lower() == 'etag':
            self._etag = hdr[1]
441
def load_from_filename(self, filename, verify=True, callback=None):
    """
    Put the contents of the named file into remote storage.

    The file is opened in binary read mode and is closed again even
    when the upload raises.

    >>> test_object = container.create_object('file.txt')
    >>> test_object.content_type = 'text/plain'
    >>> test_object.load_from_filename('./my_file.txt')

    @param filename: path to the file
    @type filename: str
    @param verify: enable/disable server-side checksum verification
    @type verify: boolean
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    """
    fobj = open(filename, 'rb')
    try:
        self.write(fobj, verify=verify, callback=callback)
    finally:
        # Release the handle on failure as well as on success.
        fobj.close()
460
def _initialize(self):
    """
    Initialize the Object with values from the remote service (if any).

    Issues a HEAD request for the object and copies content type,
    metadata, etag, size and last-modified time from the response
    headers onto this instance.

    @rtype: boolean
    @return: False if the object has no name or the service answers 404,
             True once the headers have been applied
    @raise ResponseError: for any other status outside 200-299
    """
    if not self.name:
        return False

    response = self.container.conn.make_request(
        'HEAD', [self.container.name, self.name])
    response.read()
    if response.status == 404:
        return False
    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    for hdr in response.getheaders():
        name, value = hdr[0], hdr[1]
        lowered = name.lower()
        if lowered == 'content-type':
            self.content_type = value
        elif lowered.startswith('x-object-meta-'):
            # Drop the 14-character 'x-object-meta-' prefix; the rest
            # of the header name is the metadata key.
            self.metadata[name[14:]] = value
        elif lowered == 'etag':
            # A value reported by the service is not a user override.
            self._etag = value
            self._etag_override = False
        elif lowered == 'content-length':
            self.size = int(value)
        elif lowered == 'last-modified':
            self.last_modified = value
    return True
488
491
495
497 """
498 Returns a dictionary representing http headers based on the
499 respective instance attributes.
500 """
501 headers = {}
502 headers['Content-Length'] = (str(self.size) \
503 and str(self.size) != "0") \
504 and str(self.size) or "0"
505 if self._etag:
506 headers['ETag'] = self._etag
507
508 if self.content_type:
509 headers['Content-Type'] = self.content_type
510 else:
511 headers['Content-Type'] = 'application/octet-stream'
512
513 for key in self.metadata:
514 if len(key) > consts.meta_name_limit:
515 raise(InvalidMetaName(key))
516 if len(self.metadata[key]) > consts.meta_value_limit:
517 raise(InvalidMetaValue(self.metadata[key]))
518 headers['X-Object-Meta-' + key] = self.metadata[key]
519 return headers
520
521 @classmethod
523 """
524 Given an open file object, returns the md5 hexdigest of the data.
525 """
526 checksum = md5()
527 buff = fobj.read(4096)
528 while buff:
529 checksum.update(buff)
530 buff = fobj.read(4096)
531 fobj.seek(0)
532 return checksum.hexdigest()
533
def public_uri(self):
    """
    Retrieve the URI for this object, if its container is public.

    The result is the container's public URI, without any trailing
    slash, followed by a slash and the URL-quoted object name.

    >>> container1 = connection['container1']
    >>> container1.make_public()
    >>> container1.create_object('file.txt').write('testing')
    >>> container1['file.txt'].public_uri()
    'http://c00061.cdn.cloudfiles.rackspacecloud.com/file.txt'

    @return: the public URI for this object
    @rtype: str
    """
    base = self.container.public_uri().rstrip('/')
    return "%s/%s" % (base, quote(self.name))
549
552 """
553 An iterable results set object for Objects.
554
555 This class implements dictionary- and list-like interfaces.
556 """
557 - def __init__(self, container, objects=None):
558 if objects is None:
559 objects = []
560 self._names = []
561 self._objects = []
562 for obj in objects:
563 try:
564 self._names.append(obj['name'])
565 except KeyError:
566
567 continue
568 else:
569 self._objects.append(obj)
570 self.container = container
571
574
576 return [Object(self.container, object_record=k) \
577 for k in self._objects[i:j]]
578
580 return item in self._objects
581
583 return len(self._objects)
584
586 return 'ObjectResults: %s objects' % len(self._objects)
587 __str__ = __repr__
588
589 - def index(self, value, *args):
590 """
591 returns an integer for the first index of value
592 """
593 return self._names.index(value, *args)
594
596 """
597 returns the number of occurrences of value
598 """
599 return self._names.count(value)
600
601
602