Package pairtree :: Module pairtree_object
[hide private]
[frames | no frames]

Source Code for Module pairtree.pairtree_object

  1  #!/usr/bin/python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  Conventions used: 
  6   
  7  From http://www.cdlib.org/inside/diglib/pairtree/pairtreespec.html version 0.1 
  8   
  9  This is a convenience object, used as a proxy for an object inside a pairtree store. 
 10  As such, it shouldn't be instantiated directly. 
 11  """ 
 12   
 13  import os, sys, shutil 
 14   
 15  import codecs 
 16   
 17  import string 
 18   
 19  from storage_exceptions import * 
 20   
class PairtreeStorageObject(object):
    """
    Proxy for a single object held inside a pairtree store.  Every method
    delegates to the storage client handed to the constructor, always passing
    this object's identifier as the first argument.

    The important methods:

        - add_bytestream(filename, bytestream, path=None, buffer_size=None):
        - get_bytestream(filename, streamable=False, path=None):
        - del_file(filename, path=None):
        - list_parts(path=None):

    First, set up a simple store in 'data' and get an object called 'bar'
    (which will be equivalent to 'http://example.org/bar')

    >>> from pairtree import PairtreeStorageFactory
    >>> factory = PairtreeStorageFactory()
    >>> store = factory.get_store(store_dir='data', uri_base='http://example.org/')
    >>> bar = store.get_object('bar')

    Now add a simple string to a file called 'foo.txt'

    >>> bar.add_bytestream('foo.txt', 'can be any sequence of bytes')
    >>> bar.list_parts()
    ['foo.txt']
    >>>

    Adding buffered content from a file:

    >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
    ...   bar.add_bytestream('Firefox_wallpaper.png', stream)
    ...
    >>>

    Adding the same file to magic/path/inside/object - paths are automatically created on
    demand.

    >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
    ...   bar.add_bytestream('Firefox_wallpaper.png', stream, path='magic/path/inside/object')
    ...
    >>>

    Removing the first copy of that file, which was added to the wrong place:

    >>> bar.del_file('Firefox_wallpaper.png')
    >>> bar.list_parts()
    ['magic', 'foo.txt']
    >>> bar.list_parts('magic/path')
    ['inside']
    >>> bar.list_parts('magic/path/inside/object')
    ['Firefox_wallpaper.png']
    >>>

    There are also some convenience methods:

        - L{add_bytestream_by_path}(self, filepath, bytestream, buffer_size=None):
        - L{del_file_by_path}(self, filepath):
        - L{get_bytestream_by_path}(self, filepath, streamable=False):

    The I{by_path} suffix means that you can give it the whole path as one, and it will
    try to figure out what is intended, for example, consider the png we placed in a nested
    directory earlier:

    >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
    ...   bar.add_bytestream('Firefox_wallpaper.png', stream, path='magic/path/inside/object')
    ...

    This can be written as:

    >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
    ...   bar.add_bytestream_by_path('magic/path/inside/object/Firefox_wallpaper.png', stream)
    ...

    Getting files from an object
    ============================

    The flag I{streamable} is key here - if this is set to True, then you will be passed
    a file handle, which you must remember to close or use the construct:

    >>> with bar.get_bytestream('foo.txt', streamable=True) as text:
    ...   print text.read()
    ...
    >>>

    This is very useful for large files you wish to scan through, but do not need to hold
    in memory all at the same time.

    By setting streamable to False, the entire file is read into memory and returned:

    >>> print bar.get_bytestream('foo.txt')
    can be any sequence of bytes
    """

    def __init__(self, id, fs_store_client):
        """
        Bind this proxy to an identifier and the storage client that serves it.

        @param id: Identifier for pairtree object
        @type id: identifier
        @param fs_store_client: A reference to an instance of L{PairtreeStorageClient}
        @type fs_store_client: L{PairtreeStorageClient}
        """
        self.fs = fs_store_client
        self.id = id
        # The object's URI is the store's uri_base with the identifier appended.
        self.uri = "%s%s" % (self.fs.uri_base, id)

    def add_bytestream(self, filename, bytestream, path=None, buffer_size=None):
        """
        Add a string or file to a given filename within this object. A C{path} may
        be supplied to store the file within a subdirectory of the object.

        @param path: (Optional) subdirectory path to store file in
        @type path: Directory path
        @param filename: Name of the file to write to
        @type filename: filename
        @param bytestream: Either a string or a file-like object to read from
        @type bytestream: L{str}|L{file}
        @param buffer_size: (Optional) Used for streaming filelike objects - defines the size of the buffer
                            to read in each cycle.
        @type buffer_size: L{int}
        @returns: Whatever the storage client's C{put_stream} returns
        """
        # Only forward buffer_size when it is truthy, so the storage client's
        # own default applies otherwise.
        if buffer_size:
            return self.fs.put_stream(self.id, path, filename, bytestream, buffer_size)
        return self.fs.put_stream(self.id, path, filename, bytestream)

    def add_bytestream_by_path(self, filepath, bytestream, buffer_size=None):
        """
        Add a string or file to a given filepath within this object.

        The following adds the contents of footxt into a file 'foo.txt' in a
        subdirectory of the object 'data', which may or may not have existed prior
        to this call:

        >>> object.add_bytestream_by_path('data/foo.txt', footxt)

        @param filepath: path (subdirectory plus filename) to store the file at
        @type filepath: path to a file
        @param bytestream: Either a string or a file-like object to read from
        @type bytestream: L{str}|L{file}
        @param buffer_size: (Optional) Used for streaming filelike objects - defines the size of the buffer
                            to read in each cycle.
        @type buffer_size: L{int}
        @returns: Whatever L{add_bytestream} returns
        """
        # Everything before the final separator is the subdirectory, the rest
        # is the filename.
        path, filename = os.path.split(filepath)
        if buffer_size:
            return self.add_bytestream(filename, bytestream, path, buffer_size)
        return self.add_bytestream(filename, bytestream, path)

    def get_bytestream(self, filename, streamable=False, path=None, appendable=False):
        """
        Reads a file from a pairtree object - If streamable is set to True,
        this returns the filehandle for that file, which must be C{close()}'d
        once finished with. In python 2.6 and above, this can be done easily:

        >>> with object.get_bytestream('image001.tif', True, 'data/images') as stream:
        ...   # Do something with the C{stream} handle
        ...   pass

        stream is closed at the end of a C{with} block

        If appendable is set to True, then the file is opened "wb+" and can accept writes.
        Otherwise, the file is opened read-only.  When appendable is True the
        C{streamable} flag is not consulted.

        @param path: (Optional) subdirectory path to retrieve file from
        @type path: Directory path
        @param filename: Name of the file to read in
        @type filename: filename
        @param streamable: If True, returns a filelike handle to C{read()} from -
        I{remember to C{close()} the file!} If False, reads in the file into a
        bytestring and return that instead.
        @type streamable: True|False
        @param appendable: If True, ask the storage client for an appendable
        stream instead of a read-only one.
        @type appendable: True|False
        @returns: Either L{file} or L{str}
        """
        if appendable:
            return self.fs.get_appendable_stream(self.id, path=path, stream_name=filename)
        return self.fs.get_stream(self.id, path=path, stream_name=filename, streamable=streamable)

    def get_bytestream_by_path(self, filepath, streamable=False, appendable=False):
        """
        As L{get_bytestream}, but can ask for a file via a path:

        >>> print object.get_bytestream_by_path('data/foo/mytext.txt')
        ............

        @param filepath: path of the file inside the object
        @type filepath: path to a file
        @param streamable: If True, returns a filelike handle to C{read()} from -
        I{remember to C{close()} the file!} If False, reads in the file into a
        bytestring and return that instead.
        @type streamable: True|False
        @param appendable: Passed through unchanged to L{get_bytestream}
        @type appendable: True|False
        @returns: Either L{file} or L{str}
        """
        path, filename = os.path.split(filepath)
        return self.get_bytestream(filename, streamable, path, appendable)

    def add_file(self, from_file_location, path=None, new_filename=None, buffer_size=None):
        """
        Adds a file from a given location. Currently, the copy is done via python buffering
        the read from one file to the other. Might be easily replaceable with a C{shutil.copy}
        at a later date.

        If no new filename is set, it will use the original filename

        Aside from this, it works in the same fashion as L{add_bytestream}

        @param from_file_location: File path to read the file from
        @type from_file_location: Directory path
        @param path: (Optional) subdirectory within object to store file in
        @type path: Directory path
        @param new_filename: (Optional) Name of the file to write to
        @type new_filename: filename
        @param buffer_size: (Optional) Used for streaming filelike objects - defines the size of the buffer
                            to read in each cycle.
        @type buffer_size: L{int}
        @returns: Whatever the storage client's C{put_stream} returns
        @raise FileNotFoundException: if C{from_file_location} does not exist
        """
        if not os.path.exists(from_file_location):
            raise FileNotFoundException
        if not new_filename:
            # Default to the final path component of the source file.
            new_filename = os.path.basename(from_file_location)
        # The context manager guarantees the source handle is closed once the
        # store has consumed it, including when put_stream raises.
        with open(from_file_location, 'rb') as fh:
            if buffer_size:
                return self.fs.put_stream(self.id, path, new_filename,
                                          bytestream=fh, buffer_size=buffer_size)
            return self.fs.put_stream(self.id, path, new_filename, bytestream=fh)

    def del_file(self, filename, path=None):
        """
        Delete a file from the object.

        If path is set, it will attempt to delete from that subpath.

        @param filename: Name of the file to delete
        @type filename: filename
        @param path: (Optional) subdirectory within object to delete file from
        @type path: Directory path
        @returns: Whatever the storage client's C{del_stream} returns
        """
        return self.fs.del_stream(self.id, filename, path)

    def del_file_by_path(self, filepath):
        """
        Delete a file from the object using the filepath as a subpath within the object.

        Eg::

            object_root -- foo.txt
                           foo2.txt
                           data -- image1.jpg
                                   image2.jpg

        >>> object.del_file_by_path('data/image2.jpg')
        >>>

        @param filepath: filepath (subdirectory plus filename) within object to delete
        @type filepath: path to a file
        @returns: Whatever L{del_file} returns
        """
        path, filename = os.path.split(filepath)
        return self.del_file(filename, path)

    def del_path(self, subpath, recursive=False):
        """
        Delete a subpath from the object, and can do so recursively (optional)
        If the path is found to be not "empty" (ie it has parts in it) and
        recursive is not True, then it will raise a L{PathIsNotEmptyException}

        @param subpath: subdirectory path to delete
        @type subpath: Directory path
        @param recursive: Whether the delete is recursive (think rm -rf)
        @type recursive: bool
        @returns: Whatever the storage client's C{del_path} returns
        """
        return self.fs.del_path(self.id, subpath, recursive)

    def list_parts(self, path=None):
        """
        List all the parts of object's root.

        If path is supplied, the parts in that subdirectory are returned.

        If the subpath doesn't exist, a L{ObjectNotFoundException} will be raised.

        >>> object.list_parts('data/images')
        [ 'image001.tif', 'image.... ]

        @param path: (Optional) List the parts contained in C{path}'s subdirectory
        @type path: Directory path
        @returns: L{list}
        """
        return self.fs.list_parts(self.id, path)

    def isfile(self, filepath):
        """
        Returns True or False depending on whether the path is a file or not.

        If the file doesn't exist, False is returned.

        @param filepath: Path to be tested
        @type filepath: Directory path
        @returns: L{bool}
        """
        return self.fs.isfile(self.id, filepath)

    def isdir(self, filepath):
        """
        Returns True or False depending on whether the path is a subdirectory or not.

        If the path doesn't exist, False is returned.

        @param filepath: Path to be tested
        @type filepath: Directory path
        @returns: L{bool}
        """
        return self.fs.isdir(self.id, filepath)
327