1
2
3
4 """
5 Conventions used:
6
7 From http://www.cdlib.org/inside/diglib/pairtree/pairtreespec.html version 0.1
8
9 This is a convenience object, used as a proxy for an object inside a pairtree store.
As such, it shouldn't be instantiated directly.
11 """
12
13 import os, sys, shutil
14
15 import codecs
16
17 import string
18
19 from storage_exceptions import *
20
22 """
23 The important methods:
24
25 - add_bytestream(filename, bytestream, path=None, buffer_size=None):
- get_bytestream(filename, streamable=False, path=None, appendable=False):
27 - del_file(filename, path=None):
28 - list_parts(path=None):
29
First, set up a simple store in 'data' and get an object called 'bar'
31 (which will be equivalent to 'http://example.org/bar')
32
33 >>> from pairtree import PairtreeStorageFactory
34 >>> factory = PairtreeStorageFactory()
35 >>> store = factory.get_store(store_dir='data', uri_base='http://example.org/')
36 >>> bar = store.get_object('bar')
37
38 Now add a simple string to a file called 'foo.txt'
39
40 >>> bar.add_bytestream('foo.txt', 'can be any sequence of bytes')
41 >>> bar.list_parts()
42 ['foo.txt']
43 >>>
44
45 Adding buffered content from a file:
46
47 >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
48 ... bar.add_bytestream('Firefox_wallpaper.png', stream)
49 ...
50 >>>
51
52 Adding the same file to magic/path/inside/object - paths are automatically created on
53 demand.
54
55 >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
56 ... bar.add_bytestream('Firefox_wallpaper.png', stream, path='magic/path/inside/object')
57 ...
58 >>>
59
60 Removing the first copy of that file, which was added to the wrong place:
61
62 >>> bar.del_file('Firefox_wallpaper.png')
63 >>> bar.list_parts()
64 ['magic', 'foo.txt']
65 >>> bar.list_parts('magic/path')
66 ['inside']
67 >>> bar.list_parts('magic/path/inside/object')
68 ['Firefox_wallpaper.png']
69 >>>
70
71 There are also some convenience methods:
72
73 - L{add_bytestream_by_path}(self, filepath, bytestream, buffer_size=None):
- L{del_file_by_path}(self, filepath):
75 - L{get_bytestream_by_path}(self, filepath, streamable=False):
76
77 The I{by_path} suffix means that you can give it the whole path as one, and it will
78 try to figure out what is intended, for example, consider the png we placed in a nested
79 directory earlier:
80
81 >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
82 ... bar.add_bytestream('Firefox_wallpaper.png', stream, path='magic/path/inside/object')
83 ...
84
85 This can be written as:
86
87 >>> with open('/home/ben/Firefox_wallpaper.png','rb') as stream:
88 ... bar.add_bytestream_by_path('magic/path/inside/object/Firefox_wallpaper.png', stream)
89 ...
90
91 Getting files from an object
92 ============================
93
94 The flag I{streamable} is key here - if this is set to True, then you will be passed
95 a file handle, which you must remember to close or use the construct:
96
97 >>> with bar.get_bytestream('foo.txt', streamable=True) as text:
98 ... print text.read()
99 ...
100 >>>
101
102 This is very useful for large files you wish to scan through, but do not need to hold
103 in memory all at the same time.
104
105 By setting streamable to False, the entire file is read into memory and returned:
106
107 >>> print bar.get_bytestream('foo.txt')
108 can be any sequence of bytes
109 """
def __init__(self, id, fs_store_client):
    """
    Bind this proxy to an object identifier and to the store client that
    serves it.

    The object's URI is derived by appending C{id} to the client's
    C{uri_base} attribute.

    @param id: Identifier for pairtree object
    @type id: identifier
    @param fs_store_client: A reference to an instance of L{PairtreeStorageClient};
    it must expose a C{uri_base} attribute
    @type fs_store_client: L{PairtreeStorageClient}
    """
    client = fs_store_client
    self.fs = client
    self.id = id
    # Plain %-formatting: the URI is simply the base followed by the identifier.
    self.uri = "%s%s" % (client.uri_base, id)
120
def add_bytestream(self, filename, bytestream, path=None, buffer_size=None):
    """
    Store a string or the contents of a file-like object under C{filename}
    within this object. A C{path} may be supplied to place the file in a
    subdirectory of the object.

    The work is delegated to the store client's C{put_stream}.

    @param filename: Name of the file to write to
    @type filename: filename
    @param bytestream: Either a string or a file-like object to read from
    @type bytestream: L{str}|L{file}
    @param path: (Optional) subdirectory path to store file in
    @type path: Directory path
    @param buffer_size: (Optional) Used for streaming filelike objects - defines the size of the buffer
    to read in each cycle. A false value (C{None} or C{0}) is not forwarded to the store client.
    @type buffer_size: L{int}
    @returns: whatever the store client's C{put_stream} returns
    """
    stream_args = [self.id, path, filename, bytestream]
    if buffer_size:
        # Only pass a buffer size along when the caller actually gave one.
        stream_args.append(buffer_size)
    return self.fs.put_stream(*stream_args)
139
141 """
142 Add a string or file to a given filename within this object.
143
144 The following adds the contents of footxt into a file 'foo.txt' in a
145 subdirectory of the object 'data', which may or may not have existed prior
146 to this call:
147
148 >>> object.add_bytestream_by_path('data/foo.txt', footxt)
149
@param filepath: path (optional subdirectory plus filename) within the object to store the file at
151 @type filepath: path to a file
152 @param bytestream: Either a string or a file-like object to read from
153 @type bytestream: L{str}|L{file}
154 @param buffer_size: (Optional) Used for streaming filelike objects - defines the size of the buffer
155 to read in each cycle.
156 @type buffer_size: L{int}
157 """
158 path, filename = os.path.split(filepath)
159 if buffer_size:
160 return self.add_bytestream(filename, bytestream, path, buffer_size)
161 return self.add_bytestream(filename, bytestream, path)
162
def get_bytestream(self, filename, streamable=False, path=None, appendable=False):
    """
    Read a file from this pairtree object.

    If C{streamable} is True, a filehandle for the file is returned, which
    must be C{close()}'d once finished with. In python 2.6 and above, this
    can be done easily:

    >>> with object.get_bytestream('image001.tif', True, 'data/images') as stream:
            # Do something with the C{stream} handle
            pass

    stream is closed at the end of a C{with} block

    If C{appendable} is True, the request goes to the store client's
    C{get_appendable_stream} instead, so the returned handle can accept
    writes; in that case C{streamable} is ignored. Otherwise the request
    goes to the store client's C{get_stream} and the file is read-only.

    @param filename: Name of the file to read in
    @type filename: filename
    @param streamable: If True, returns a filelike handle to C{read()} from -
    I{remember to C{close()} the file!} If False, reads in the file into a
    bytestring and return that instead.
    @type streamable: True|False
    @param path: (Optional) subdirectory path to retrieve file from
    @type path: Directory path
    @param appendable: If True, return a writable handle from the store client
    @type appendable: True|False
    @returns: Either L{file} or L{str}
    """
    store = self.fs
    if not appendable:
        return store.get_stream(self.id, path=path, stream_name=filename, streamable=streamable)
    return store.get_appendable_stream(self.id, path=path, stream_name=filename)
192
194 """
195 As L{get_bytestream}, but can ask for a file via a path:
196
>>> print object.get_bytestream_by_path('data/foo/mytext.txt')
198 ............
199
@param filepath: path (optional subdirectory plus filename) of the file inside the object
201 @type filepath: path to a file
202 @param streamable: If True, returns a filelike handle to C{read()} from -
203 I{remember to C{close()} the file!} If False, reads in the file into a
204 bytestring and return that instead.
205 @type streamable: True|False
206 @returns: Either L{file} or L{str}
207 """
208 path, filename = os.path.split(filepath)
209 return self.get_bytestream(filename, streamable, path, appendable)
210
def add_file(self, from_file_location, path=None, new_filename=None, buffer_size=None):
    """
    Add a file from a given location on disk to this object.

    The source file is opened in binary mode and handed to the store
    client's C{put_stream}; the handle is always closed once that call has
    returned or raised.

    If no new filename is set, it will use the original filename.

    Aside from this, it works in the same fashion as L{add_bytestream}.

    @param from_file_location: File path to read the file from
    @type from_file_location: Directory path
    @param path: (Optional) subdirectory within object to store file in
    @type path: Directory path
    @param new_filename: (Optional) Name of the file to write to
    @type new_filename: filename
    @param buffer_size: (Optional) Used for streaming filelike objects - defines the size of the buffer
    to read in each cycle. A false value (C{None} or C{0}) is not forwarded to the store client.
    @type buffer_size: L{int}
    @returns: whatever the store client's C{put_stream} returns
    @raise FileNotFoundException: if C{from_file_location} does not exist
    """
    if not os.path.exists(from_file_location):
        raise FileNotFoundException
    if not new_filename:
        new_filename = os.path.basename(from_file_location)
    fh = open(from_file_location, 'rb')
    try:
        if buffer_size:
            return self.fs.put_stream(self.id, path, new_filename, bytestream=fh, buffer_size=buffer_size)
        return self.fs.put_stream(self.id, path, new_filename, bytestream=fh)
    finally:
        # Previously close() sat after the return statements and never ran,
        # leaking the handle; finally guarantees it runs on success and on error.
        fh.close()
241
def del_file(self, filename, path=None):
    """
    Remove a single file from this object.

    When C{path} is given, the file is looked for inside that subpath of
    the object. The deletion itself is carried out by the store client's
    C{del_stream}.

    @param filename: Name of the file to delete
    @type filename: filename
    @param path: (Optional) subdirectory within object to delete file from
    @type path: Directory path
    @returns: whatever the store client's C{del_stream} returns
    """
    remove_stream = self.fs.del_stream
    return remove_stream(self.id, filename, path)
254
256 """
257 Delete a file from the object using the filepath as a subpath within the object.
258
259 Eg::
260
261 object_root -- foo.txt
262 foo2.txt
263 data -- image1.jpg
264 image2.jpg
265
266 >>> object.del_file_by_path('data/image2.jpg')
267 >>>
268
269 @param filepath: subdirectory filepath within object to delete
270 @type filepath: Directory path
271 """
272 path, filename = os.path.split(filepath)
273 return self.del_file(filename, path)
274
def del_path(self, subpath, recursive=False):
    """
    Delete a subpath (subdirectory) from the object, optionally recursively.

    The request is handed straight to the store client's C{del_path}. If the
    subpath is not empty (i.e. it still has parts in it) and C{recursive} is
    not True, a L{PathIsNotEmptyException} is expected to be raised by the
    store client; no such check is made here.

    @param subpath: subdirectory path to delete
    @type subpath: Directory path
    @param recursive: Whether the delete is recursive (think rm -rf)
    @type recursive: bool
    @returns: whatever the store client's C{del_path} returns
    """
    remove_path = self.fs.del_path
    return remove_path(self.id, subpath, recursive)
286
288 """
289 List all the parts of object's root.
290
291 If path is supplied, the parts in that subdirectory are returned.
292
293 If the subpath doesn't exist, a L{ObjectNotFoundException} will be raised.
294
295 >>> object.list_parts('data/images')
296 [ 'image001.tif', 'image.... ]
297
298 @param path: (Optional) List the parts contained in C{path}'s subdirectory
299 @type path: Directory path
300 @returns: L{list}
301 """
302 return self.fs.list_parts(self.id, path)
303
305 """
306 Returns True or False depending on whether the path is a file or not.
307
308 If the file doesn't exist, False is returned.
309
@param filepath: Path to be tested
311 @type path: Directory path
312 @returns: L{bool}
313 """
314 return self.fs.isfile(self.id, filepath)
315
def isdir(self, filepath):
    """
    Report whether C{filepath} is a subdirectory of this object.

    The check is performed by the store client's C{isdir}; a path that
    doesn't exist is documented to give False.

    @param filepath: Path to be tested
    @type filepath: Directory path
    @returns: L{bool}
    """
    check_is_dir = self.fs.isdir
    return check_is_dir(self.id, filepath)
327