1
2
3
""" Collection classes

These classes are used in their documented manner, but most of them collect or group various other
items to make them suitable for use.

The key class is `SDCollection`, which presents a simple read-only object representing the
information held within a collection element in a SWORD2 document such as the Service Document.

Two other classes, `Collection_Feed` and `Sword_Statement`, are works in progress for now, with
limited support for the things they logically handle.

"""
16
17 from sword2_logging import logging
18 from implementation_info import __version__
19 coll_l = logging.getLogger(__name__)
20
21 from compatible_libs import etree
22 from utils import NS, get_text
23
24 from deposit_receipt import Deposit_Receipt
25
26 from atom_objects import Category
27
28 from datetime import datetime
29
30
class SDCollection(object):
    """
    `Collection` - holds, parses and presents simple attributes with information taken from a
    collection entry within a SWORD2 Service Document.

    This will be instantiated by a `sword2.Service_Document` and as such, is unlikely to be called
    explicitly.

    Usage:

    >>> from sword2 import SDCollection
    >>> c = SDCollection()

    .... pull an `etree.SubElement` from a service document into `collection_node`

    >>> c.load_from_etree(collection_node)
    >>> c.collectionPolicy
    "This collection has the following policy for deposits"
    >>> c.title
    "Thesis Deposit"
    """

    def __init__(self, title=None,
                 href=None,
                 accept=None,
                 accept_multipart=None,
                 categories=None,
                 collectionPolicy=None,
                 description=None,
                 mediation=None,
                 treatment=None,
                 acceptPackaging=None,
                 service=None,
                 dom=None):
        """
        Creates a `Collection` object - as used by `sword2.Service_Document`

        #BETASWORD2URL
        See http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#protocoloperations_retreivingservicedocument
        for more details about the SWORD2 Service Document.

        Usage:

        Read useful information from the attributes of this object once loaded.

        Attributes::

        title            --  <atom:title> - Title of collection, (`str`)
        href             --  <collection href=... > - Collection IRI (`str`)
        accept           --  <accept>*</accept> - the formats which this collection can take in
                             (`list` of `str`)
        accept_multipart --  <accept alternate="multipart-related">*</accept> - the formats which
                             this collection can take in via multipart-related (`list` of `str`)
        categories       --  <atom:category> - Collection category (`list` of `sword2.Category`'s)
        collectionPolicy --  <sword:collectionPolicy> - Collection policy (`str`)
        description      --  <dcterms:abstract> - Collection descriptive text (`str`)
        mediation        --  <sword:mediation> - Support for mediated deposit (`True` or `False`)
        treatment        --  <sword:treatment> - from the SWORD2 specifications:
                             ".. either a human-readable statement describing treatment the
                             deposited resource has received or a IRI that dereferences to such a
                             description."
        acceptPackaging  --  <sword:acceptPackaging> - Accepted package types (`list` of `str`)
                             from the SWORD2 specifications: "The value SHOULD be a IRI for a known
                             packaging format"
        service          --  <sword:service> - References to nested service descriptions
                             (`list` of `str`)

        Every list-valued argument defaults to a fresh empty list for each instance. If `dom` is
        given it is handed to `load_from_etree`, which blanks all attributes first, so the values
        parsed from the element replace the keyword values.

        Example XML fragment that is expected:  (xmlns="http://www.w3.org/2007/app")

        ...

        <collection href="http://swordapp.org/col-iri/43">
            <atom:title>Collection 43</atom:title>
            <accept>*/*</accept>
            <accept alternate="multipart-related">*/*</accept>
            <sword:collectionPolicy>Collection Policy</sword:collectionPolicy>
            <dcterms:abstract>Collection Description</dcterms:abstract>
            <sword:mediation>false</sword:mediation>
            <sword:treatment>Treatment description</sword:treatment>
            <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging>
            <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging>
            <sword:service>http://swordapp.org/sd-iri/e4</sword:service>
        </collection>
        ...

        Parsing this fragment:

        Again, this step is done by the `sword2.Service_Document`, but if the above XML was in the
        `doc` variable:

            # Get an etree-compatible library, such as from `lxml.etree`, `xml.etree` or
            # `elementtree.ElementTree`
            >>> from sword2.compatible_libs import etree
            >>> from sword2 import SDCollection
            >>> dom = etree.fromstring(doc)

            # create an `SDCollection` instance from this XML document
            >>> c = SDCollection(dom = dom)

            # query it
            >>> c.treatment
            "Treatment description"
            # Non-unique elements, for example:
            >>> c.service
            ["http://swordapp.org/sd-iri/e4"]
            >>> c.accept
            ["*/*"]

        """
        self.title = title
        self.href = href
        # A `None` default stands for "no value supplied": build a new list per instance so that
        # appending to one collection's list never leaks into another instance.
        self.accept = [] if accept is None else accept
        self.accept_multipart = [] if accept_multipart is None else accept_multipart

        self.mediation = mediation
        self.description = description
        self.treatment = treatment
        self.collectionPolicy = collectionPolicy
        self.acceptPackaging = [] if acceptPackaging is None else acceptPackaging
        self.service = [] if service is None else service
        self.categories = [] if categories is None else categories

        # Define the DOM cache up front so the attribute exists even if nothing is ever loaded.
        self.dom = None
        if dom is not None:
            self.load_from_etree(dom)

    def _reset(self):
        """Blank this instance of `SDCollection`"""
        self.title = None
        self.href = None
        self.accept = []
        self.accept_multipart = []

        self.mediation = None
        self.description = None
        self.treatment = None
        self.collectionPolicy = None
        self.acceptPackaging = []
        # `service` is documented (and defaulted) as a list, so blank it to one.
        self.service = []
        self.categories = []

    def load_from_etree(self, collection):
        """
        Parse an `etree.SubElement` into attributes in this object.

        All attributes are blanked first, so nothing from a previous load or from the constructor
        survives. Also, caches the most recently used DOM object it is passed in `self.dom`
        """
        self._reset()
        self.dom = collection
        self.title = get_text(collection, NS['atom'] % 'title')

        self.href = collection.attrib.get('href', None)

        # <accept> elements are split on their `alternate` attribute.
        for accept in collection.findall(NS['app'] % 'accept'):
            if accept.attrib.get("alternate", None) == "multipart-related":
                self.accept_multipart.append(accept.text)
            else:
                self.accept.append(accept.text)

        for category_element in collection.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=category_element))

        self.collectionPolicy = get_text(collection, NS['sword'] % 'collectionPolicy')

        # `get_text` is defined elsewhere; guard against it yielding None (e.g. for an absent
        # element) rather than failing on `.lower()`. Anything but "true" counts as False.
        mediation = get_text(collection, NS['sword'] % 'mediation')
        self.mediation = mediation is not None and mediation.lower() == "true"

        self.treatment = get_text(collection, NS['sword'] % 'treatment')
        self.description = get_text(collection, NS['dcterms'] % 'abstract')
        # Fall back to an empty list when nothing is returned, keeping these attributes lists as
        # documented.
        self.service = get_text(collection, NS['sword'] % 'service', plural=True) or []
        self.acceptPackaging = get_text(collection, NS['sword'] % 'acceptPackaging',
                                        plural=True) or []

        coll_l.debug(str(self))

    def __str__(self):
        """Provides a simple display of the pertinent information in this object suitable for CLI
        logging. Optional fields appear only when they hold a truthy value."""
        _s = ["Collection: '%s' @ '%s'. Accept:%s" % (self.title, self.href, self.accept)]
        optional_lines = (
            ("SWORD: Description - '%s'", self.description),
            ("SWORD: Collection Policy - '%s'", self.collectionPolicy),
            ("SWORD: Mediation? - '%s'", self.mediation),
            ("SWORD: Treatment - '%s'", self.treatment),
            ("SWORD: Accept Packaging: '%s'", self.acceptPackaging),
            ("SWORD: Nested Service Documents - '%s'", self.service),
        )
        for template, value in optional_lines:
            if value:
                # Wrap in a 1-tuple so a tuple-valued attribute is formatted as a single value.
                _s.append(template % (value,))
        for c in self.categories:
            _s.append(str(c))
        return "\n".join(_s)

    def __repr__(self):
        """Provides the atom.title of the collection as part of the repr reply"""
        return "<sword2.SDCollection - title: %s>" % self.title

    # NOTE(review): the `def` line of this method was missing from the reviewed text; the name
    # `to_json` is reconstructed - confirm against callers.
    def to_json(self):
        """Provides a simple means to turn the important parsed information into a simple
        JSON-encoded form.

        NB this uses the attributes of the object, not the cached DOM object, so information can
        be altered/added on the fly.

        Returns the JSON text, or `None` (after logging an error) when no json library is
        available."""
        from compatible_libs import json
        if not json:
            coll_l.error("Could not return information about Collection '%s' as JSON" % self.title)
            return None
        _j = {'title': self.title,
              'href': self.href,
              'description': self.description,
              'accept': self.accept,
              'accept_multipart': self.accept_multipart,
              'mediation': self.mediation,
              'treatment': self.treatment,
              'collectionPolicy': self.collectionPolicy,
              'acceptPackaging': self.acceptPackaging,
              'service': self.service,
              # Category is defined elsewhere and cannot be assumed to be JSON-serialisable, so
              # each one is rendered with str(), exactly as __str__ does.
              'categories': [str(c) for c in self.categories]}
        return json.dumps(_j)
246
class Collection_Feed(object):
    """Placeholder for collection feed support - nothing to see here yet.

    For now an instance only records the arguments it was built with.
    """

    def __init__(self, feed_iri=None, http_client=None, feed_xml=None):
        """Store the feed IRI, the HTTP client and any feed XML, and start an empty cache list."""
        self.feed_iri = feed_iri
        self.feed_xml = feed_xml
        self.h = http_client
        self._cached = list()
254
class Sword_Statement(object):
    """Beginning SWORD2 Sword Statement support.

    The aim is for the sword statements to be available through attributes on this object.

    In the meantime, please use the low-level `self.feed` for access to an etree.Element containing
    the parsed form of the `xml_document` it is passed.

    NB if `self.parsed` is not `True`, then there has been a problem parsing the xml document so
    check the original text, cached in `self.xml_document`. In that case `self.feed` is `None` and
    `self.categories` / `self.entries` stay empty.
    """

    def __init__(self, xml_document):
        """Parse `xml_document` and collect the categories and entries found in it.

        Parsing is best-effort: a failure is logged and reported through `self.parsed` rather
        than raised.
        """
        self.xml_document = xml_document
        self.parsed = False
        # Always define `feed`, so a failed parse leaves a usable object behind.
        self.feed = None
        self.first = None
        self.next = None
        self.previous = None
        self.last = None
        self.categories = []
        self.entries = []

        coll_l.info("Attempting to parse the Feed XML document")
        try:
            self.feed = etree.fromstring(xml_document)
        except Exception as e:
            # Deliberately broad: the exception type depends on which etree-compatible library
            # is in use, and the documented contract is to flag failure via `self.parsed`.
            coll_l.error("Failed to parse document - %s" % e)
            # `xml_document` may be None, which cannot be sliced.
            coll_l.error("XML document begins:\n %s" % (xml_document or "")[:300])
        else:
            self.parsed = True
            self.enumerate_feed()

    def enumerate_feed(self):
        """Append a `Category` for each atom:category and a `Deposit_Receipt` for each atom:entry
        found directly under `self.feed`. Does nothing when there is no parsed feed."""
        if self.feed is None:
            return

        for cate in self.feed.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=cate))

        for entry in self.feed.findall(NS['atom'] % 'entry'):
            self.entries.append(Deposit_Receipt(dom=entry))
291
292