Package sword2 :: Module collection
[hide private]
[frames] | no frames]

Source Code for Module sword2.collection

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ Collection classes 
  5   
  6  These classes are used in their documented manner but most collect or group various other items 
  7  to make them suitable for use. 
  8   
  9  The key class is `SDCollection`, which presents a simple read-only object which represents the 
 10  information held within a collection element in a SWORD2 document such as the Service Document. 
 11   
 12  Two other classes, `Collection_Feed` and `Sword_Statement` are works in progress for now, with limited support 
 13  for the things they logically handle. 
 14   
 15  """ 
 16   
 17  from sword2_logging import logging 
 18  from implementation_info import __version__ 
 19  coll_l = logging.getLogger(__name__) 
 20   
 21  from compatible_libs import etree 
 22  from utils import NS, get_text 
 23   
 24  from deposit_receipt import Deposit_Receipt 
 25   
 26  from atom_objects import Category 
 27   
 28  from datetime import datetime 
 29   
 30   
class SDCollection(object):
    """
    `SDCollection` - holds, parses and presents simple attributes with information taken from a collection entry
    within a SWORD2 Service Document.

    This will be instantiated by a `sword2.Service_Document` and as such, is unlikely to be called explicitly.

    Usage:

    >>> from sword2 import SDCollection
    >>> c = SDCollection()

    .... pull an `etree.SubElement` from a service document into `collection_node`

    >>> c.load_from_etree(collection_node)
    >>> c.collectionPolicy
    "This collection has the following policy for deposits"
    >>> c.title
    "Thesis Deposit"
    """

    def __init__(self, title=None,
                 href=None,
                 accept=None,
                 accept_multipart=None,
                 categories=None,
                 collectionPolicy=None,
                 description=None,
                 mediation=None,
                 treatment=None,
                 acceptPackaging=None,
                 service=None,
                 dom=None):
        """
        Creates an `SDCollection` object - as used by `sword2.Service_Document`

        #BETASWORD2URL
        See http://sword-app.svn.sourceforge.net/viewvc/sword-app/spec/trunk/SWORDProfile.html?revision=HEAD#protocoloperations_retreivingservicedocument
        for more details about the SWORD2 Service Document.

        Usage:

        Read useful information from the attributes of this object once loaded.

        Every list-valued argument defaults to a fresh empty list when omitted (or passed as `None`),
        so instances never share state through their defaults.

        Attributes::

        title            -- <atom:title> - Title of collection, (`str`)
        href             -- <collection href=... > - Collection IRI (`str`)
        accept           -- <accept>*</accept> - the formats which this collection can take in (`list` of `str`)
        accept_multipart -- <accept alternate="multipart-related">*</accept> - the formats which this collection can take
                            in via multipart-related (`list` of `str`)
        categories       -- <atom:category> - Collection category (`list` of `sword2.Category`'s)
        collectionPolicy -- <sword:collectionPolicy> - Collection policy (`str`)
        description      -- <dcterms:abstract> - Collection descriptive text (`str`)
        mediation        -- <sword:mediation> - Support for mediated deposit (`True` or `False`)
        treatment        -- <sword:treatment> - from the SWORD2 specifications:
                            ".. either a human-readable statement describing treatment the deposited resource
                            has received or a IRI that dereferences to such a description."
        acceptPackaging  -- <sword:acceptPackaging> - Accepted package types (`list` of `str`)
                            from the SWORD2 specifications: "The value SHOULD be a IRI for a known packaging format"
        service          -- <sword:service> - References to nested service descriptions (`list` of `str`)
        dom              -- the most recent element given to `load_from_etree`, or `None` if nothing was loaded

        Example XML fragment that is expected: (xmlns="http://www.w3.org/2007/app")

        ...

        <collection href="http://swordapp.org/col-iri/43">
            <atom:title>Collection 43</atom:title>
            <accept>*/*</accept>
            <accept alternate="multipart-related">*/*</accept>
            <sword:collectionPolicy>Collection Policy</sword:collectionPolicy>
            <dcterms:abstract>Collection Description</dcterms:abstract>
            <sword:mediation>false</sword:mediation>
            <sword:treatment>Treatment description</sword:treatment>
            <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging>
            <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging>
            <sword:service>http://swordapp.org/sd-iri/e4</sword:service>
        </collection>
        ...

        Parsing this fragment:

        Again, this step is done by the `sword2.Service_Document`, but if the above XML was in the `doc` variable:

            # Get an etree-compatible library, such as from `lxml.etree`, `xml.etree` or `elementtree.ElementTree`
            >>> from sword2.compatible_libs import etree
            >>> from sword2 import SDCollection
            >>> dom = etree.fromstring(doc)

            # create an `SDCollection` instance from this XML document
            >>> c = SDCollection(dom = dom)

            # query it
            >>> c.treatment
            "Treatment description"
            # Non-unique elements, for example:
            >>> c.service
            ["http://swordapp.org/sd-iri/e4"]
            >>> c.accept
            ["*/*"]

        """
        # APP/Atom
        self.title = title
        self.href = href
        # `None` stands in for "not supplied"; a literal `[]` default would be
        # one list object shared by every instance created without that argument.
        self.accept = accept if accept is not None else []
        self.accept_multipart = accept_multipart if accept_multipart is not None else []
        # SWORD
        self.mediation = mediation
        self.description = description
        self.treatment = treatment
        self.collectionPolicy = collectionPolicy
        self.acceptPackaging = acceptPackaging if acceptPackaging is not None else []
        self.service = service if service is not None else []
        self.categories = categories if categories is not None else []
        # Always present, so `c.dom` is safe to read before anything is loaded.
        self.dom = None
        if dom is not None:
            # NB `load_from_etree` blanks the instance before parsing, so values
            # found in the XML element replace the constructor arguments above.
            self.load_from_etree(dom)

    def _reset(self):
        """Blank this instance of `SDCollection` (the cached `self.dom` is left untouched)."""
        self.title = None
        self.href = None
        self.accept = []
        self.accept_multipart = []
        # SWORD
        self.mediation = None
        self.description = None
        self.treatment = None
        self.collectionPolicy = None
        self.acceptPackaging = []
        # A list, like the other multi-valued attributes and the constructor default.
        self.service = []
        self.categories = []

    def load_from_etree(self, collection):
        """
        Parse an `etree.SubElement` into attributes in this object.

        The instance is blanked first (see `_reset`), so nothing from a previous
        load or from the constructor arguments survives.

        Also, caches the most recently used DOM object it is passed in
        `self.dom`
        """
        self._reset()
        self.dom = collection
        self.title = get_text(collection, NS['atom'] % 'title')
        # MUST have href attribute
        self.href = collection.attrib.get('href', None)
        # Accept and Accept multipart
        for accept in collection.findall(NS['app'] % 'accept'):
            if accept.attrib.get("alternate", None) == "multipart-related":
                self.accept_multipart.append(accept.text)
            else:
                self.accept.append(accept.text)
        # Categories
        for category_element in collection.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=category_element))
        # SWORD extensions:
        self.collectionPolicy = get_text(collection, NS['sword'] % 'collectionPolicy')

        # Mediation: True/False
        mediation = get_text(collection, NS['sword'] % 'mediation')
        # NOTE(review): `get_text` is defined elsewhere; this guard assumes it can
        # hand back a falsy value (e.g. None) when <sword:mediation> is absent,
        # in which case calling `.lower()` directly would raise. Absent -> False.
        self.mediation = bool(mediation) and mediation.lower() == "true"

        self.treatment = get_text(collection, NS['sword'] % 'treatment')
        self.description = get_text(collection, NS['dcterms'] % 'abstract')
        self.service = get_text(collection, NS['sword'] % 'service', plural=True)
        self.acceptPackaging = get_text(collection, NS['sword'] % 'acceptPackaging', plural=True)

        # Log collection details:
        coll_l.debug(str(self))

    def __str__(self):
        """Provides a simple display of the pertinent information in this object suitable for CLI logging."""
        _s = ["Collection: '%s' @ '%s'. Accept:%s" % (self.title, self.href, self.accept)]
        # Optional lines, in display order; only truthy values are shown.
        optional = (
            ("SWORD: Description - '%s'", self.description),
            ("SWORD: Collection Policy - '%s'", self.collectionPolicy),
            ("SWORD: Mediation? - '%s'", self.mediation),
            ("SWORD: Treatment - '%s'", self.treatment),
            ("SWORD: Accept Packaging: '%s'", self.acceptPackaging),
            ("SWORD: Nested Service Documents - '%s'", self.service),
        )
        for template, value in optional:
            if value:
                # Wrap in a 1-tuple so a tuple-valued attribute is formatted
                # as a single value rather than unpacked as format arguments.
                _s.append(template % (value,))
        for c in self.categories:
            _s.append(str(c))
        return "\n".join(_s)

    def __repr__(self):
        """Provides the atom.title of the collection as part of the repr reply"""
        return "<sword2.SDCollection - title: %s>" % self.title

    def to_json(self):
        """Provides a simple means to turn the important parsed information into a simple JSON-encoded form.

        NB this uses the attributes of the object, not the cached DOM object, so information can be altered/added
        on the fly.

        Returns the JSON text, or `None` (after logging an error) when no JSON
        library is available from `compatible_libs`."""
        from compatible_libs import json
        if not json:
            coll_l.error("Could not return information about Collection '%s' as JSON" % self.title)
            return None
        _j = {'title': self.title,
              'href': self.href,
              'description': self.description,
              'accept': self.accept,
              'accept_multipart': self.accept_multipart,
              'mediation': self.mediation,
              'treatment': self.treatment,
              'collectionPolicy': self.collectionPolicy,
              'acceptPackaging': self.acceptPackaging,
              'service': self.service,
              'categories': self.categories}
        # `categories` holds `Category` objects; `default=str` is used only for
        # values the encoder cannot serialize itself, so they are emitted in
        # their `str()` form (as in `__str__`) rather than raising TypeError.
        return json.dumps(_j, default=str)
246
class Collection_Feed(object):
    """Placeholder for collection feed support - not implemented yet.

    For now this only records the values handed to the constructor.
    """

    def __init__(self, feed_iri=None, http_client=None, feed_xml=None):
        """Store the feed IRI, the HTTP client and any already-fetched feed XML."""
        # Where the feed lives and the raw XML for it, if supplied.
        self.feed_iri = feed_iri
        self.feed_xml = feed_xml
        # HTTP client object, kept under the short name `h`.
        self.h = http_client
        # Starts out as an empty list.
        self._cached = list()
254
class Sword_Statement(object):
    """Beginning SWORD2 Sword Statement support.

    The aim is for the sword statements to be available through attributes on this object.

    In the meantime, please use the low-level `self.feed` for access to an etree.Element containing the
    parsed form of the `xml_document` it is passed.

    NB if `self.parsed` is not `True`, then there has been a problem parsing the xml document so check the original text,
    cached in `self.xml_document`. In that case `self.feed` is `None` and `self.categories` / `self.entries` stay empty.
    """

    def __init__(self, xml_document):
        """Parse `xml_document` and, if that succeeds, collect its categories and entries.

        A parse failure is logged rather than raised; check `self.parsed` afterwards.
        """
        self.xml_document = xml_document
        self.parsed = False
        # Defined up front so the attribute exists even when parsing fails.
        self.feed = None
        self.first = None
        self.next = None
        self.previous = None
        self.last = None
        self.categories = []
        self.entries = []
        try:
            coll_l.info("Attempting to parse the Feed XML document")
            self.feed = etree.fromstring(xml_document)
            self.parsed = True
        # Deliberately broad: the exception type raised for bad XML depends on
        # which etree implementation `compatible_libs` selected.
        # (`except ... as` is valid on Python 2.6+ and Python 3.)
        except Exception as e:
            coll_l.error("Failed to parse document - %s" % e)
            # Only slice when there is something to slice, so a `None`
            # document does not raise a second error inside this handler.
            snippet = xml_document[:300] if xml_document else xml_document
            coll_l.error("XML document begins:\n %s" % (snippet,))
        if self.parsed:
            self.enumerate_feed()

    def enumerate_feed(self):
        """Fill `self.categories` and `self.entries` from the parsed feed.

        Does nothing when there is no parsed feed (`self.feed` is `None`).
        Items are appended, so calling this again on the same instance adds
        them a second time.
        """
        if self.feed is None:
            return
        # Handle Categories
        for cate in self.feed.findall(NS['atom'] % 'category'):
            self.categories.append(Category(dom=cate))
        # handle entries - each one is compatible with a Deposit receipt, so using that
        for entry in self.feed.findall(NS['atom'] % 'entry'):
            self.entries.append(Deposit_Receipt(dom=entry))
        # TODO handle multipage first/last pagination