Package sword2 :: Module service_document
[hide private]
[frames | no frames]

Source Code for Module sword2.service_document

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3   
  4  """ 
  5  Class to accept, parse and make queryable the Service Document response. 
  6   
  7  Example: 
  8   
  9  >>> doc = '''<?xml version="1.0" ?> 
 10  <service xmlns:dcterms="http://purl.org/dc/terms/" 
 11      xmlns:sword="http://purl.org/net/sword/terms/" 
 12      xmlns:atom="http://www.w3.org/2005/Atom" 
 13      xmlns="http://www.w3.org/2007/app"> 
 14   
 15      <sword:version>2.0</sword:version> 
 16      <sword:maxUploadSize>16777216</sword:maxUploadSize> 
 17   
 18      <workspace> 
 19          <atom:title>Main Site</atom:title> 
 20   
 21          <collection href="http://swordapp.org/col-iri/43"> 
 22              <atom:title>Collection 43</atom:title> 
 23              <accept>*/*</accept> 
 24              <accept alternate="multipart-related">*/*</accept> 
 25              <sword:collectionPolicy>Collection Policy</sword:collectionPolicy> 
 26              <dcterms:abstract>Collection Description</dcterms:abstract> 
 27              <sword:mediation>false</sword:mediation> 
 28              <sword:treatment>Treatment description</sword:treatment> 
 29              <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging> 
 30              <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging> 
 31              <sword:service>http://swordapp.org/sd-iri/e4</sword:service> 
 32          </collection> 
 33      </workspace> 
 34  </service>''' 
 35   
 36  >>> from sword2 import ServiceDocument 
 37  >>> s = ServiceDocument(doc) 
 38  >>> s.maxUploadSize 
 39  16777216 
 40  >>> s.workspaces 
 41  [('Main Site', [<sword2.collection.SDCollection object at 0x167be10>])] 
 42   
 43  >>> for c in s.workspaces[0][1]: print c 
 44  ...  
 45  Collection: 'Collection 43' @ 'http://swordapp.org/col-iri/43'. Accept:[] 
 46  SWORD: Collection Policy - 'Collection Policy' 
 47  SWORD: Treatment - 'Treatment description' 
 48  SWORD: Accept Packaging: '['http://purl.org/net/sword/package/SimpleZip', 'http://purl.org/net/sword/package/METSDSpaceSIP']' 
 49  SWORD: Nested Service Documents - 'http://swordapp.org/sd-iri/e4' 
 50   
 51  """ 
 52   
 53  from sword2_logging import logging 
 54  sd_l = logging.getLogger(__name__) 
 55   
 56  from collection import SDCollection 
 57   
 58  from compatible_libs import etree 
 59  from utils import NS, get_text 
 60   
class ServiceDocument(object):
    """Parse a SWORD2 Service Document response and expose its contents.

    Attributes:
        sd_uri: URI the document was fetched from; only used in log messages.
        raw_response: The text most recently handed to load_document(), or
            None if nothing has been loaded yet.
        service_dom: Root element produced by etree.fromstring(), or None
            when nothing has been parsed successfully.
        parsed: True once the most recent response was parsed without error.
        valid: Result of validate() for the most recent response.
        maxUploadSize: Integer value of sword:maxUploadSize; 0 when absent.
        version: Text of the sword:version element, or None.
        workspaces: List of tuples of the form
            ("Workspace Title", [list of SDCollection instances]).
    """

    def __init__(self, xml_response=None, sd_uri=None):
        """Create the document holder, loading xml_response if one is given.

        Args:
            xml_response: Optional service document text. A falsy value
                (None, empty string) leaves the instance unloaded.
            sd_uri: Optional URI of the service document, used for logging.
        """
        self.sd_uri = sd_uri  # Used mainly for debugging and logging
        # Always present, so they can be inspected before any load happened.
        self.raw_response = None
        self.service_dom = None
        self._reset_state()
        if xml_response:
            self.load_document(xml_response)

    def _reset_state(self):
        """Return every parse-derived attribute to its pre-load default."""
        self.parsed = False
        self.valid = False
        self.maxUploadSize = 0  # Zero implies no limit as default, as per spec
        self.version = None  # Stays None until a sword:version has been read
        # Once enumerated, this will be a list of tuples,
        # of the form: ("Workspace Title", [list of SDCollection instances])
        self.workspaces = []

    def load_document(self, xml_response):
        """Parse, validate and enumerate the given service document text.

        Any failure is logged rather than raised; inspect `parsed` and
        `valid` afterwards to find out how far processing got.

        Args:
            xml_response: The service document as returned by the server.
        """
        # Drop everything learnt from an earlier load, so that a failed or
        # invalid reload cannot leave the previous document's results behind.
        self._reset_state()
        self.service_dom = None
        # Stored before parsing so the error handler below can always log it.
        self.raw_response = xml_response
        try:
            if self.sd_uri:
                sd_l.debug("Attempting to load service document for %s", self.sd_uri)
            else:
                sd_l.debug("Attempting to load service document")
            self.service_dom = etree.fromstring(xml_response)
            self.parsed = True
            self.valid = self.validate()
            sd_l.info("Initial SWORD2 validation checks on service document - Valid document? %s", self.valid)
            self._enumerate_workspaces()
        except Exception as e:
            # Due to variability of underlying etree implementations, catching all
            # exceptions...
            sd_l.error("Could not parse the Service Document response from the server - %s", e)
            sd_l.debug("Received the following raw response:")
            sd_l.debug(self.raw_response)

    def validate(self):
        """Run the SWORD2 'MUST' checks against the parsed document.

        Side effects: stores the sword:version text in `self.version` and,
        when sword:maxUploadSize parses as an integer, stores it in
        `self.maxUploadSize`.

        Returns:
            True if every check passed; False otherwise, including when no
            document has been parsed yet.
        """
        if not self.parsed:
            return False
        valid = True

        # The SWORD server MUST specify the sword:version element with a value of 2.0
        # -- MUST have sword:version element
        # -- MUST have value of '2.0'
        self.version = get_text(self.service_dom, NS['sword'] % "version")
        if self.version:
            if self.version != "2.0":
                # Not a SWORD2 server...
                # Fail here?
                sd_l.error("The service document states that the server's endpoint is not SWORD 2.0 - stated version:%s", self.version)
                valid = False
        else:
            sd_l.error("The service document did not have a sword:version")
            valid = False

        # The SWORD server MAY specify the sword:maxUploadSize (in kB) of content
        # that can be uploaded in one request [SWORD003] as a child of the
        # app:service element. If provided this MUST contain an integer.
        maxupload = get_text(self.service_dom, NS['sword'] % "maxUploadSize")
        if maxupload:
            try:
                self.maxUploadSize = int(maxupload)
            except ValueError:
                # Unparsable as an integer. Enough to fail a validation?
                # Strictly... yep
                sd_l.error("The service document did not have maximum upload size parseable as an integer.")
                valid = False

        # Make sure there is at least one workspace. Only the workspace itself
        # is checked here; its collections are not inspected.
        test_workspace = self.service_dom.find(NS['app'] % "workspace")
        # Compare by identity: the lookup yields None when nothing matches.
        if test_workspace is not None:
            sd_l.debug("At least one app:workspace found.")
        else:
            valid = False
            sd_l.error("Could not find a app:workspace element in the service document.")

        return valid

    def _enumerate_workspaces(self):
        """Rebuild `self.workspaces` from the parsed document.

        Does nothing (beyond logging an error) unless `self.valid` is true.
        """
        if not self.valid:
            sd_l.error("The service document didn't pass the SWORD2 validation steps ('MUST' statements in spec). The workspaces and collections will not be enumerated.")
            return

        if self.sd_uri:
            sd_l.info("Enumerating workspaces and collections from the service document for %s", self.sd_uri)

        # Reset the internally cached set
        self.workspaces = []
        for workspace in self.service_dom.findall(NS['app'] % "workspace"):
            workspace_title = get_text(workspace, NS['atom'] % 'title')
            sd_l.debug("Found workspace '%s'", workspace_title)
            collections = []
            for collection_element in workspace.findall(NS['app'] % 'collection'):
                # app:collection + sword extensions
                c = SDCollection()
                c.load_from_etree(collection_element)
                collections.append(c)
            self.workspaces.append((workspace_title, collections))  # Add tuple
152