1
2
3
4 """
Class to accept and parse a Service Document response and make it queryable.
6
7 Example:
8
9 >>> doc = '''<?xml version="1.0" ?>
10 <service xmlns:dcterms="http://purl.org/dc/terms/"
11 xmlns:sword="http://purl.org/net/sword/terms/"
12 xmlns:atom="http://www.w3.org/2005/Atom"
13 xmlns="http://www.w3.org/2007/app">
14
15 <sword:version>2.0</sword:version>
16 <sword:maxUploadSize>16777216</sword:maxUploadSize>
17
18 <workspace>
19 <atom:title>Main Site</atom:title>
20
21 <collection href="http://swordapp.org/col-iri/43">
22 <atom:title>Collection 43</atom:title>
23 <accept>*/*</accept>
24 <accept alternate="multipart-related">*/*</accept>
25 <sword:collectionPolicy>Collection Policy</sword:collectionPolicy>
26 <dcterms:abstract>Collection Description</dcterms:abstract>
27 <sword:mediation>false</sword:mediation>
28 <sword:treatment>Treatment description</sword:treatment>
29 <sword:acceptPackaging>http://purl.org/net/sword/package/SimpleZip</sword:acceptPackaging>
30 <sword:acceptPackaging>http://purl.org/net/sword/package/METSDSpaceSIP</sword:acceptPackaging>
31 <sword:service>http://swordapp.org/sd-iri/e4</sword:service>
32 </collection>
33 </workspace>
34 </service>'''
35
36 >>> from sword2 import ServiceDocument
37 >>> s = ServiceDocument(doc)
38 >>> s.maxUploadSize
39 16777216
40 >>> s.workspaces
41 [('Main Site', [<sword2.service_document.Collection object at 0x167be10>])]
42
43 >>> for c in s.workspaces[0][1]: print c
44 ...
45 Collection: 'Collection 43' @ 'http://swordapp.org/col-iri/43'. Accept:[]
46 SWORD: Collection Policy - 'Collection Policy'
47 SWORD: Treatment - 'Treatment description'
48 SWORD: Accept Packaging: '['http://purl.org/net/sword/package/SimpleZip', 'http://purl.org/net/sword/package/METSDSpaceSIP']'
49 SWORD: Nested Service Documents - 'http://swordapp.org/sd-iri/e4'
50
51 """
52
53 from sword2_logging import logging
54 sd_l = logging.getLogger(__name__)
55
56 from collection import SDCollection
57
58 from compatible_libs import etree
59 from utils import NS, get_text
60
def __init__(self, xml_response=None, sd_uri=None):
    """Create an empty service document and optionally load a response.

    Parameters:
        xml_response -- raw XML of a service document. When truthy it is
                        passed to ``load_document`` immediately.
        sd_uri       -- URI associated with the document; stored on the
                        instance and included in log messages.
    """
    self.sd_uri = sd_uri
    self.parsed = False
    self.valid = False
    self.maxUploadSize = 0
    self.version = None
    self.workspaces = []
    # Define these up front so they exist on every instance, including one
    # created without a document or one whose load failed early.
    self.raw_response = None
    self.service_dom = None

    if xml_response:
        self.load_document(xml_response)
72
def load_document(self, xml_response):
    """Parse a raw service document, validate it and list its workspaces.

    The raw text is kept in ``self.raw_response`` and the parsed tree in
    ``self.service_dom``. ``self.parsed`` is set once parsing succeeds and
    ``self.valid`` receives the result of ``validate()``.

    Any ``Exception`` raised along the way is caught and logged rather than
    propagated, so callers should inspect ``self.parsed`` / ``self.valid``
    to find out whether loading worked.

    Parameters:
        xml_response -- raw XML text of the service document.
    """
    try:
        if self.sd_uri:
            sd_l.debug("Attempting to load service document for %s" % self.sd_uri)
        else:
            sd_l.debug("Attempting to load service document")
        self.raw_response = xml_response
        self.service_dom = etree.fromstring(xml_response)
        self.parsed = True
        self.valid = self.validate()
        sd_l.info("Initial SWORD2 validation checks on service document - Valid document? %s" % self.valid)
        self._enumerate_workspaces()
    except Exception as e:
        # 'as' form is valid on Python 2.6+ and Python 3.
        sd_l.error("Could not parse the Service Document response from the server - %s" % e)
        sd_l.debug("Received the following raw response:")
        # Log the argument itself: self.raw_response may be unset or stale
        # if the failure happened before it was assigned above.
        sd_l.debug(xml_response)
91
def validate(self):
    """Run the basic SWORD2 checks on the parsed service document.

    Checks performed, each logging an error on failure:
      * a sword:version value is present and equals "2.0";
      * sword:maxUploadSize, when present, converts with ``int()``;
      * at least one app:workspace element is found.

    Side effects: stores the version text in ``self.version`` and, when it
    converts, the upload limit in ``self.maxUploadSize``.

    Returns True only if every check passes; returns False immediately when
    no document has been parsed yet.
    """
    if not self.parsed:
        return False

    valid = True

    # get_text presumably yields the element text, or a falsy value when the
    # element is missing -- confirm against utils.get_text.
    self.version = get_text(self.service_dom, NS['sword'] % "version")
    if self.version:
        if self.version != "2.0":
            sd_l.error("The service document states that the server's endpoint is not SWORD 2.0 - stated version:%s" % self.version)
            valid = False
    else:
        sd_l.error("The service document did not have a sword:version")
        valid = False

    # The upload limit is optional; it only invalidates the document when it
    # is present but not an integer.
    maxupload = get_text(self.service_dom, NS['sword'] % "maxUploadSize")
    if maxupload:
        try:
            self.maxUploadSize = int(maxupload)
        except ValueError:
            sd_l.error("The service document did not have maximum upload size parseable as an integer.")
            valid = False

    # Identity test: the lookup result is compared against None itself, not
    # by equality or truthiness.
    test_workspace = self.service_dom.find(NS['app'] % "workspace")
    if test_workspace is not None:
        # NOTE(review): the message mentions app:collection, but only the
        # presence of a workspace is checked here.
        sd_l.debug("At least one app:workspace found, with at least one app:collection within it.")
    else:
        valid = False
        sd_l.error("Could not find a app:workspace element in the service document.")

    return valid
130
def _enumerate_workspaces(self):
    """Fill ``self.workspaces`` from the parsed service document.

    Does nothing except log an error when the document is not marked valid.
    Otherwise ``self.workspaces`` is replaced by a list of
    ``(workspace title, [SDCollection, ...])`` tuples, one per
    app:workspace element found.
    """
    if not self.valid:
        sd_l.error("The service document didn't pass the SWORD2 validation steps ('MUST' statements in spec). The workspaces and collections will not be enumerated.")
        return

    if self.sd_uri:
        sd_l.info("Enumerating workspaces and collections from the service document for %s" % self.sd_uri)

    # Same list object as self.workspaces, so entries become visible on the
    # instance as soon as they are appended.
    entries = self.workspaces = []
    for ws_element in self.service_dom.findall(NS['app'] % "workspace"):
        title = get_text(ws_element, NS['atom'] % 'title')
        sd_l.debug("Found workspace '%s'" % title)

        members = []
        for col_element in ws_element.findall(NS['app'] % 'collection'):
            sd_collection = SDCollection()
            sd_collection.load_from_etree(col_element)
            members.append(sd_collection)

        entries.append((title, members))
152