1
2
3
4 """Non-SWORD2 specific Atom/APP helper classes.
5
6 Most often used class will be 'Entry' - it provides an easy means to make an atom:entry
7 document which can be used directly as the metadata entry.
8
Also provides Category, a convenience class that simplifies reading category information from an atom:entry
10 """
11
12 from sword2_logging import logging
13 from implementation_info import __version__
14 coll_l = logging.getLogger(__name__)
15
16 from compatible_libs import etree
17 from utils import NS, get_text
18
19 from datetime import datetime
20
class Category(object):
    """Convenience class to aid in the interpreting of atom:category elements in XML. Currently, this is read-only.

    Usage:

    >>> from sword2 import Category

    ... # `Category` expects an etree element (`c_node` in this example) referencing an <atom:category> element:
    <atom:category term="...." scheme="...." label="....."> .... </atom:category>

    # Load a `Category` instance:
    >>> c = Category(dom=c_node)

    # Overrides `__str__` to provide a simple means to view the content
    >>> print(c)
    Category scheme:http://purl.org/net/sword/terms/ term:http://purl.org/net/sword/terms/originalDeposit label:Original Deposit text:'None'

    # Element attributes appear as object attributes:
    >>> c.scheme
    'http://purl.org/net/sword/terms/'

    # Element text will be in the text attribute, if text is present
    >>> c.text is None
    True
    """

    def __init__(self, term=None, scheme=None, label=None, text=None, dom=None):
        """Init a `Category` - 99% of the time, this will be done by setting the `dom` parameter.

        However, if (for testing) there is a need to 'fake' a `Category`, all the attributes
        can be set in the constructor.

        :param term: value for the `term` attribute
        :param scheme: value for the `scheme` attribute
        :param label: value for the `label` attribute
        :param text: value for the element text
        :param dom: element to load from; values found on it override the keyword values above
        """
        self.term = term
        self.scheme = scheme
        self.label = label
        self.text = text
        # Always define `dom`, so the attribute exists even for a hand-built instance.
        self.dom = dom
        if dom is not None:
            self._from_element(dom)

    def _from_element(self, e):
        """Load the `Category`'s attributes using the information within the element `e`.

        Attribute names are matched on their suffix, so a namespace-qualified name such as
        '{uri}term' is recognised as well as a plain 'term'. Attributes that are absent, and
        empty element text, leave the current values untouched.
        """
        for name, value in e.attrib.items():
            if name.endswith("scheme"):
                self.scheme = value
            elif name.endswith("term"):
                self.term = value
            elif name.endswith("label"):
                self.label = value
        if e.text:
            self.text = e.text

    def __str__(self):
        """Rudimentary way to display the data held, in a way amenable to stdout."""
        return "Category scheme:%s term:%s label:%s text:'%s'" % (self.scheme,
                                                                  self.term,
                                                                  self.label,
                                                                  self.text)
83
84
class Entry(object):
    """Used to create `Entry`s - for multipart/metadata submission. Has a simple and extendable way to add in
    namespace-aware key-value pairs.

    Example of use:

    >>> from sword2 import Entry
    >>> e = Entry()    # it can be opened blank, but more usefully...
    >>> e = Entry(id="atom id",
                  title="atom title",
                  dcterms_identifier="some other id")

    # Getting the document as a string
    >>> print(str(e))
    <?xml version="1.0"?><entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
    <updated>2011-06-05T16:20:34.914474</updated><dcterms:identifier>some other id</dcterms:identifier><id>atom id</id><title>atom title</title></entry>

    # Adding fields to the metadata entry
    # dcterms (and other, non-atom fields) can be used by passing in a parameter with an underscore between the
    # prefix and element name, eg:
    >>> e.add_fields(dcterms_title="dcterms title", dcterms_some_other_field="other")

    # atom:author field is treated slightly differently than all the other fields:
    # dictionary is required
    >>> e.add_fields(author={"name": "Ben", "email": "foo@example.org"})
    >>> print(str(e))
    <?xml version="1.0"?>
    <entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
        <updated>2011-06-05T16:20:34.914474</updated>
        <dcterms:identifier>some other id</dcterms:identifier>
        <id>atom id</id><title>atom title</title>
        <author>
            <name>Ben</name>
            <email>foo@example.org</email>
        </author>
        <dcterms:some_other_field>other</dcterms:some_other_field>
        <dcterms:title>dcterms title</dcterms:title>
    </entry>

    # Other namespaces - use `Entry.register_namespace` to add them to the list of those considered (prefix, URL):
    >>> e.register_namespace("myschema", "http://example.org")
    >>> e.add_fields(myschema_foo="bar")
    >>> print(str(e))
    <?xml version="1.0"?><entry xmlns="http://www.w3.org/2005/Atom" xmlns:dcterms="http://purl.org/dc/terms/">
        <generator uri="http://bitbucket.org/beno/python-sword2" version="0.1"/>
        <updated>2011-06-05T16:20:34.914474</updated>
        <dcterms:identifier>some other id</dcterms:identifier>
        <id>atom id</id><title>atom title</title>
        <author>
            <name>Ben</name>
            <email>foo@example.org</email>
        </author>
        <dcterms:some_other_field>other</dcterms:some_other_field>
        <dcterms:title>dcterms title</dcterms:title>
        <myschema:foo xmlns:myschema="http://example.org">bar</myschema:foo>
    </entry>

    This class doesn't provide editing/updating functions as the full etree API is exposed through the
    attribute 'entry'. For example:

    >>> len(e.entry)
    9
    """
    # Un-prefixed keys that map onto atom elements; `add_field` keeps at most one of each.
    atom_fields = ['title', 'id', 'updated', 'summary']
    # Prefixes accepted in 'prefix_tag' keys. NOTE: this is a class-level list, so a prefix added
    # through `register_namespace` becomes usable by every `Entry`, not just the one it was called on.
    add_ns = ['dcterms', 'atom', 'app']
    bootstrap = """<?xml version="1.0"?>
<entry xmlns="http://www.w3.org/2005/Atom"
        xmlns:dcterms="http://purl.org/dc/terms/">
    <generator uri="http://bitbucket.org/beno/python-sword2" version="%s"/>
</entry>""" % __version__

    def __init__(self, **kw):
        """Create a basic `Entry` document, setting the generator and a timestamp for the updated element value.

        Any keyword parameters passed in will be passed to the add_fields method and added to the entry
        bootstrap document. It's currently not possible to add a namespace and use it within the init call.
        """
        self.entry = etree.fromstring(self.bootstrap)
        if 'updated' not in kw:
            # NOTE(review): this is a naive local timestamp with no UTC offset; confirm whether
            # the receiving server needs an RFC 3339 value with an explicit offset.
            kw['updated'] = datetime.now().isoformat()
        self.add_fields(**kw)

    def register_namespace(self, prefix, uri):
        """Registers a namespace, making it available for use when adding subsequent fields to the entry.

        Registration will also affect the XML export, adding in the xmlns:prefix="url" attribute when required.

        :param prefix: short name used both in the XML output and as the 'prefix_' part of field keys
        :param uri: namespace URI the prefix stands for
        """
        etree.register_namespace(prefix, uri)
        # Registering the same prefix twice must not grow the shared list.
        if prefix not in self.add_ns:
            self.add_ns.append(prefix)
        if prefix not in NS:
            # NS values are '%'-templates of the form '{uri}%s'; escape any literal '%' in the
            # URI so that the later `NS[prefix] % tag` substitution still works.
            NS[prefix] = "{%s}%%s" % uri.replace("%", "%%")

    def add_field(self, k, v):
        """Append a single key-value pair to the `Entry` document.

        eg

        >>> e.add_field("myprefix_fooo", "value")

        It is advisable to use the `Entry.add_fields` method instead as this is neater and simplifies element entry.

        Keys are handled as follows:

        * a key listed in `atom_fields` sets the text of that atom element, creating it if it is missing
          and overwriting it otherwise;
        * a 'prefix_tag' key whose prefix is in `add_ns` always appends a new element;
        * "author" with a dictionary value is passed on to `add_author`:

        >>> e.add_field("author", {'name': ".....",
                                   'email': "....",
                                   'uri': "...."})

        Note that this means of entry is not supported for other elements. Any other key (including one
        with an unregistered prefix) is silently ignored.
        """
        if k in self.atom_fields:
            # These atom elements are kept unique: update in place when already present.
            existing = self.entry.find(NS['atom'] % k)
            if existing is None:
                existing = etree.SubElement(self.entry, NS['atom'] % k)
            existing.text = v
        elif "_" in k:
            # Only the first underscore separates prefix from tag, so the tag may contain underscores.
            nmsp, tag = k.split("_", 1)
            if nmsp in self.add_ns:
                e = etree.SubElement(self.entry, NS[nmsp] % tag)
                e.text = v
        elif k == "author" and isinstance(v, dict):
            self.add_author(**v)

    def add_fields(self, **kw):
        """Add in multiple elements in one method call.

        Eg:

        >>> e.add_fields(dcterms_title="Origin of the Species",
                         dcterms_contributor="Darwin, Charles")
        """
        # `.items()` rather than `.iteritems()`, so this runs on both Python 2 and Python 3.
        for k, v in kw.items():
            self.add_field(k, v)

    def add_author(self, name, uri=None, email=None):
        """Convenience function to add in the atom:author elements in the fashion
        required for Atom.

        :param name: text for the atom:name child (always written)
        :param uri: text for the atom:uri child; omitted when empty or None
        :param email: text for the atom:email child; omitted when empty or None
        """
        a = etree.SubElement(self.entry, NS['atom'] % 'author')
        n = etree.SubElement(a, NS['atom'] % 'name')
        n.text = name
        if uri:
            u = etree.SubElement(a, NS['atom'] % 'uri')
            u.text = uri
        if email:
            e = etree.SubElement(a, NS['atom'] % 'email')
            e.text = email

    def __str__(self):
        """Export the XML as a string, ready for use, prefixed with an XML declaration."""
        xml_str = etree.tostring(self.entry)
        if not isinstance(xml_str, str):
            # On Python 3 `tostring` returns bytes; `__str__` must return text, and the
            # `startswith` check below needs matching types.
            xml_str = xml_str.decode("utf-8")
        if not xml_str.startswith('<?xml version="1.0"?>'):
            xml_str = '<?xml version="1.0"?>' + xml_str
        return xml_str

    def pretty_print(self):
        """A version of the XML document which should be slightly more readable on the command line."""
        # NOTE(review): `pretty_print` is an lxml keyword; presumably `etree` is lxml whenever
        # this method is used -- confirm against compatible_libs.
        return etree.tostring(self.entry, pretty_print=True)
245