7 from utils.xml_format
import write_result, Element, SubElement
8 from utils.io
import ENCODING
11 """! @brief Write an XML LMF file.
12 @param object The LMF instance to write as XML.
13 @param filename The full path of the XML LMF file to write, for instance 'user/output.xml'.
16 root = Element(object.__class__.__name__)
23 """! @brief Create XML sub-elements for an existing XML element by parsing an LMF object instance.
24 @param object An LMF object instance.
25 @param element XML element for which sub-elements have to be created according to LMF object attributes.
28 for item
in object.__dict__.items():
32 if not attr_name.startswith(
'_'):
33 if attr_value
is not None:
35 if type(attr_value)
is bool:
36 attr_value = unicode(attr_value)
38 if type(attr_value)
is list:
40 for item
in attr_value:
41 sub_element = SubElement(element, item.__class__.__name__)
43 elif type(attr_value)
not in [int, str, unicode]:
45 sub_element = SubElement(element, attr_value.__class__.__name__)
47 elif attr_name
in [
"dtdVersion",
"id",
"targets"]:
49 if type(attr_value)
is int:
50 attr_value = unicode(attr_value)
51 element.attrib.update({attr_name: attr_value})
52 if attr_name ==
"targets":
56 feat = SubElement(element,
"feat", att=attr_name, val=attr_value)
70 """Insert a hyperlink <a href=xxx>xxx</a> in XML.
73 from pylmflib
import options
75 if options.cross_references:
78 id = object.get_lexical_entry().get_id()
79 except AttributeError:
85 a.text = element.attrib[
"targets"]
88 return (object, element)
91 """ Handle reserved characters.
96 """Replace 'fv:xxx' and '|fv{xxx}' by '<span class="vernacular">xxx</span>'.
100 pattern =
r"(([^:\|]*)fv:([^\s\.,)]*)(.*))|(([^:\|]*)\|fv{([^}]*)}(.*))"
101 result = re.match(pattern, element.attrib[
"val"])
106 if result.group(1)
is not None:
107 before = result.group(2)
108 vernacular = result.group(3)
109 after = result.group(4)
110 elif result.group(5)
is not None:
111 before = result.group(6)
112 vernacular = result.group(7)
113 after = result.group(8)
115 if previous_span
is None:
116 element.text = before
118 previous_span.tail = before
120 span = Element(
"span")
121 span.attrib[
"class"] =
"vernacular"
122 span.text = vernacular
124 element.insert(index, span)
126 result = re.match(pattern, after)
135 """Replace 'fn:xxx' and '|fn{xxx}' by '<span class="national">xxx</span>'.
139 pattern =
r"([^:\|]*)((fn:([^\s\.,)]*)|(\|fn{([^}]*)})))(.*)"
140 result = re.match(pattern, element.attrib[
"val"])
145 before = result.group(1)
146 if result.group(4)
is not None:
147 national = result.group(4)
148 elif result.group(6)
is not None:
149 national = result.group(6)
150 after = result.group(7)
152 if previous_span
is None:
153 element.text = before
155 previous_span.tail = before
157 span = Element(
"span")
158 span.attrib[
"class"] =
"national"
161 element.insert(index, span)
163 result = re.match(pattern, after)
172 """Replace '{xxx}' by '<span class="ipa">xxx</span>'.
176 pattern =
r"([^{}]*){([^}]*)}(.*)"
177 result = re.match(pattern, element.attrib[
"val"])
182 before = result.group(1)
183 ipa = result.group(2)
184 after = result.group(3)
186 if previous_span
is None:
187 element.text = before
189 previous_span.tail = before
191 span = Element(
"span")
192 span.attrib[
"class"] =
"ipa"
195 element.insert(index, span)
197 result = re.match(pattern, after)
206 """Replace '@xxx' by '<span class="pinyin">xxx</span>'.
210 pattern =
r"([^@]*)@(\w*)(.*)"
211 result = re.match(pattern, element.attrib[
"val"])
216 before = result.group(1)
217 pinyin = result.group(2)
218 after = result.group(3)
220 if previous_span
is None:
221 element.text = before
223 previous_span.tail = before
225 span = Element(
"span")
226 span.attrib[
"class"] =
"pinyin"
229 element.insert(index, span)
231 result = re.match(pattern, after)
240 """Handle small caps.
241 Replace '°xxx' by '<span class="sc">xxx</span>'.
244 pattern =
r"([^°]*)°([^\s\.,)+/:]*)(.*)"
246 result = re.match(pattern, element.attrib[
"val"].encode(ENCODING))
251 before = result.group(1).decode(ENCODING)
252 sc = result.group(2).decode(ENCODING)
253 after = result.group(3).decode(ENCODING)
255 if previous_span
is None:
256 element.text = before
258 previous_span.tail = before
260 span = Element(
"span")
261 span.attrib[
"class"] =
"sc"
264 element.insert(index, span)
266 result = re.match(pattern, after.encode(ENCODING))
275 """Replace tone subscripts by '<sub>xxx</sub>'.
277 from utils.io
import ENCODING
279 if element.attrib[
"att"] ==
"tone":
282 if element.text
is None:
285 for c
in element.attrib[
"val"]:
286 if c
in set(
"abcd123"):
291 element.insert(index, sub)
294 previous_sub.tail =
""
298 if previous_sub
is None:
301 previous_sub.tail += c
302 if element.text == element.attrib[
"val"]:
306 if element.attrib[
"att"] !=
"lexeme":
309 tones =
"˩˧˥".decode(encoding=ENCODING)
311 current_pattern =
"([^" + tones +
"#$]+)(#?[" + tones +
"]{1,2}[$#]?)([abcd123]?)"
312 pattern =
"^" + current_pattern +
"$"
313 if re.search(pattern, element.attrib[
"val"]):
314 result = re.match(pattern, element.attrib[
"val"])
315 before = result.group(1) + result.group(2)
316 subscript = result.group(3)
317 element.text = before
318 if len(subscript) != 0:
323 element.insert(0, sub)
324 if element.text == element.attrib[
"val"]:
329 syllable =
"([^" + tones +
"#$]{2,5})(#?[" + tones +
"]{1,2}[$#]?)([abcd123]?)"
331 for syllable_nb
in range (2, 6):
332 current_pattern += syllable
333 pattern =
"^" + current_pattern +
"$"
334 if re.search(pattern, element.attrib[
"val"]):
335 result = re.match(pattern, element.attrib[
"val"])
338 if element.text
is None:
340 for i
in range (0, syllable_nb):
341 before = result.group(i*3+1) + result.group(i*3+2)
342 subscript = result.group(i*3+3)
343 if i != syllable_nb - 1:
347 if previous_sub
is None:
348 element.text += before
350 previous_sub.tail += before
351 if len(subscript) != 0:
356 element.insert(i, sub)
359 previous_sub.tail =
""
360 if element.text == element.attrib[
"val"]:
def xml_lmf_write
Write an XML LMF file.
def add_link
Functions to process XML/XHTML layout.
def build_sub_elements
Create XML sub-elements for an existing XML element by parsing an LMF object instance.