Python LMF library
 All Classes Namespaces Files Functions Variables
xml.py
Go to the documentation of this file.
1 #! /usr/bin/env python
2 
3 """! @package config
4 """
5 
6 from core.lexical_resource import LexicalResource
7 from core.lexicon import Lexicon
8 from utils.xml_format import parse_xml
9 from utils.error_handling import InputError
10 from config.mdf import mdf_lmf, lmf_mdf, mdf_order, ps_partOfSpeech, pdl_paradigmLabel, pd_grammaticalNumber, pd_person, pd_anymacy, pd_clusivity
11 from config.tex import partOfSpeech_tex, paradigmLabel_tex
12 from common.range import partOfSpeech_range, paradigmLabel_range
13 
14 # If an LMF module needs to access languages or fonts, copy the following lines:
15 # import config
16 # print config.xml.vernacular, config.xml.English, config.xml.national, config.xml.regional, config.xml.French
17 # print config.xml.font
18 
def sort_order_read(filename):
    """! @brief Read an XML file giving sort order.
    The root element is expected to contain only "rules" elements, each of which contains only "rule" elements.
    @param filename The name of the XML file to read with full path, for instance 'pylmflib/pylmflib/config/default/sort_order.xml'.
    @return A Python dictionary mapping each character ("str" attribute) to its rank ("rank" attribute, converted to float). If at least one rule carries a "type" attribute, a tuple (order, type) is returned instead, where the second dictionary maps characters to their type.
    @exception InputError if an element other than "rules" / "rule" is found at the expected level.
    """
    # 'module_name' was never defined in this module, so the error paths used
    # to fail with NameError instead of InputError; derive it from the module itself.
    module_name = __name__.rpartition(".")[2]
    error_message = "XML file '%s' is not well-formatted." % filename
    order = dict()
    char_type = dict()
    root = parse_xml(filename)
    # Parse XML elements
    for rules in root:
        # XML elements "rules" have 1 XML attribute: "level"
        if rules.tag != "rules":
            raise InputError(module_name + ".py", error_message)
        for rule in rules:
            # XML elements "rule" have 2 or 3 XML attributes: one for the character ("str"),
            # a second for the rank value ("rank"), and an optional one for the type ("type")
            if rule.tag != "rule":
                raise InputError(module_name + ".py", error_message)
            character = rule.attrib["str"]
            order[character] = float(rule.attrib["rank"])
            # The "type" attribute is optional: record it only when present
            if "type" in rule.attrib:
                char_type[character] = rule.attrib["type"]
    # Keep the historical return shape: a tuple only when some type was declared
    if char_type:
        return order, char_type
    return order
44 
def config_read(filename):
    """! @brief Read an XML file giving the user configuration.
    Besides building the lexical resource, this sets attributes of the 'config.xml' module (language codes, 'font', 'audio_path') and updates in place the MDF, LaTeX and range tables imported at the top of this file.
    @param filename The name of the XML file to read with full path, for instance 'pylmflib/pylmflib/config/default/config.xml'.
    @return A Lexical Resource, or None if the file contains no "LMF" element.
    @exception InputError if a top-level XML element is not one of "Language", "Font", "LMF", "MDF" or "LaTeX".
    """
    import os
    import config.xml
    # 'module_name' was never defined in this module, so the error path used
    # to fail with NameError instead of InputError; derive it from the module itself.
    module_name = __name__.rpartition(".")[2]
    # Value of the "att" attribute of a "feat" element -> name of the setter to call
    global_information_setters = {
        "languageCode": "set_language_code",
        "author": "set_author",
        "version": "set_version",
        "lastUpdate": "set_last_update",
        "license": "set_license",
        "characterEncoding": "set_character_encoding",
        "dateCoding": "set_date_coding",
        "creationDate": "set_creation_date",
        "projectName": "set_project_name",
        "description": "set_description",
    }
    lexicon_setters = {
        "language": "set_language",
        "languageScript": "set_languageScript",
        "label": "set_label",
        "lexiconType": "set_lexiconType",
        "entrySource": "set_entrySource",
        "localPath": "set_localPath",
    }
    # Stays None when the configuration has no "LMF" element (used to be an UnboundLocalError)
    lexical_resource = None
    configuration = parse_xml(filename)
    # Parse XML elements
    for section in configuration:
        if section.tag == "Language":
            # XML element "Language" have several XML subelements "lang"
            for lang in section:
                # XML elements "lang" have 2 XML attributes: one for the nature of the language ("att"),
                # a second for the language code ("val"). Plain attribute assignment: no code is
                # executed and values containing quotes are stored as they are.
                setattr(config.xml, lang.attrib["att"], lang.attrib["val"])
        elif section.tag == "Font":
            config.xml.font = dict()
            # XML element "Font" have several XML subelements "font"
            for font in section:
                # XML elements "font" have 2 XML attributes: one for the nature of the language ("att"),
                # a second for the variable name ("var"); the element text is the function body
                function = _build_lambda(font.attrib['var'], font.text)
                config.xml.font.update({font.attrib['att']: function})
        elif section.tag == "LMF":
            # Create lexical resource and set DTD version
            lexical_resource = LexicalResource(section[0].attrib["dtdVersion"])
            for item in section[0]:
                if item.tag == "GlobalInformation":
                    # Set global information; unknown "att" values are ignored
                    for feat in item:
                        setter = global_information_setters.get(feat.attrib["att"])
                        if setter is not None:
                            getattr(lexical_resource, setter)(feat.attrib["val"])
                elif item.tag == "Lexicon":
                    # Create lexicon and set identifier
                    lexicon = Lexicon(item.attrib["id"])
                    # Set lexicon attributes; unknown "att" values are ignored
                    for feat in item:
                        att = feat.attrib["att"]
                        setter = lexicon_setters.get(att)
                        if setter is None:
                            continue
                        getattr(lexicon, setter)(feat.attrib["val"])
                        if att == "localPath":
                            # Set absolute path to audio files, relative to the current working directory
                            config.xml.audio_path = os.path.abspath(os.path.abspath('.') + "/" + feat.attrib["val"]) + "/"
                    # Attach lexicon to the lexical resource
                    lexical_resource.add_lexicon(lexicon)
        elif section.tag == "MDF":
            for mdf in section:
                if mdf.tag == "mdf_lmf":
                    # XML elements "mdf_lmf" have 2 XML attributes: one for the name of the marker ("marker"),
                    # a second for the variable name ("var")
                    function = _build_lambda(mdf.attrib['var'], mdf.text)
                    mdf_lmf.update({mdf.attrib['marker']: function})
                elif mdf.tag == "ps_partOfSpeech":
                    # XML elements "ps_partOfSpeech" have 2 XML attributes: one for the MDF value ("ps"),
                    # a second for the LMF value ("partOfSpeech")
                    ps_partOfSpeech.update({mdf.attrib['ps']: mdf.attrib['partOfSpeech']})
                    # Also automatically update range of possible values allowed for LMF part of speech LexicalEntry attribute
                    partOfSpeech_range.add(mdf.attrib['partOfSpeech'])
                    # And automatically update the reverse operation
                    partOfSpeech_tex.update({mdf.attrib['partOfSpeech']: mdf.attrib['ps']})
                elif mdf.tag == "pdl_paradigmLabel":
                    # XML elements "pdl_paradigmLabel" have 2 XML attributes: one for the MDF value ("pdl"),
                    # a second for the LMF value ("paradigmLabel")
                    pdl_paradigmLabel.update({mdf.attrib['pdl']: mdf.attrib['paradigmLabel']})
                    # Also automatically update range of possible values allowed for LMF paradigm label Paradigm attribute
                    paradigmLabel_range.add(mdf.attrib['paradigmLabel'])
                    # And automatically update the reverse operation
                    paradigmLabel_tex.update({mdf.attrib['paradigmLabel']: mdf.attrib['pdl']})
                elif mdf.tag == "lmf_mdf":
                    # XML elements "lmf_mdf" have 2 XML attributes: one for the name of the marker ("marker"),
                    # a second for the variable name ("var")
                    function = _build_lambda(mdf.attrib['var'], mdf.text)
                    lmf_mdf.update({mdf.attrib['marker']: function})
                elif mdf.tag == "mdf_order":
                    # Replace the content of the imported list in place. Rebinding the name
                    # ('mdf_order = []') only created a local variable, so the configured
                    # order was silently dropped.
                    # NOTE(review): assumes the imported 'mdf_order' is a list - confirm in config.mdf
                    mdf_order[:] = _read_mdf_order(mdf)
        elif section.tag == "LaTeX":
            for param in section:
                if param.tag == "partOfSpeech_tex":
                    # XML elements "partOfSpeech_tex" have 2 or 3 XML attributes: one for the LMF value ("partOfSpeech"),
                    # a second for the LaTeX value ("tex"), and an optional one to define language ("lang")
                    if 'lang' in param.attrib:
                        key = (param.attrib['lang'], param.attrib['partOfSpeech'])
                    else:
                        key = param.attrib['partOfSpeech']
                    partOfSpeech_tex.update({key: param.attrib['tex']})
                    # Also automatically update range of possible values allowed for LMF part of speech LexicalEntry attribute
                    partOfSpeech_range.add(param.attrib['partOfSpeech'])
                elif param.tag == "paradigmLabel_tex":
                    # XML elements "paradigmLabel_tex" have 2 XML attributes: one for the LMF value ("paradigmLabel"),
                    # a second for the LaTeX value ("tex")
                    paradigmLabel_tex.update({param.attrib['paradigmLabel']: param.attrib['tex']})
                    # Also automatically update range of possible values allowed for LMF paradigm label Paradigm attribute
                    paradigmLabel_range.add(param.attrib['paradigmLabel'])
        else:
            raise InputError(module_name + ".py", "XML file '%s' is not well-formatted." % filename)
    return lexical_resource


def _build_lambda(var, body):
    """! @brief Build a one-argument function from text found in the configuration file.
    @param var Name of the function parameter.
    @param body Python expression, as text, computing the function result.
    @return The function equivalent to 'lambda <var>: <body>'.
    """
    # SECURITY: 'body' comes straight from the configuration file and is evaluated as
    # Python code; only load configuration files from trusted sources.
    # eval() is used instead of exec("l = lambda ..."), because under Python 3 exec()
    # cannot bind a local variable of the calling function.
    return eval("lambda " + var + ": " + body)


def _read_mdf_order(mdf):
    """! @brief Convert an "mdf_order" XML element into a nested list of tag names.
    @param mdf The "mdf_order" XML element.
    @return A list holding the tag of each child, each followed by the list of its own children tags (nested one level further) when it has any.
    """
    order = []
    for element in mdf:
        order.append(element.tag)
        children = []
        for level1 in element:
            children.append(level1.tag)
            grandchildren = [level2.tag for level2 in level1]
            if grandchildren:
                children.append(grandchildren)
        if children:
            order.append(children)
    return order
def parse_xml
Parse an XML file.
Definition: xml_format.py:35
def sort_order_read
Read an XML file giving sort order.
Definition: xml.py:19
def config_read
Read an XML file giving the user configuration.
Definition: xml.py:45