PyMetOffice  0.4
Pythonic access to UK Met Office DataPoint API
base.py
1 #!/usr/bin/python
2 # Change the above for *nix
3 
4 ## \package base
5 # This provides the low level access to the
6 # UK Met Office DataPoint API.
7 # It is not intended for direct use, but feel free!
8 #
9 # To use these utilities, you need to read the
10 # Met Office documentation. Unfortunately,
11 # this is not always completely accurate
12 # or even internally consistent. Doubtless things will
13 # improve.
14 #
15 
## This decoding hack is more or less justified because the
# Met Office Datapoint service is for UK only at present.
# ASCII won't do because some Gaelic site names have
# accented characters. Hence, I chose latin-1 ('L1' is an alias for it).
#
# The hack only exists on Python 2: reload(sys) is needed there because
# the interpreter start-up removes sys.setdefaultencoding. On Python 3
# reload is not a builtin (NameError) and sys has no setdefaultencoding
# (AttributeError), so the hack is skipped and the module still imports.

import sys

try:
    reload(sys)
    sys.setdefaultencoding('L1')
except (NameError, AttributeError):
    # Python 3: nothing to change.
    pass
24 
25 import json
26 import xml.dom.minidom as xdm
27 import codecs
28 import places
29 
## Pick whichever urlopen implementation this interpreter provides and
# bind it to _urlopen: urllib.request first, then urllib2, then plain
# urllib as the last resort.
# Only ImportError triggers a fallback, so any other failure while
# importing is reported instead of being silently swallowed.
try:
    import urllib.request
    _urlopen = urllib.request.urlopen
except ImportError:
    try:
        import urllib2
        _urlopen = urllib2.urlopen
    except ImportError:
        import urllib
        _urlopen = urllib.urlopen
40 
41 #####################################################
42 
43 ## This is the base model object and provides
44 # low level access.
class baseModel(object):
    ## Constructor
    # \param url
    # This is mandatory and must be the current
    # base url for the Met Office DataPoint services.
    # At the time of writing, this is
    # http://datapoint.metoffice.gov.uk
    # It has changed in the past and may well do so again.
    # \param key
    # This is mandatory and you must obtain a key from the
    # Met Office DataPoint web site. Currently, this is free
    # for modest usage (<= 5000 data requests per day and
    # <= 100 requests/minute). Register (free)
    # at http://www.metoffice.gov.uk/datapoint (currently).
    def __init__(self, url, key):
        object.__init__(self)
        ## Holds client api-key.
        self._key = key
        ## Holds current DataPoint api URL.
        self._url = url
        # Location whose places are currently held in _placeList.
        self._currentLocation = ''
        self._placeList = []
        # The site lists are fetched on first use; the two flags record
        # whether the corresponding list has been retrieved yet.
        self._obsSitelist = []
        self._fcsSitelist = []
        self._obCached = False
        self._fcCached = False

    ## True only when both the observation and the forecast
    # site lists have been retrieved.
    def _getCached(self):
        return self._obCached and self._fcCached
    Cached = property(_getCached)

    ## Returns the observation site list, fetching it (via the XML
    # request) the first time it is asked for.
    def _getObList(self):
        if not self._obCached:
            self._obsSitelist = self._getObsXMLSitelist()
            self._obCached = True
        return self._obsSitelist

    ## Returns the forecast site list, fetching it (via the XML
    # request) the first time it is asked for.
    def _getFcList(self):
        if not self._fcCached:
            self._fcsSitelist = self._getFcsXMLSitelist()
            self._fcCached = True
        return self._fcsSitelist

    ## This provides the list of observation sites.
    # At the time of writing, there are just over
    # 5000 of these.
    # NOTE(review): this count and the forecast count below may be
    # swapped - confirm against the service.
    ObservationSitelist = property(_getObList)
    ## This provides the list of forecast sites.
    # At the time of writing, there are just over
    # 120 of these.
    ForecastSitelist = property(_getFcList)

    ## All data requests made to the DataPoint API come
    # through this method. It is made as future-proof
    # as reasonably possible but does assume that the API
    # will stick to the URL+parameter-string format for
    # requests.
    # \em Note: some Met Office documentation
    # leaves out the second folder (currently data).
    # The API would not work for me with this omission.
    # The same documentation gives a previous base url.
    # \param access Currently 'public'
    # \param datatype Currently 'data'
    # \param datacategory one of the following at present:
    # val for values (quite a lot of requests)\n
    # layer usually used for maps and related requests\n
    # txt usually for requests where the interesting data is text
    # \param dataclass Currently 'wxobs' for observations and
    # 'wxfcs' for forecasts.
    # \param datasource This has different meanings for different types
    # of requests.
    # 'all' for many requests
    # a layer name for map requests
    # 'mountain area' for mountain forecasts
    # 'nationalpark' for National Park forecasts.
    # \param dataformat Currently available formats are 'xml' and 'json'
    # \param datarequest The Met Office calls this \em location which is
    # rather misleading.
    # For many requests it is a location, given as an id number in a string.
    # It may also be an image format 'png' or 'gif' at present. This is used
    # for requests returning image data (maps etc).
    # It may also be 'sitelist' or 'capabilities' which do
    # what they say on the tin.
    # \param requestargs The other parameters required by the request,
    # passed as keyword arguments. The mandatory 'key' entry holding the
    # api access key is added automatically. Values are inserted into the
    # query string as they are (no URL quoting is applied).
    # \returns A 2-tuple consisting of: the raw response body (xml or json
    # according to the dataformat requested; the caller must parse it) and
    # the URL used for the request (for diagnostic purposes, mostly discarded).
    def _makeApiRequest(self,
                        access,
                        datatype,
                        datacategory,
                        dataclass,
                        datasource,
                        dataformat,
                        datarequest,
                        **requestargs):
        url = '/'.join([self._url,
                        access,
                        datatype,
                        datacategory,
                        dataclass,
                        datasource,
                        dataformat,
                        datarequest])
        # requestargs is this call's own dictionary, so adding to it
        # cannot affect the caller.
        requestargs['key'] = self._key
        # %-formatting accepts non-string values, which str.join does not.
        params = '&'.join(['%s=%s' % (k, v) for (k, v) in requestargs.items()])
        url = '?'.join([url, params])
        source = _urlopen(url)
        try:
            # Read to end-of-data rather than issuing one size-limited
            # read, so long responses are not cut short.
            data = source.read()
        finally:
            # Always release the connection, even if reading fails.
            source.close()
        return (data, url)

    ## Helper: fetches the JSON 'capabilities' resource for a data class
    # and returns its list of time steps.
    # \param dataclass 'wxobs' or 'wxfcs'
    # \param res Value of the 'res' request parameter
    # ('hourly', 'daily' or '3hourly' in this class).
    def _getTimeSteps(self, dataclass, res):
        (d, u) = self._makeApiRequest('public',
                                      'data',
                                      'val',
                                      dataclass,
                                      'all',
                                      'json',
                                      'capabilities',
                                      res=res)
        return json.loads(d)['Resource']['TimeSteps']['TS']

    ## Helper: fetches the JSON 'sitelist' for a data class.
    # Only the four fields a Site needs are taken from each record, so
    # any further fields in a record are ignored.
    # \param dataclass 'wxobs' or 'wxfcs'
    # \return List of Site objects sorted by name.
    def _getJSONSitelist(self, dataclass):
        (d, u) = self._makeApiRequest('public',
                                      'data',
                                      'val',
                                      dataclass,
                                      'all',
                                      'json',
                                      'sitelist')
        records = json.loads(d)['Locations']['Location']
        return sorted(Site(r['name'],
                           r['id'],
                           r['latitude'],
                           r['longitude']) for r in records)

    ## Helper: fetches the XML 'sitelist' for a data class and builds a
    # Site from the attributes of every Location element.
    # \param dataclass 'wxobs' or 'wxfcs'
    # \return List of Site objects sorted by name.
    def _getXMLSitelist(self, dataclass):
        (d, u) = self._makeApiRequest('public',
                                      'data',
                                      'val',
                                      dataclass,
                                      'all',
                                      'xml',
                                      'sitelist')
        doc = xdm.parseString(d)
        return sorted(Site(node.getAttribute('name'),
                           node.getAttribute('id'),
                           node.getAttribute('latitude'),
                           node.getAttribute('longitude'))
                      for node in doc.getElementsByTagName('Location'))

    ## This method provides the list of times for which
    # observations are available.
    # NOTE(review): method name inferred from _getForecastTimes - confirm.
    # \return List of times in yyyymmddThhmmZ format
    def _getObservationTimes(self):
        return self._getTimeSteps('wxobs', 'hourly')

    ## This method provides a list of days for which forecasts are
    # available.
    # \return List of times in yyyymmddThhmmZ format
    def _getForecastDays(self):
        return self._getTimeSteps('wxfcs', 'daily')

    ## This method provides a list of times for which forecasts are
    # available.
    # \return List of times in yyyymmddThhmmZ format
    def _getForecastTimes(self):
        return self._getTimeSteps('wxfcs', '3hourly')

    ## \b Warning: may not be completely reliable. Work in progress.
    # This method obtains the list of observation sites using a
    # JSON request. It has been observed to fail through not
    # collecting all data from the connection.
    # NOTE(review): method name inferred from the Site documentation - confirm.
    # \return List of Site objects.
    def _getObservationSitelist(self):
        return self._getJSONSitelist('wxobs')

    ## \b Warning: may not be completely reliable.
    # This method obtains the list of sites for forecasts using a
    # JSON request. It has been observed to fail through not
    # collecting all data from the connection, although less
    # often than the observation site request.
    # NOTE(review): method name inferred by analogy - confirm.
    # \return List of Site objects.
    def _getForecastSitelist(self):
        return self._getJSONSitelist('wxfcs')

    ## This version retrieves forecast sites using an XML request
    # and seems to be more reliable! No idea why that should be.
    # \return List of Site objects for which forecasts are available.
    def _getFcsXMLSitelist(self):
        return self._getXMLSitelist('wxfcs')

    ## This version retrieves observation sites using
    # an XML request. Really provided for completeness.
    # \return List of Site objects where observations are made.
    def _getObsXMLSitelist(self):
        return self._getXMLSitelist('wxobs')

    ## This method obtains a list of places corresponding
    # to a given location. The list for the most recently
    # requested location is kept, so repeating the same
    # location does not build a new PlaceSet.
    # \param location Passed unchanged to places.PlaceSet.
    def _getPlaces(self, location):
        if self._currentLocation != location:
            self._placeList = places.PlaceSet(location)._places
            self._currentLocation = location
        return self._placeList

    ## A method to get round the lack of 'nearestlatlon'
    # for observations.
    # \param place Object whose getLatLon() returns a mapping
    # with 'lat' and 'lon' entries.
    # \return The id of the nearest observation site, or None
    # when the observation site list is empty.
    def _getNearestObservationId(self, place):
        # Local import keeps this method independent of the file's import list.
        import math

        sites = self.ObservationSitelist
        here = place.getLatLon()
        lt1 = float(here['lat'])
        ln1 = float(here['lon'])
        if not sites:
            return None

        def separation(site):
            lt2 = float(site.latitude)
            ln2 = float(site.longitude)
            # A degree of longitude covers less ground than a degree of
            # latitude away from the equator, so scale it by the cosine
            # of the mean latitude before comparing.
            dx = (ln1 - ln2) * math.cos(math.radians((lt1 + lt2) / 2.0))
            dy = lt1 - lt2
            # Squared distance is enough for ranking.
            return dx * dx + dy * dy

        # min() keeps the first of equally distant sites.
        return min(sites, key=separation).id
327 
328 
329 ## The Site class simply provides a convenient container for site information.
330 # It can be used as a sortable item in lists.
class Site(object):
    ## The parameters are derived straight from JSON or XML values.
    # Typically, one creates a Site object by
    # site = Site(**<data-from-DataPoint>)
    # \param name Site name; Site objects sort on this.
    # \param id Site id as supplied by the service.
    # \param latitude Latitude as supplied by the service.
    # \param longitude Longitude as supplied by the service.
    # \param extra Any further fields of the record. They are kept
    # in the \em extra dictionary, so a record carrying additional
    # fields can still be unpacked straight into the constructor.
    def __init__(self, name, id, latitude, longitude, **extra):
        object.__init__(self)
        self.name = name
        self.id = id
        self.latitude = latitude
        self.longitude = longitude
        self.extra = extra

    ## This is provided so that Site objects are sortable by name.
    def __lt__(self, other):
        return self.name < other.name

    ## This provides a simple printable representation:
    # name, id, latitude and longitude in fixed-width columns.
    # Mostly useful for testing.
    def __repr__(self):
        return '{:<35}{:^6}{:>8}{:>8}'.format(self.name,
                                              self.id,
                                              self.latitude,
                                              self.longitude)

    ## The string form is the same as the representation.
    def __str__(self):
        return self.__repr__()
353 
354