Package deefuzzer :: Package deefuzzer :: Package tools :: Module PyRSS2Gen
[hide private]
[frames | no frames]

Source Code for Module deefuzzer.deefuzzer.tools.PyRSS2Gen

  1  """PyRSS2Gen - A Python library for generating RSS 2.0 feeds.""" 
  2   
  3  __name__ = "PyRSS2Gen" 
  4  __version__ = (1, 0, 0) 
  5  __author__ = "Andrew Dalke <dalke@dalkescientific.com>" 
  6   
  7  _generator_name = __name__ + "-" + ".".join(map(str, __version__)) 
  8   
  9  import datetime 
 10   
 11  # Could make this the base class; will need to add 'publish' 
class WriteXmlMixin:
    """Mixin that serialises an object to XML through a SAX generator.

    The host class must provide ``publish(handler)``; it is called with an
    ``xml.sax.saxutils.XMLGenerator`` and is expected to emit the element
    events that make up the document body.
    """

    def write_xml(self, outfile, encoding = "iso-8859-1"):
        """Write the whole XML document to ``outfile``.

        outfile  -- writable file-like object handed to ``XMLGenerator``
        encoding -- encoding name handed to ``XMLGenerator``
        """
        from xml.sax import saxutils
        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding = "iso-8859-1"):
        """Return the XML document as a string.

        The document is produced by ``write_xml`` into an in-memory buffer.
        """
        try:
            import cStringIO as StringIO
        except ImportError:
            try:
                import StringIO
            except ImportError:
                # Neither Python 2 module exists on Python 3; ``io`` offers
                # the same ``StringIO`` class name there.
                import io as StringIO
        f = StringIO.StringIO()
        self.write_xml(f, encoding)
        return f.getvalue()
28 29
try:
    _string_types = basestring
except NameError:
    # Python 3 has no ``basestring``; ``str`` is the text type there.
    _string_types = str


def _element(handler, name, obj, d = None):
    """Emit one XML element for ``obj`` on the SAX ``handler``.

    handler -- object providing startElement / characters / endElement
    name    -- tag name used when ``obj`` is a string or None
    obj     -- a string (written as character data), None (empty element),
               or any object with a ``publish(handler)`` method, which is
               then responsible for emitting its own XML
    d       -- optional attribute mapping for the start tag; only used
               for the string / None case
    """
    if d is None:
        # A fresh dict per call, rather than one shared mutable default.
        d = {}
    if isinstance(obj, _string_types) or obj is None:
        # special-case handling to make the API easier
        # to use for the common case.
        handler.startElement(name, d)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
41
def _opt_element(handler, name, obj):
    """Emit ``obj`` through ``_element`` unless it is None.

    A None value means the optional element is absent, so nothing is
    written at all.
    """
    if obj is not None:
        _element(handler, name, obj)
46 47
def _format_date(dt):
    """Render a datetime as an RFC 822 style date string.

    The value is labelled "GMT" unconditionally, so the input must
    already be expressed in GMT.

    Example output: Sat, 07 Sep 2002 00:00:01 GMT
    """
    # strftime is avoided because its day and month names depend on the
    # locale; fixed English tables are used instead.
    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    weekday = day_names[dt.weekday()]
    month = month_names[dt.month - 1]
    fields = (weekday, dt.day, month, dt.year, dt.hour, dt.minute, dt.second)
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % fields
66 67 68 ## 69 # A couple simple wrapper objects for the fields which 70 # take a simple value other than a string.
class IntElement:
    """Publishable wrapper for an integer value.

    Holds a tag name and a value; publishing writes ``str(value)`` as the
    text of an element with that tag. Anything with a sensible ``str()``
    form can be published the same way.
    """

    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
87
class DateElement:
    """Publishable wrapper for a datetime.datetime.

    Holds a tag name and a datetime; publishing writes the datetime,
    formatted by ``_format_date`` (4-digit year, "GMT" suffix), as the
    text of an element with that tag.
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        stamp = _format_date(self.dt)
        _element(handler, self.name, stamp)
100 #### 101
class Category:
    """A "category" element with an optional "domain" attribute."""

    def __init__(self, category, domain = None):
        self.category, self.domain = category, domain

    def publish(self, handler):
        # The domain attribute is written only when one was supplied.
        if self.domain is None:
            attrs = {}
        else:
            attrs = {"domain": self.domain}
        _element(handler, "category", self.category, attrs)
112
class Cloud:
    """A "cloud" element: an empty tag carrying five attributes."""

    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain, self.port, self.path = domain, port, path
        self.registerProcedure, self.protocol = registerProcedure, protocol

    def publish(self, handler):
        # The port is converted with str(); the other attributes are
        # passed through as given.
        attrs = {
            "domain": self.domain,
            "port": str(self.port),
            "path": self.path,
            "registerProcedure": self.registerProcedure,
            "protocol": self.protocol,
        }
        _element(handler, "cloud", None, attrs)
129
class Image:
    """A channel "image" element.

    url, title and link are always written; width, height and
    description are written only when they are not None.
    """

    element_attrs = {}

    def __init__(self, url, title, link,
                 width = None, height = None, description = None):
        self.url, self.title, self.link = url, title, link
        self.width, self.height = width, height
        self.description = description

    def publish(self, handler):
        handler.startElement("image", self.element_attrs)

        for tag in ("url", "title", "link"):
            _element(handler, tag, getattr(self, tag))

        # Integer dimensions are wrapped so they are written via str();
        # any other non-None value is handed to _element unchanged.
        for tag in ("width", "height"):
            size = getattr(self, tag)
            if isinstance(size, int):
                size = IntElement(tag, size)
            _opt_element(handler, tag, size)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
162
class Guid:
    """A "guid" element with an explicit isPermaLink attribute.

    isPermaLink defaults to true. Publishing always writes the attribute,
    as "true" or "false" according to the truthiness of the stored value.
    """

    def __init__(self, guid, isPermaLink = 1):
        self.guid, self.isPermaLink = guid, isPermaLink

    def publish(self, handler):
        flag = "true" if self.isPermaLink else "false"
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
179
class TextInput:
    """A "textInput" element with title, description, name and link children.

    Apparently this is rarely used.
    """

    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title, self.description = title, description
        self.name, self.link = name, link

    def publish(self, handler):
        handler.startElement("textInput", self.element_attrs)
        # The four children are written in this fixed order.
        for tag in ("title", "description", "name", "link"):
            _element(handler, tag, getattr(self, tag))
        handler.endElement("textInput")
199 200
class Enclosure:
    """An "enclosure" element: an empty tag with url, length and type."""

    def __init__(self, url, length, type):
        self.url, self.length, self.type = url, length, type

    def publish(self, handler):
        # The length is converted with str(); url and type pass through.
        attrs = {
            "url": self.url,
            "length": str(self.length),
            "type": self.type,
        }
        _element(handler, "enclosure", None, attrs)
213
class Source:
    """An item's "source" element: the name as text, the url as an attribute."""

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        attrs = {"url": self.url}
        _element(handler, "source", self.name, attrs)
221
class SkipHours:
    """A "skipHours" element built from a list of hours.

    Each hour is written as an "hour" child via str(). Nothing is
    written when the list is empty.
    """

    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for hour in self.hours:
            _element(handler, "hour", str(hour))
        handler.endElement("skipHours")
236
class SkipDays:
    """A "skipDays" element built from a list of day names (strings).

    Each entry is written as a "day" child. Nothing is written when the
    list is empty.
    """

    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for day in self.days:
            _element(handler, "day", day)
        handler.endElement("skipDays")
251
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".
    """

    rss_attrs = {"version": "2.0"}
    element_attrs = {}

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # category handling in publish() also works on Python 3.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self,
                 title,
                 link,
                 description,

                 language = None,
                 copyright = None,
                 managingEditor = None,
                 webMaster = None,
                 pubDate = None,  # a datetime, *in* *GMT*
                 lastBuildDate = None, # a datetime

                 categories = None, # list of strings or Category
                 generator = _generator_name,
                 docs = "http://blogs.law.harvard.edu/tech/rss",
                 cloud = None,    # a Cloud
                 ttl = None,      # integer number of minutes

                 image = None,     # an Image
                 rating = None,    # a string; I don't know how it's used
                 textInput = None, # a TextInput
                 skipHours = None, # a SkipHours with a list of integers
                 skipDays = None,  # a SkipDays with a list of strings

                 items = None,     # list of RSSItems
                 ):
        """Store the channel fields.

        title, link and description are required. ``categories`` and
        ``items`` default to a fresh empty list per instance.
        """
        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Emit the <rss><channel>...</channel></rss> tree on ``handler``.

        The three required fields come first, then whatever
        ``publish_extensions`` adds, then the optional fields (skipped
        when None), and finally every item in ``self.items``.
        """
        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)
        _element(handler, "title", self.title)
        _element(handler, "link", self.link)
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
        _opt_element(handler, "webMaster", self.webMaster)

        # datetimes are wrapped so they are written as formatted dates;
        # other non-None values are handed to _element unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement("lastBuildDate", lastBuildDate)
        _opt_element(handler, "lastBuildDate", lastBuildDate)

        for category in self.categories:
            if isinstance(category, self._string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(self.ttl, int):
            ttl = IntElement("ttl", ttl)
        # The tag must be "ttl" here as well, so that a ttl given as a
        # string is written under the right element name.
        _opt_element(handler, "ttl", ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        # Derived classes can hook into this to insert
        # output after the three required fields.
        pass
376 377 378
class RSSItem(WriteXmlMixin):
    """Publish an RSS Item"""
    element_attrs = {}

    # ``basestring`` only exists on Python 2; fall back to ``str`` so the
    # category handling in publish() also works on Python 3.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self,
                 title = None,  # string
                 link = None,   # url as string
                 description = None, # string
                 author = None,      # email address as string
                 categories = None,  # list of string or Category
                 comments = None,  # url as string
                 enclosure = None, # an Enclosure
                 guid = None,    # a unique string
                 pubDate = None, # a datetime
                 source = None,  # a Source
                 ):
        """Store the item fields.

        Raises TypeError when both ``title`` and ``description`` are None.
        ``categories`` defaults to a fresh empty list per instance.
        """
        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source
        # It sure does get tedious typing these names three times...

    def publish(self, handler):
        """Emit the <item> element and its children on ``handler``.

        Fields that are None are skipped. ``publish_extensions`` runs
        right after the title and link elements.
        """
        handler.startElement("item", self.element_attrs)
        _opt_element(handler, "title", self.title)
        _opt_element(handler, "link", self.link)
        self.publish_extensions(handler)
        _opt_element(handler, "description", self.description)
        _opt_element(handler, "author", self.author)

        for category in self.categories:
            if isinstance(category, self._string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it is written as a formatted date;
        # other non-None values are handed to _element unchanged.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        # Derived classes can hook into this to insert
        # output after the title and link elements
        pass
444