--- /dev/null
+"""PyRSS2Gen - A Python library for generating RSS 2.0 feeds."""
+
# Package metadata.  Note that this overrides the module's own __name__;
# the value is reused just below to build the generator string.
__name__ = "PyRSS2Gen"
__version__ = (1, 0, 0)
__author__ = "Andrew Dalke <dalke@dalkescientific.com>"

# "<name>-<dotted version>", used as the default channel generator value.
_generator_name = "%s-%s" % (
    __name__, ".".join([str(part) for part in __version__]))
+
+import datetime
+
+# Could make this the base class; will need to add 'publish'
class WriteXmlMixin:
    """Mixin that serialises an object exposing ``publish(handler)`` to XML.

    The class this is mixed into must provide ``publish(handler)``; it is
    called with an ``xml.sax.saxutils.XMLGenerator`` and is expected to emit
    the element events for the object.
    """

    def write_xml(self, outfile, encoding="iso-8859-1"):
        """Write the complete XML document for this object to *outfile*.

        outfile  -- writable file-like object handed to ``XMLGenerator``
        encoding -- encoding name handed to ``XMLGenerator``
        """
        from xml.sax import saxutils
        handler = saxutils.XMLGenerator(outfile, encoding)
        handler.startDocument()
        self.publish(handler)
        handler.endDocument()

    def to_xml(self, encoding="iso-8859-1"):
        """Return the complete XML document for this object as a string.

        The document is rendered by ``write_xml`` into an in-memory buffer
        whose contents are returned.
        """
        try:
            # Python 2: prefer the C implementation, then the pure-Python one.
            try:
                from cStringIO import StringIO
            except ImportError:
                from StringIO import StringIO
        except ImportError:
            # Python 3: both modules were removed; io.StringIO replaces them.
            from io import StringIO
        f = StringIO()
        self.write_xml(f, encoding)
        return f.getvalue()
+
+
def _element(handler, name, obj, d=None):
    """Emit one element for *obj* through the SAX-style *handler*.

    handler -- receives startElement / characters / endElement calls
    name    -- tag name; used only when *obj* is a string or ``None``
    obj     -- a string (written as character data), ``None`` (written as an
               empty element), or any other object, which must provide
               ``publish(handler)`` and then writes its own markup
    d       -- optional attribute mapping for the start tag; defaults to no
               attributes and is not used when *obj* publishes itself
    """
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str  # Python 3: basestring no longer exists
    if d is None:
        # Fresh dict per call rather than a shared mutable default argument.
        d = {}

    if obj is None or isinstance(obj, string_types):
        # special-case handling to make the API easier
        # to use for the common case.
        handler.startElement(name, d)
        if obj is not None:
            handler.characters(obj)
        handler.endElement(name)
    else:
        # It better know how to emit the correct XML.
        obj.publish(handler)
+
def _opt_element(handler, name, obj):
    """Emit *obj* via ``_element`` unless it is ``None``.

    Used for optional fields: a ``None`` value produces no output at all.
    """
    if obj is not None:
        _element(handler, name, obj)
+
+
def _format_date(dt):
    """convert a datetime into an RFC 822 formatted date

    A naive datetime must already be in GMT; its fields are used as-is.
    A timezone-aware datetime is first shifted by its UTC offset so that
    the emitted fields really are GMT.

    Returns a string such as "Sat, 07 Sep 2002 00:00:01 GMT".
    """
    # Can't use strftime because that's locale dependent
    #
    # Isn't there a standard way to do this for Python?  The
    # rfc822 and email.Utils modules assume a timestamp.  The
    # following is based on the rfc822 module.
    offset = dt.utcoffset()
    if offset is not None:
        # Aware datetime: drop the tzinfo and move the wall clock to UTC,
        # otherwise local time would be mislabelled as "GMT".
        dt = dt.replace(tzinfo=None) - offset

    day_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    month_names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        day_names[dt.weekday()],
        dt.day,
        month_names[dt.month - 1],
        dt.year, dt.hour, dt.minute, dt.second)
+
+
+##
+# A couple simple wrapper objects for the fields which
+# take a simple value other than a string.
class IntElement:
    """Publishable wrapper for an integer-valued element.

    Constructed with the tag name and the value; ``publish`` writes the
    value's ``str()`` form between the start and end tags, so any value
    with a suitable ``str()`` can be wrapped.
    """

    # Attributes placed on the start tag.
    element_attrs = {}

    def __init__(self, name, val):
        self.name, self.val = name, val

    def publish(self, handler):
        """Write ``<name>str(val)</name>`` to *handler*."""
        tag = self.name
        handler.startElement(tag, self.element_attrs)
        handler.characters(str(self.val))
        handler.endElement(tag)
+
class DateElement:
    """Publishable wrapper for a datetime.datetime element.

    Constructed with the tag name and the datetime; ``publish`` renders
    the datetime with ``_format_date`` (4-digit year, "GMT" suffix) and
    writes it as the element's text.
    """

    def __init__(self, name, dt):
        self.name, self.dt = name, dt

    def publish(self, handler):
        """Write the formatted timestamp inside a ``<name>`` element."""
        stamp = _format_date(self.dt)
        _element(handler, self.name, stamp)
+####
+
class Category:
    """Publish a category element, optionally carrying a "domain" attribute."""

    def __init__(self, category, domain=None):
        self.category = category
        self.domain = domain

    def publish(self, handler):
        """Write ``<category>``; "domain" is set only when one was given."""
        if self.domain is None:
            attrs = {}
        else:
            attrs = {"domain": self.domain}
        _element(handler, "category", self.category, attrs)
+
class Cloud:
    """Publish a cloud element; all of its data lives in attributes."""

    def __init__(self, domain, port, path,
                 registerProcedure, protocol):
        self.domain = domain
        self.port = port
        self.path = path
        self.registerProcedure = registerProcedure
        self.protocol = protocol

    def publish(self, handler):
        """Write an empty ``<cloud>`` element with the five attributes."""
        attrs = {}
        attrs["domain"] = self.domain
        # The port is converted with str() before being used as an attribute.
        attrs["port"] = str(self.port)
        attrs["path"] = self.path
        attrs["registerProcedure"] = self.registerProcedure
        attrs["protocol"] = self.protocol
        _element(handler, "cloud", None, attrs)
+
class Image:
    """Publish a channel Image.

    ``url``, ``title`` and ``link`` are always written; ``width``,
    ``height`` and ``description`` are written only when not ``None``.
    """

    # Attributes placed on the <image> start tag.
    element_attrs = {}

    def __init__(self, url, title, link,
                 width=None, height=None, description=None):
        self.url = url
        self.title = title
        self.link = link
        self.width = width
        self.height = height
        self.description = description

    def publish(self, handler):
        """Write the ``<image>`` element and its children to *handler*."""
        handler.startElement("image", self.element_attrs)

        for required in ("url", "title", "link"):
            _element(handler, required, getattr(self, required))

        for dimension in ("width", "height"):
            value = getattr(self, dimension)
            if isinstance(value, int):
                # Plain ints are wrapped so they are rendered via str().
                value = IntElement(dimension, value)
            _opt_element(handler, dimension, value)

        _opt_element(handler, "description", self.description)

        handler.endElement("image")
+
class Guid:
    """Publish a guid

    The guid is flagged as a permalink by default, which is also what is
    assumed when the attribute is omitted.  Hence strings are always
    permalinks.
    """

    def __init__(self, guid, isPermaLink=1):
        self.guid = guid
        self.isPermaLink = isPermaLink

    def publish(self, handler):
        """Write ``<guid>`` with isPermaLink set to "true" or "false"."""
        # Index by truthiness: False -> "false", True -> "true".
        flag = ("false", "true")[bool(self.isPermaLink)]
        _element(handler, "guid", self.guid, {"isPermaLink": flag})
+
class TextInput:
    """Publish a textInput

    Apparently this is rarely used.
    """

    # Attributes placed on the <textInput> start tag.
    element_attrs = {}

    def __init__(self, title, description, name, link):
        self.title = title
        self.description = description
        self.name = name
        self.link = link

    def publish(self, handler):
        """Write ``<textInput>`` with its four child elements, in order."""
        handler.startElement("textInput", self.element_attrs)
        for field in ("title", "description", "name", "link"):
            _element(handler, field, getattr(self, field))
        handler.endElement("textInput")
+
+
class Enclosure:
    """Publish an enclosure; all of its data lives in attributes."""

    def __init__(self, url, length, type):
        self.url = url
        self.length = length
        self.type = type

    def publish(self, handler):
        """Write an empty ``<enclosure>`` with url, length and type."""
        attrs = {}
        attrs["url"] = self.url
        # The length is converted with str() before being used as an attribute.
        attrs["length"] = str(self.length)
        attrs["type"] = self.type
        _element(handler, "enclosure", None, attrs)
+
class Source:
    """Publish the item's original source, used by aggregators"""

    def __init__(self, name, url):
        self.name, self.url = name, url

    def publish(self, handler):
        """Write ``<source url=...>name</source>`` to *handler*."""
        attrs = {"url": self.url}
        _element(handler, "source", self.name, attrs)
+
class SkipHours:
    """Publish the skipHours

    This takes a list of hours, as integers.
    """

    # Attributes placed on the <skipHours> start tag.
    element_attrs = {}

    def __init__(self, hours):
        self.hours = hours

    def publish(self, handler):
        """Write one ``<hour>`` per entry; nothing at all for an empty list."""
        if not self.hours:
            return
        handler.startElement("skipHours", self.element_attrs)
        for value in self.hours:
            _element(handler, "hour", str(value))
        handler.endElement("skipHours")
+
class SkipDays:
    """Publish the skipDays

    This takes a list of days as strings.
    """

    # Attributes placed on the <skipDays> start tag.
    element_attrs = {}

    def __init__(self, days):
        self.days = days

    def publish(self, handler):
        """Write one ``<day>`` per entry; nothing at all for an empty list."""
        if not self.days:
            return
        handler.startElement("skipDays", self.element_attrs)
        for name in self.days:
            _element(handler, "day", name)
        handler.endElement("skipDays")
+
class RSS2(WriteXmlMixin):
    """The main RSS class.

    Stores the channel attributes, with the "category" elements under
    ".categories" and the RSS items under ".items".

    ``title``, ``link`` and ``description`` are always published; every
    other field is published only when it is not ``None``.
    """

    # Attributes placed on the <rss> and <channel> start tags respectively.
    rss_attrs = {"version": "2.0"}
    element_attrs = {}

    def __init__(self,
                 title,
                 link,
                 description,

                 language = None,
                 copyright = None,
                 managingEditor = None,
                 webMaster = None,
                 pubDate = None,  # a datetime, *in* *GMT*
                 lastBuildDate = None, # a datetime

                 categories = None, # list of strings or Category
                 generator = _generator_name,
                 docs = "http://blogs.law.harvard.edu/tech/rss",
                 cloud = None,    # a Cloud
                 ttl = None,      # integer number of minutes

                 image = None,     # an Image
                 rating = None,    # a string; I don't know how it's used
                 textInput = None, # a TextInput
                 skipHours = None, # a SkipHours with a list of integers
                 skipDays = None,  # a SkipDays with a list of strings

                 items = None,     # list of RSSItems
                 ):
        self.title = title
        self.link = link
        self.description = description
        self.language = language
        self.copyright = copyright
        self.managingEditor = managingEditor

        self.webMaster = webMaster
        self.pubDate = pubDate
        self.lastBuildDate = lastBuildDate

        # None means "no categories"; a fresh list is created per instance.
        if categories is None:
            categories = []
        self.categories = categories
        self.generator = generator
        self.docs = docs
        self.cloud = cloud
        self.ttl = ttl
        self.image = image
        self.rating = rating
        self.textInput = textInput
        self.skipHours = skipHours
        self.skipDays = skipDays

        # None means "no items"; a fresh list is created per instance.
        if items is None:
            items = []
        self.items = items

    def publish(self, handler):
        """Emit the ``<rss><channel>...</channel></rss>`` tree to *handler*.

        The three required fields come first, then ``publish_extensions``,
        then the optional channel fields, and finally every entry of
        ``self.items``.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3: basestring no longer exists

        handler.startElement("rss", self.rss_attrs)
        handler.startElement("channel", self.element_attrs)
        _element(handler, "title", self.title)
        _element(handler, "link", self.link)
        _element(handler, "description", self.description)

        self.publish_extensions(handler)

        _opt_element(handler, "language", self.language)
        _opt_element(handler, "copyright", self.copyright)
        _opt_element(handler, "managingEditor", self.managingEditor)
        _opt_element(handler, "webMaster", self.webMaster)

        # datetime values are wrapped so they are formatted on output;
        # anything else (e.g. a preformatted string) is passed through.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        lastBuildDate = self.lastBuildDate
        if isinstance(lastBuildDate, datetime.datetime):
            lastBuildDate = DateElement("lastBuildDate", lastBuildDate)
        _opt_element(handler, "lastBuildDate", lastBuildDate)

        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "generator", self.generator)
        _opt_element(handler, "docs", self.docs)

        if self.cloud is not None:
            self.cloud.publish(handler)

        ttl = self.ttl
        if isinstance(ttl, int):
            ttl = IntElement("ttl", ttl)
        # The tag must be "ttl"; it is the name used when ttl is a string.
        _opt_element(handler, "ttl", ttl)

        if self.image is not None:
            self.image.publish(handler)

        _opt_element(handler, "rating", self.rating)
        if self.textInput is not None:
            self.textInput.publish(handler)
        if self.skipHours is not None:
            self.skipHours.publish(handler)
        if self.skipDays is not None:
            self.skipDays.publish(handler)

        for item in self.items:
            item.publish(handler)

        handler.endElement("channel")
        handler.endElement("rss")

    def publish_extensions(self, handler):
        """Hook for subclasses; the base implementation emits nothing."""
        # Derived classes can hook into this to insert
        # output after the three required fields.
        pass
+
+
+
class RSSItem(WriteXmlMixin):
    """Publish an RSS Item

    At least one of ``title`` or ``description`` must be given.  Every
    field is optional on output and is published only when not ``None``.
    """

    # Attributes placed on the <item> start tag.
    element_attrs = {}

    def __init__(self,
                 title = None,  # string
                 link = None,   # url as string
                 description = None, # string
                 author = None,      # email address as string
                 categories = None,  # list of string or Category
                 comments = None,  # url as string
                 enclosure = None, # an Enclosure
                 guid = None,    # a unique string
                 pubDate = None, # a datetime
                 source = None,  # a Source
                 ):
        """Store the item fields.

        Raises TypeError when both *title* and *description* are ``None``.
        """
        if title is None and description is None:
            raise TypeError(
                "must define at least one of 'title' or 'description'")
        self.title = title
        self.link = link
        self.description = description
        self.author = author
        # None means "no categories"; a fresh list is created per instance.
        if categories is None:
            categories = []
        self.categories = categories
        self.comments = comments
        self.enclosure = enclosure
        self.guid = guid
        self.pubDate = pubDate
        self.source = source
        # It sure does get tedious typing these names three times...

    def publish(self, handler):
        """Emit the ``<item>`` element and its children to *handler*.

        ``publish_extensions`` is called after the title and link and
        before the description.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode
        except NameError:
            string_types = str  # Python 3: basestring no longer exists

        handler.startElement("item", self.element_attrs)
        _opt_element(handler, "title", self.title)
        _opt_element(handler, "link", self.link)
        self.publish_extensions(handler)
        _opt_element(handler, "description", self.description)
        _opt_element(handler, "author", self.author)

        for category in self.categories:
            if isinstance(category, string_types):
                category = Category(category)
            category.publish(handler)

        _opt_element(handler, "comments", self.comments)
        if self.enclosure is not None:
            self.enclosure.publish(handler)
        _opt_element(handler, "guid", self.guid)

        # A datetime is wrapped so it is formatted on output; anything
        # else (e.g. a preformatted string) is passed through.
        pubDate = self.pubDate
        if isinstance(pubDate, datetime.datetime):
            pubDate = DateElement("pubDate", pubDate)
        _opt_element(handler, "pubDate", pubDate)

        if self.source is not None:
            self.source.publish(handler)

        handler.endElement("item")

    def publish_extensions(self, handler):
        """Hook for subclasses; the base implementation emits nothing."""
        # Derived classes can hook into this to insert
        # output after the title and link elements
        pass