Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

RSS.py@ 671

Last change on this file since 671 was 201, checked in by Nicholas Riley, 19 years ago
RetroStatus
File size: 22.9 KB

Rev	Line
[201]	1	#!/usr/bin/env python
	2
	3	"""
	4	RSS.py
	5
	6	Classes for working with RSS channels as arbitrary data structures.
	7	Requires Python 2.2 or newer and PyXML 0.7.1 or newer.
	8
	9	ChannelBase - Base class for RSS Channels.
	10	CollectionChannel - RSS Channel modeled as an item-per-entry
	11	dictionary.
	12	TrackingChannel - RSS Channel modeled as a URI-per-entry
	13	dictionary.
	14	RSSParser - Multi-format RSS/XML Parser.
	15
	16	Typically, the *Channel classes will be most useful to developers.
	17
	18	This library provides tools for working with RSS feeds as data
	19	structures. The core is an RSS parser capable of understanding most
	20	RSS formats, and a serializer that produces RSS1.0. The RSS channel
	21	itself can be represented as any arbitrary data structure; two such
	22	structures are provided both as examples and to service common
	23	usage. This approach allows channels to be manipulated and stored in
	24	a fashion that suits both their semantics and the applications that
	25	access them.
	26
	27	Both the parser and the serializer have the following limitations:
	28	- RSS 1.0 "rich content" modules are not supported
	29	- RSS 0.9x features that rely on attributes are not supported
	30	- RDF is not understood; this library does not expose statements or
	31	understand RDF syntax beyond that documented in RSS1.0 (taking
	32	into account the previously listed limitations)
	33
	34	The RSS format is made up of three metadata sections (channel,
	35	image, and textinput) and a list of items. Each individual metadata
	36	section and each item is passed around as an "item dictionary",
	37	which is a Python dictionary with (namespace, localname) tuples as
	38	keys. The values of the dictionaries are always strings; they may
	39	contain markup, which will be rendered into the RSS/XML when
	40	serialized.
	41
	42	Individual items are found by using an "item identifier"; this is a
	43	channel-unique, string identifier for any given item. Item
	44	identifiers may be generated in a variety of ways, depending on the
	45	requirements of the channel.
	46
	47	Certain types of channel metadata are automatically generated, and
	48	will not be returned or honored when accessed. They include the
	49	"items", "image" and "textinput" children of the channel element.
	50
	51
	52	TODO:
	53	- any markup (and the content inside) in item or metadata children
	54	(e.g., HTML in a <description>) will be silently ignored.
	55	- test suite
	56	- a function (XPath-based?) to detect a channel's type and return
	57	the appropriate class.
	58	- pay attention to <rss:items> when appropriate.
	59	"""
	60
	61	__license__ = """
	62	Copyright (c) 2004 Mark Nottingham <mnot@pobox.com>
	63
	64	Permission is hereby granted, free of charge, to any person obtaining a copy
	65	of this software and associated documentation files (the "Software"), to deal
	66	in the Software without restriction, including without limitation the rights
	67	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	68	copies of the Software, and to permit persons to whom the Software is
	69	furnished to do so, subject to the following conditions:
	70
	71	The above copyright notice and this permission notice shall be included in all
	72	copies or substantial portions of the Software.
	73
	74	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	75	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	76	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	77	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	78	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	79	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	80	SOFTWARE.
	81	"""
	82
	83	__version__ = "0.46"
	84
	85	import UserDict, sys, codecs, sha, types, signal
	86	import xml.sax as sax
	87	import xml.sax.saxutils as saxutils
	88	import cPickle as pickle
	89	import cStringIO as StringIO
	90
# URI identifying this library and its version; output() stamps it into the
# channel metadata as admin:generatorAgent.
versionURI = 'http://www.mnot.net/python/RSS.py?version=%s' % __version__
	92
	93
	94	class _NamespaceMap:
	95	"""
	96	Prefix <-> Namespace map.
	97
	98	Hold prefix->namespace mappings, and generate new prefixes when
	99	necessary. Exposes prefix->URI map as attributes, URI->prefix
	100	through getPrefix(URI).
	101	"""
	102
	103	def __init__(self):
	104	self._nsID = 0 # seed for namespace prefix generation
	105	self._prefixMap = {}
	106	self.rdf = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
	107	self.rss10 = 'http://purl.org/rss/1.0/'
	108	self.rss09 = 'http://my.netscape.com/rdf/simple/0.9/'
	109	self.rss091 = 'http://purl.org/rss/1.0/modules/rss091/'
	110	self.dc = 'http://purl.org/dc/elements/1.1/'
	111	self.syn = 'http://purl.org/rss/modules/syndication/'
	112	self.content = 'http://purl.org/rss/1.0/modules/content/'
	113	self.admin = 'http://webns.net/mvcb/'
	114	self.ag = 'http://purl.org/rss/modules/aggregation/'
	115	self.annotate = 'http://purl.org/rss/1.0/modules/annotate/'
	116	self.cp = 'http://my.theinfo.org/changed/1.0/rss/'
	117	self.company = 'http://purl.org/rss/1.0/modules/company'
	118	self.event = 'http://purl.org/rss/1.0/modules/event/'
	119	self.slash = 'http://purl.org/rss/1.0/modules/slash/'
	120	self.html = 'http://www.w3.org/html4/'
	121
	122	def __setattr__(self, attr, value):
	123	self.__dict__[attr] = value
	124	if attr[0] != '_':
	125	self._prefixMap[value] = attr
	126
	127	def getPrefix(self, URI):
	128	"""
	129	Get the prefix for a given URI; generate one if it
	130	doesn't exist.
	131	"""
	132	try:
	133	if URI == self.rss10:
	134	return None # special case
	135	return self._prefixMap[URI]
	136	except KeyError:
	137	o = []
	138	d = self._nsID
	139	while 1:
	140	o.insert(0, d % 26)
	141	d = d / 26
	142	if not d: break
	143	candidate = "".join(map(lambda a: chr(a+97), o))
	144	self._nsID = self._nsID + 1
	145	if candidate in self._prefixMap.values():
	146	candidate = self.getPrefix(URI)
	147	setattr(self, candidate, URI)
	148	return candidate
	149
	150
	151	ns = _NamespaceMap()
	152
	153	# possible namespaces for RSS docs (None included for 0.9x)
	154	rssNamespaces = [ns.rss09, ns.rss10, None]
	155
	156	# major sections of a RSS file
	157	rssSections = [ (ns.rss10, 'channel'),
	158	(ns.rss10, 'image'),
	159	(ns.rss10, 'textarea')
	160	]
	161
	162	# RSS core element localnames
	163	rssElements = ['rss', 'channel', 'image', 'textarea', 'item', 'items',
	164	'title', 'link', 'description', 'url']
	165
	166	# RSS elements whose data is in an rdf:resource attribute
	167	rdfResources = [ (ns.rss10, 'image'),
	168	(ns.rss10, 'textarea'),
	169	(ns.admin, 'errorReportsTo'),
	170	(ns.admin, 'generatorAgent'),
	171	(ns.annotate, 'reference'),
	172	(ns.cp, 'server')
	173	]
	174
	175
	176	class ChannelBase:
	177	"""
	178	Base class for RSS Channels.
	179
	180	A number of generic methods for accessing and setting channel
	181	data and metadata are exposed, for the benefit of subclasses.
	182	They may be used by applications as well, or the data structure
	183	of the subclass may be directly manipulated.
	184	"""
	185
	186	def __init__(self):
	187	self.encoding = 'utf-8'
	188
	189	def listItems(self):
	190	"""List the items in a channel, with a list of identifiers."""
	191	pass # override me
	192
	193	def addItem(self, item, index=0):
	194	"""Add an item to the channel. Expects an item dictionary."""
	195	pass # override me
	196
	197	def getItem(self, identifier):
	198	"""Get the appropriate item dictionary for a given identifier."""
	199	pass # override me
	200
	201	def getMD(self, name):
	202	"""
	203	Get the [name] metadata as an item dictionary, where type is
	204	a tuple (typically, in the ns:rss10 namespace, with a localname of
	205	channel\|image\|textinput). MUST return an empty dictionary if the
	206	metadata isn't found.
	207	"""
	208	pass # override me
	209
	210	def setMD(self, name, metadata):
	211	"""
	212	Set the [name] metadata, where name is a tuple (typically,
	213	it will be in the ns:rss10 namespace, and have a localname of
	214	channel\|image\|textinput), and metadata is an item dictionary.
	215	"""
	216	pass # override me
	217
	218	def parse(self, url, timeout=30):
	219	"""
	220	Fetch a channel representation from a URL and populate
	221	the channel.
	222	"""
	223	dh = RSSParser(self)
	224	p = sax.sax2exts.make_parser()
	225	p.setContentHandler(dh)
	226	p.setFeature(sax.handler.feature_namespaces, 1)
	227	signal.signal(signal.SIGALRM, self._timeout)
	228	signal.alarm(timeout)
	229	try:
	230	p.parse(str(url)) # URIs are ascii
	231	finally:
	232	signal.alarm(0)
	233	return dh
	234
	235	def _timeout(self, **args):
	236	raise IOError, 'timeout'
	237
	238	def parseFile(self, file):
	239	"""Parse a file and populate the channel."""
	240	dh = RSSParser(self)
	241	p = sax.sax2exts.make_parser()
	242	p.setContentHandler(dh)
	243	p.setFeature(sax.handler.feature_namespaces, 1)
	244	p.parseFile(file)
	245	return dh
	246
	247	def __str__(self):
	248	return self.output(self.listItems())
	249
	250	def output(self, items):
	251	"""Return the items referred to by a list of identifiers."""
	252	assert type(items) is types.ListType, "items must be a list (%s)" % \
	253	type(items)
	254	out = StringIO.StringIO()
	255	o = _XMLGenerator(out, self.encoding, 'replace')
	256	channelMD = self.getMD((ns.rss10, "channel"))
	257	imageMD = self.getMD((ns.rss10, "image"))
	258	textinputMD = self.getMD((ns.rss10, "textinput"))
	259	channelMD[(ns.admin, 'generatorAgent')] = versionURI
	260
	261	# gather namespaces, map prefixes
	262	namespaces = {ns.rdf: 1}
	263	namespaces.update(dict(
	264	channelMD.keys() + imageMD.keys() + textinputMD.keys()))
	265	[namespaces.update(dict(i.keys())) for i in map(self.getItem, items)]
	266	for namespace in namespaces.keys():
	267	o.startPrefixMapping(ns.getPrefix(namespace), namespace)
	268
	269	# write the XML
	270	o.startDocument()
	271	o.startElementNS((ns.rdf, 'RDF'), None, {})
	272	o.ignorableWhitespace('\n')
	273	o.startElementNS(
	274	(ns.rss10, 'channel'), None,
	275	{(ns.rdf, 'about'): channelMD[(ns.rss10, 'link')]})
	276	o.ignorableWhitespace('\n')
	277
	278	# /channel
	279	for name, data in channelMD.items():
	280	if name in [(ns.rss10, 'items'), (ns.rss10, 'image'),
	281	(ns.rss10, 'textinput')]:
	282	continue
	283	o.ignorableWhitespace(' ')
	284	if name in rdfResources:
	285	o.startElementNS(name, None, {(ns.rdf, 'resource'): data})
	286	else:
	287	if "<" in data:
	288	o.startElementNS(name, None,
	289	{(ns.rdf, "parseType"): "Literal"})
	290	else:
	291	o.startElementNS(name, None, {})
	292	o.characters(data)
	293	o.endElementNS(name, None)
	294	o.ignorableWhitespace('\n')
	295
	296	# /channel/items
	297	o.ignorableWhitespace(' ')
	298	o.startElementNS((ns.rss10, 'items'), None, {})
	299	o.startElementNS((ns.rdf, 'Seq'), None, {})
	300	o.ignorableWhitespace('\n')
	301	for id in items:
	302	o.ignorableWhitespace(' ')
	303	o.startElementNS((ns.rdf, 'li'), None,
	304	{(ns.rdf, 'resource'): self.getItem(id).get((ns.rss10, 'link'),
	305	_make_hash(self.getItem(id)))})
	306	o.endElementNS((ns.rdf, 'li'), None)
	307	o.ignorableWhitespace('\n')
	308	o.ignorableWhitespace(' ')
	309	o.endElementNS((ns.rdf, 'Seq'), None)
	310	o.endElementNS((ns.rss10, 'items'), None)
	311	o.ignorableWhitespace('\n')
	312
	313	# /channel/image
	314	if imageMD.has_key((ns.rss10, 'url')):
	315	o.startElementNS((ns.rss10, 'image'), None,
	316	{(ns.rdf, 'about'): imageMD[(ns.rss10, 'url')]})
	317	o.endElementNS((ns.rss10, 'image'), None)
	318	o.ignorableWhitespace('\n')
	319
	320	# /channel/textinput
	321	if textinputMD.has_key((ns.rss10, 'link')):
	322	o.startElementNS((ns.rss10, 'textinput'), None,
	323	{(ns.rdf, 'about'): textinputMD[(ns.rss10, 'link')]})
	324	o.endElementNS((ns.rss10, 'textinput'), None)
	325	o.ignorableWhitespace('\n')
	326	o.endElementNS((ns.rss10, 'channel'), None)
	327	o.ignorableWhitespace('\n')
	328
	329	# /image
	330	if imageMD.has_key((ns.rss10, 'url')):
	331	o.startElementNS((ns.rss10, 'image'), None,
	332	{(ns.rdf, 'about'): imageMD[(ns.rss10, 'url')]})
	333	for name, data in imageMD.items():
	334	o.ignorableWhitespace(' ')
	335	if name in rdfResources:
	336	o.startElementNS(name, None, {(ns.rdf, 'resource'): data})
	337	else:
	338	if "<" in data:
	339	o.startElementNS(name, None,
	340	{(ns.rdf, "parseType"): "Literal"})
	341	else:
	342	o.startElementNS(name, None, {})
	343	o.characters(data)
	344	o.endElementNS(name, None)
	345	o.ignorableWhitespace('\n')
	346	o.endElementNS((ns.rss10, 'image'), None)
	347	o.ignorableWhitespace('\n')
	348
	349	# /textinput
	350	if textinputMD.has_key((ns.rss10, 'link')):
	351	o.startElementNS((ns.rss10, 'textinput'), None,
	352	{(ns.rdf, 'about'): textinputMD[(ns.rss10, 'link')]})
	353	for name, data in textinputMD.items():
	354	o.ignorableWhitespace(' ')
	355	if name in rdfResources:
	356	o.startElementNS(name, None, {(ns.rdf, 'resource'): data})
	357	else:
	358	if "<" in data:
	359	o.startElementNS(name, None,
	360	{(ns.rdf, "parseType"): "Literal"})
	361	else:
	362	o.startElementNS(name, None, {})
	363	o.characters(data)
	364	o.endElementNS(name, None)
	365	o.ignorableWhitespace('\n')
	366	o.endElementNS((ns.rss10, 'textinput'), None)
	367	o.ignorableWhitespace('\n')
	368
	369	# /item
	370	for id in items:
	371	item = self.getItem(id)
	372	o.startElementNS(
	373	(ns.rss10, 'item'), None, {(ns.rdf, 'about'):
	374	item.get((ns.rss10, 'link'), _make_hash(item))})
	375	o.ignorableWhitespace('\n')
	376	for name, data in item.items():
	377	o.ignorableWhitespace(' ')
	378	if name in rdfResources:
	379	o.startElementNS(name, None, {(ns.rdf, 'resource'): data})
	380	else:
	381	if "<" in data:
	382	o.startElementNS(name, None,
	383	{(ns.rdf, "parseType"): "Literal"})
	384	else:
	385	o.startElementNS(name, None, {})
	386	o.characters(data)
	387	o.endElementNS(name, None)
	388	o.ignorableWhitespace('\n')
	389	o.endElementNS((ns.rss10, 'item'), None)
	390	o.ignorableWhitespace('\n')
	391	o.endElementNS((ns.rdf, 'RDF'), None)
	392	o.endDocument()
	393	out.seek(0)
	394	return out.read()
	395
	396
	397
	398	class TrackingChannel(ChannelBase, UserDict.UserDict):
	399	"""
	400	RSS Channel modeled as a URI-per-entry dictionary.
	401
	402	Item identifiers are (uri, index) tuples, where uri is
	403	the rdf:about or rss:link URI, and index indicates the
	404	position in a list of a number of times that URI has
	405	appeared in the channel.
	406
	407	This allows "tracking" channels that track the state of
	408	a group of resources, such as stock tickers, file state
	409	changes, etc.
	410
	411	For example:
	412
	413	{
	414	(ns.rss10, "channel"): {
	415	(ns.rss10, "title"): "the channel",
	416	(ns.rss10, "description"): "whatever",
	417	},
	418	(ns.rss10, "items"):
	419	["http://example.com/foo", "htp://example.com/bar", ... ],
	420	"http://example.com/foo" [
	421	{
	422	(ns.rss10, "title"): "item 1",
	423	(ns.rss10, "link"): "http://example.com/",
	424	(ns.rss10, "description"): "foo",
	425	},
	426	{
	427	(ns.rss10, "title"): "item 1 revised",
	428	(ns.rss10, "link"): "http://example.com/",
	429	(ns.rss10, "description"): "foo revisited",
	430	},
	431	]
	432
	433	"http://example.com/bar" [
	434	...
	435	]
	436	}
	437
	438	"""
	439
	440	def __init__(self, data={}):
	441	ChannelBase.__init__(self)
	442	UserDict.UserDict.__init__(self, data)
	443	self.data[(ns.rss10, 'items')] = []
	444
	445	def listItems(self):
	446	return self[(ns.rss10, 'items')]
	447
	448	def addItem(self, item, index=0):
	449	if index == -1: index = len(self.data[(ns.rss10, 'items')])
	450	uri = item.get((ns.rss10, "link"), _make_hash(item)) # shoudn't happen
	451	if not self.data.has_key(uri):
	452	self.data[uri] = [item]
	453	else:
	454	self.data[uri].append(item)
	455	self.data[(ns.rss10, 'items')].insert(index, (uri, len(self.data[uri])))
	456
	457	def truncateToLength(self, length):
	458	items = self.listItems()
	459	data = self.data
	460	overage = len(items) - length
	461	while overage > 0:
	462	del data[items.pop()[0]]
	463	overage -= 1
	464
	465	def getItem(self, identifier):
	466	(uri, index) = identifier
	467	try:
	468	return self.data[uri][index-1]
	469	except (KeyError, IndexError):
	470	return {}
	471
	472	def getMD(self, name):
	473	return self.data.get(name, {})
	474
	475	def setMD(self, name, metadata):
	476	self.data[name] = metadata
	477
	478
	479
	480	class CollectionChannel(ChannelBase, UserDict.UserDict):
	481	"""
	482	RSS Channel modeled as an item-per-entry dictionary.
	483
	484	Each Item is hashed to create a unique entry in the
	485	dictionary, no matter how many times a particular
	486	URI is in the channel.
	487
	488	This allows "collection" channels, which are typically
	489	used for news updates, etc.
	490
	491	For example:
	492
	493	{
	494	(ns.rss10, "channel"): {
	495	(ns.rss10, "title"): "the channel",
	496	(ns.rss10, "description"): "whatever",
	497	},
	498	(ns.rss10, "items"): ["ID1", "ID2", ... ],
	499	"ID1" {
	500	(ns.rss10, "title"): "item 1",
	501	(ns.rss10, "link"): "http://example.com/",
	502	(ns.rss10, "description"): "foo",
	503	},
	504	"ID2" {
	505	...
	506	}
	507	}
	508
	509	Note that:
	510	- items are keyed by a hash-data URI; metadata is keyed
	511	by a (namespace, localname) tuple.
	512	- (ns.rss10, items) is a property; it cannot be
	513	manipulated without manipulating the corresponding
	514	(sub-)items (delete, add)
	515	- likewise, all item's are properties; adding, deleting,
	516	appending an item modifies (ns.rss10, items)
	517	correspondingly
	518	"""
	519
	520	def __init__(self, data={}):
	521	ChannelBase.__init__(self)
	522	UserDict.UserDict.__init__(self, data)
	523	self.data[(ns.rss10, 'items')] = []
	524
	525	def listItems(self):
	526	return self.data[(ns.rss10, 'items')]
	527
	528	def addItem(self, item, index=0):
	529	"""append an item dictionary to the channel"""
	530	if index == -1: index = len(self.data[(ns.rss10, 'items')])
	531	ID = _make_hash(item)
	532	self.data[ID] = item
	533	self.data[(ns.rss10, 'items')].insert(index, ID)
	534
	535	def getItem(self, identifier):
	536	return self.data.get(identifier, {})
	537
	538	def getMD(self, name):
	539	return self.data.get(name, {})
	540
	541	def setMD(self, name, metadata):
	542	self.data[name] = metadata
	543
	544
	545	class _XMLGenerator(saxutils.XMLGenerator):
	546	"""
	547	Modified XMLGenerator.
	548
	549	Allows modification of encoding error handling, and tries to
	550	encode problematic characters as Latin-1 to work around some
	551	implementations.
	552	"""
	553
	554	def __init__(self, out=None, encoding='iso-8859-1', errors='strict'):
	555	saxutils.XMLGenerator.__init__(self, out=out, encoding=encoding)
	556	if out is None:
	557	out = sys.stdout
	558	self._out = codecs.lookup(encoding)[3](out, errors)
	559
	560	def characters(self, content):
	561	try:
	562	self._out.write(sax.saxutils.escape(content))
	563	except UnicodeError: # hack for broken content
	564	self._out.write(sax.saxutils.escape(unicode(content, 'Latin-1')))
	565
	566
	567	class RSSParser(sax.handler.ContentHandler):
	568	"""
	569	Multi-format RSS/XML Parser.
	570
	571	Parse XML into RSS Channel objects. May optionally be passed a
	572	Channel() instance to append to.
	573
	574	Formats understood include:
	575	- RSS 0.9
	576	- RSS 0.91
	577	- RSS 0.92
	578	- RSS 1.0 (EXCEPT "rich content" modules)
	579
	580	"Core" RSS elements are normalized to the RSS1.0 namespace.
	581	"""
	582
	583	def __init__(self, channel, encoding='utf-8'):
	584	sax.handler.ContentHandler.__init__(self)
	585	self.channel = channel
	586	self.encoding = encoding
	587	self._context = []
	588	self._tmp_item = {}
	589	self._tmp_md = { (ns.rss10, "channel"): {},
	590	(ns.rss10, "image"): {},
	591	(ns.rss10, "textinput"): {},
	592	}
	593	self._tmp_buf = ''
	594	self.version = None
	595
	596	def startElementNS(self, name, qname, attrs):
	597	if name[1] is 'rss': # sniff version
	598	if name[0] is None:
	599	self.version = attrs.get('version', None)
	600	else:
	601	self.version = name[0]
	602	# normalize the rss namespace
	603	if name[0] in rssNamespaces and name[1] in rssElements:
	604	name = (ns.rss10, name[1])
	605	elif name[0] is None:
	606	name = (ns.rss091, name[1])
	607	self._context.append(name)
	608	if name == (ns.rss10, 'item'):
	609	self._tmp_item = {}
	610	self._tmp_buf = ''
	611	elif len(self._context) > 1 and \
	612	self._context[-2] == (ns.rss10, 'item') and \
	613	name in rdfResources:
	614	self._tmp_item[name] = attrs[(ns.rdf, 'resource')]
	615
	616
	617	def endElementNS(self, name, qname):
	618	# normalize the rss namespace
	619	if name[0] in rssNamespaces and name[1] in rssElements:
	620	name = (ns.rss10, name[1])
	621	elif name[0] is None:
	622	name = (ns.rss091, name[1])
	623	if (ns.rss10, 'item') in self._context:
	624	if self._context[-1] == (ns.rss10, 'item'): # end of an item
	625	self.channel.addItem(self._tmp_item, len(self.channel))
	626	self._tmp_item = {}
	627	elif self._context[-2] == (ns.rss10, 'item'): # an item's child
	628	if name not in rdfResources:
	629	self._tmp_item[name] = self._tmp_buf.strip()
	630	else: # an item's grandchild
	631	pass ###
	632	elif len(self._context) > 2 and self._context[-2] in rssSections:
	633	# metadata
	634	self._tmp_md[self._context[-2]][name] = self._tmp_buf.strip()
	635	self._tmp_buf = ''
	636	self._context.pop()
	637
	638	def endDocument(self):
	639	for name, metadata in self._tmp_md.items():
	640	self.channel.setMD(name, metadata)
	641
	642	def characters(self, content):
	643	self._tmp_buf = self._tmp_buf + content.encode(self.encoding)
	644
	645
	646	def _make_hash(data):
	647	return "hash-data:SHA:" + sha.new(pickle.dumps(data)).hexdigest()[:20]
	648
	649
	650	if __name__ == "__main__":
	651	# a simple test
	652	c = TrackingChannel()
	653	c.parse(sys.argv[1])
	654	print c

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/RetroStatus/RSS.py@ 671

Download in other formats: