import feedparser as fd

# Example pipeline definition:
#
# pipeline = """
# feed http://www.theregister.com/headlines.rss
# feed http://www.mbl.is/mm/rss/frettir.xml
# feed http://dr1ver.net/?feed=rss2
# feed http://hardocp.com/RSS/all_hardocp.xml
# feed http://rss.slashdot.org/Slashdot/slashdot
# filter summary:iphone
# filter summary:apple
# order updated:desc
# """


def pipelines(self, pipeline):
    """Run a newline-separated pipeline of feed-processing commands.

    Each non-empty line of *pipeline* is "<command> <argument>":

    * ``feed URL``       -- fetch/parse an RSS feed (served from ``self``'s
                            cache via ``_check_cache``/``_cache`` when possible)
    * ``filter tag:k1,k2`` -- keep entries whose ``tag`` field contains any key
    * ``order tag[:asc]`` -- sort entries by timestamp (descending by default)
    * ``limit N``        -- truncate the result to the first N entries

    Stages are collected first, then applied in order to the accumulated
    entry list.  Returns the final list of feed entries.
    """
    stages = []
    for line in pipeline.split('\n'):
        parts = line.split(' ')
        if len(parts) == 1 and parts[0] == '':
            continue  # skip blank lines
        cmd = parts[0].lower()
        if cmd == "feed":
            # Caching: reuse a previously parsed feed when available.
            cached = self._check_cache(parts[1])
            if cached:
                stages.append(cached)
                print("... got %s from cache" % parts[1])
            else:
                parsed = fd.parse(parts[1])
                self._cache(parts[1], parsed)
                stages.append(parsed)
        elif cmd in ("filter", "order"):
            # Key on the lowercased command so e.g. "Filter ..." still
            # dispatches below (the raw parts[0] broke mixed-case input).
            stages.append({cmd: parts[1].split(':')})
        elif cmd == "limit":
            stages.append({cmd: parts[1]})

    output = []
    for stage in stages:
        # FeedParserDict subclasses dict, so it must be tested first.
        if isinstance(stage, fd.FeedParserDict):
            output.extend(stage.entries)
        elif isinstance(stage, dict):
            action, args = next(iter(stage.items()))
            if action == 'filter':
                output = do_filter(output, args)
            elif action == 'order':
                output = do_order(output, args)
            elif action == 'limit':
                output = output[:int(args)]
            else:
                print("nothing to do!", stage)
    return output


def do_filter(input, raw_keys):
    """Keep entries whose ``raw_keys[0]`` field contains any listed key.

    *raw_keys* is ``[tag, "key1,key2,..."]`` as produced by a
    ``filter tag:key1,key2`` pipeline line.  Matching is case-insensitive
    substring search; an entry is kept on the first key that matches.
    A malformed spec (no key list) leaves the input unfiltered.
    """
    if len(raw_keys) < 2:
        return list(input)
    tag = raw_keys[0]
    # Split/normalize the key list once, not per entry.
    keys = [k.strip().lower() for k in raw_keys[1].split(',')]
    out = []
    for entry in input:
        text = entry.get(tag, '').lower()
        if any(k in text for k in keys):
            out.append(entry)
    return out


def do_order(input, raw_keys):
    """Sort entries by their ``updated_parsed`` timestamp.

    *raw_keys* is ``[tag, direction?]`` from an ``order tag[:dir]`` line.
    Only the direction is honored -- sorting is always on
    ``updated_parsed`` regardless of the tag given.  Default order is
    descending; pass ``asc`` as the direction for ascending.
    """
    ascending = len(raw_keys) > 1 and raw_keys[1] == 'asc'
    # time.struct_time compares with plain tuples, so () stands in for a
    # missing timestamp: first when ascending, last when descending.
    return sorted(input,
                  key=lambda e: e.get('updated_parsed') or (),
                  reverse=not ascending)