zs

Zeitungsschau RSS-to-email converter
git clone git://r-36.net/zs
Log | Files | Refs | LICENSE

commit 18454a1fe6f19aa9cdd780128a713066ee23ba9d
parent 0626d417d52845d03223244faa8238210ff87229
Author: Christoph Lohmann <20h@r-36.net>
Date:   Wed, 19 Mar 2014 18:14:03 +0100

Add handling for xml syntax errors.

If XML fails, try HTML. If that fails, bail.

Diffstat:
feed.py | 15 +++++++++++++--
zs.py   |  3 +++
2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/feed.py b/feed.py
@@ -6,6 +6,7 @@
 #
 
 from lxml import objectify
+from lxml import etree
 from datetime import datetime
 import dateutil.parser
 import urllib.request, urllib.parse, urllib.error
@@ -23,12 +24,22 @@ def removenamespaces(xml):
             elem.tag = elem.tag[nsl:]
 
 def parsexml(astr):
-    xml = objectify.fromstring(astr)
-    removenamespaces(xml)
+    try:
+        xml = objectify.fromstring(astr)
+        removenamespaces(xml)
+    except etree.XMLSyntaxError:
+        try:
+            parser = etree.HTMLParser()
+            xml = objectify.fromstring(astr)
+            removenamespaces(xml)
+        except etree.XMLSyntaxError:
+            return None
     return xml
 
 def parse(astr):
     xml = parsexml(astr)
+    if xml == None:
+        return None
 
     feed = {}
     articles = []
diff --git a/zs.py b/zs.py
@@ -46,6 +46,9 @@ def run(db, selfeed=None, dryrun=False):
                 estr = "incompleteread"
                 continue
 
+            if curfeed == None:
+                continue
+
             # retry handling
             if estr != None:
                 if retries > 2: