zs

Zeitungsschau rss to email converter
git clone git://r-36.net/zs
Log | Files | Refs | LICENSE

commit 45fa8f0de67209e4cceeb1df6e3439efe55e7617
parent f9891c3921193415865fa7d3aae02407dea12ab0
Author: Christoph Lohmann <20h@r-36.net>
Date:   Wed, 11 Nov 2015 18:02:11 +0100

First HTML parsing with missing entities.

Diffstat:
zeitungsschau/feed.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py
@@ -33,10 +33,12 @@ def parsexml(astr):
 	except etree.XMLSyntaxError:
 		try:
 			parser = etree.HTMLParser()
-			xml = objectify.fromstring(astr)
+			xml = objectify.fromstring(astr, parser)
 			removenamespaces(xml)
 		except etree.XMLSyntaxError:
-			return None
+			parser = etree.XMLParser(resolve_entities=False)
+			xml = objectify.fromstring(astr, parser)
+			removenamespaces(xml)
 	return xml
 
 def parse(astr):
@@ -50,6 +52,10 @@ def parse(astr):
 	isrdf = False
 	now = datetime.now(pytz.utc)
 
+	feede = xml.xpath(".//feed")
+	if len(feede) > 0:
+		xml = feede[0]
+
 	if hasattr(xml, "channel"):
 		if hasattr(xml, "item"):
 			isrdf = True