zs

Zeitungsschau rss to email converter
git clone git://r-36.net/zs
Log | Files | Refs | LICENSE

commit 0aabb3ed2c6f959540a9bf48c7c8371778d008f0
parent 662c2a923b6c78163febd94eee17da36ed14242e
Author: Christoph Lohmann <20h@r-36.net>
Date:   Sat,  8 Aug 2020 11:59:11 +0200

Update objectify code to work with an encoding declaration.

Diffstat:
zeitungsschau/feed.py | 10+++++-----
zs | 5+++++
2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/zeitungsschau/feed.py b/zeitungsschau/feed.py @@ -5,8 +5,9 @@ # by 20h # -from lxml import objectify -from lxml import etree +import lxml +import lxml.objectify +import html from datetime import datetime import dateutil.parser from dateutil.tz import gettz @@ -14,7 +15,6 @@ import requests import hashlib import pytz import codecs -import html import urllib.parse import socket import json @@ -44,9 +44,10 @@ def removenamespaces(xml): elem.tag = elem.tag[nsl:] def parsexml(astr): - xml = objectify.fromstring(astr) + xml = lxml.objectify.fromstring(html.unescape(astr.decode("utf-8")).encode("utf-8")) removenamespaces(xml) # Throw XML parsing errors so we can blame the feed authors. + #print(lxml.objectify.dump(xml)) return xml def parsetwtxtfeed(astr, uri): @@ -399,7 +400,6 @@ def fetch(uri): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect((host, port)) s.send(("%s\r\n" % (selector)).encode("utf-8")) - s.shutdown(1) fd = s.makefile("r") fval = fd.read().encode("utf-8") s.close() diff --git a/zs b/zs @@ -52,6 +52,11 @@ def run(db, selfeed=None, dryrun=False, onlychanges=False): print("fetch %s" % (feeduri)) curfeed = None rcode = 0 + + """ + # All errors. + (rcode, curfeed) = feed.fetch(feeduri) + """ try: (rcode, curfeed) = feed.fetch(feeduri) except socket.gaierror: