searx-tools

Searx helper tools.
git clone git://r-36.net/searx-tools
Log | Files | Refs | LICENSE

commit 0d6a6fd8675019cf67d7aed081f250e7030611c8
parent 91c5bcef0e7000d7d57d10745cc936fdc837b9a4
Author: Christoph Lohmann <20h@r-36.net>
Date:   Sat, 25 Aug 2018 12:37:04 +0200

Add xpath utility.

Diffstat:
xpath | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+), 0 deletions(-)

#!/usr/bin/env python
#
# Copy me if you can.
# by 20h
#

"""Print what an XPath expression selects from HTML read on stdin."""

import os
import sys
import getopt


def _itemtext(item, attribute=None):
    """Return the output text for one member of an XPath result list.

    With `attribute` set, the value of that attribute is returned, or
    None when the item has no such attribute (including items that are
    not elements at all) so that the caller can skip it.  Without it,
    elements yield their concatenated, stripped text content and any
    other item (e.g. a string produced by ``text()`` or ``@href``) is
    returned unchanged.
    """
    if attribute is not None:
        attrib = getattr(item, "attrib", None)
        if attrib is None or attribute not in attrib:
            return None
        return attrib[attribute]

    try:
        itertext = item.itertext
    except AttributeError:
        # Not an element: string results are already the wanted text.
        return item
    return "".join(itertext()).strip()


def _format_result(sels, attribute=None):
    """Turn the value returned by an XPath evaluation into one string.

    Lists are rendered one item per line.  Scalar results, which
    expressions such as ``string(...)``, ``count(...)`` or comparisons
    produce, are rendered as a single value instead of being iterated.
    """
    if isinstance(sels, list):
        texts = [_itemtext(item, attribute) for item in sels]
        return "\n".join([text for text in texts if text is not None])

    if attribute is not None:
        # A scalar carries no attributes, so nothing matches.
        return ""
    if isinstance(sels, bool):
        return "true" if sels else "false"
    if isinstance(sels, float):
        # Print integral numbers without a trailing ".0".
        if sels.is_integer():
            return str(int(sels))
        return str(sels)
    return sels


def getxpath(fd, xpath, attribute=None, encoding=None):
    """Parse HTML from `fd` and return what `xpath` selects as a string.

    fd        -- file-like object (or anything etree.parse accepts)
                 holding the HTML document
    xpath     -- XPath expression to evaluate against the document
    attribute -- if given, output this attribute of each matched
                 element instead of its text; elements without it
                 are skipped
    encoding  -- passed to etree.HTMLParser to override the document
                 encoding

    Returns the newline-joined results, or None when parsing or
    evaluating raised AssertionError.
    """
    # Imported here so that option parsing and the usage message keep
    # working on systems where lxml is not installed.
    from lxml import etree

    try:
        parser = etree.HTMLParser(encoding=encoding)
        xml = etree.parse(fd, parser)
        sels = xml.xpath(xpath)
    except AssertionError:
        # NOTE(review): presumably raised by lxml when the parsed tree
        # has no root element (e.g. empty input) -- confirm.
        return None

    return _format_result(sels, attribute)


def usage(app):
    """Write the usage line for `app` to stderr and exit with status 1."""
    app = os.path.basename(app)
    sys.stderr.write("usage: %s [-h] [-e encoding] "
                     "[-a attribute] xpath\n" % (app))
    sys.exit(1)


def main(args):
    """Run the command line tool; `args` is the full argv list.

    Returns the process exit status: 0 on success, 1 when getxpath()
    reported a failure.  Invalid options, -h or a missing xpath
    argument end in usage(), which exits with status 1.
    """
    try:
        opts, largs = getopt.getopt(args[1:], "he:a:")
    except getopt.GetoptError as err:
        # Diagnostics belong on stderr, next to the usage text.
        sys.stderr.write("%s\n" % (err))
        usage(args[0])

    encoding = None
    attribute = None
    for o, a in opts:
        if o == "-h":
            usage(args[0])
        elif o == "-e":
            encoding = a
        elif o == "-a":
            attribute = a
        else:
            # getopt only returns options named above; raise explicitly
            # because a plain assert is stripped under -O.
            raise AssertionError("unhandled option")

    if len(largs) < 1:
        usage(args[0])

    # Hand lxml the raw bytes where available (Python 3); text-mode
    # stdin would already be decoded with the locale encoding, which
    # defeats -e and the document's own charset declaration.
    fd = getattr(sys.stdin, "buffer", sys.stdin)

    rpath = getxpath(fd, largs[0], attribute, encoding)
    if rpath is None:
        return 1

    print(rpath)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))