searx-tools

Searx helper tools.
git clone git://r-36.net/searx-tools
Log | Files | Refs | README | LICENSE

xpath (1268B)


      1 #!/usr/bin/env python
      2 #
      3 # Copy me if you can.
      4 # by 20h
      5 #
      6 
      7 import os
      8 import sys
      9 import getopt
     10 from lxml import etree
     11 
     12 def getxpath(fd, xpath, attribute=None, encoding=None):
     13 	try:
     14 		parser = etree.HTMLParser(encoding=encoding)
     15 		xml = etree.parse(fd, parser)
     16 		sels = xml.xpath(xpath)
     17 	except AssertionError:
     18 		return None
     19 
     20 	if attribute != None:
     21 		return "\n".join(["".join(i.attrib[attribute]) for i in sels \
     22 			if attribute in i.attrib])
     23 
     24 	try:
     25 		return "\n".join([("".join(i.itertext())).strip() for i in sels])
     26 	except AttributeError:
     27 		return "\n".join(sels)
     28 
     29 def usage(app):
     30 	app = os.path.basename(app)
     31 	sys.stderr.write("usage: %s [-h] [-e encoding] "\
     32 			"[-a attribute] xpath\n" % (app))
     33 	sys.exit(1)
     34 
     35 def main(args):
     36 	try:
     37 		opts, largs = getopt.getopt(args[1:], "he:a:")
     38 	except getopt.GetoptError as err:
     39 		print(str(err))
     40 		usage(args[0])
     41 
     42 	encoding = None 
     43 	attribute = None
     44 	for o, a in opts:
     45 		if o == "-h":
     46 			usage(args[0])
     47 		elif o == "-e":
     48 			encoding = a
     49 		elif o == "-a":
     50 			attribute = a
     51 		else:
     52 			assert False, "unhandled option"
     53 	
     54 	if len(largs) < 1:
     55 		usage(args[0])
     56 
     57 	rpath = getxpath(sys.stdin, largs[0], attribute, encoding)
     58 	if rpath == None:
     59 		return 1
     60 
     61 	print(rpath)
     62 
     63 	return 0
     64 
     65 if __name__ == "__main__":
     66 	sys.exit(main(sys.argv))
     67