sweb

Some web helper scripts.
git clone git://r-36.net/sweb
Log | Files | Refs | LICENSE

xpath (1191B)


      1 #!/usr/bin/env python
      2 #
      3 # Copy me if you can.
      4 # by 20h
      5 #
      6 
      7 import os
      8 import sys
      9 import getopt
     10 from lxml import etree
     11 
     12 def getxpath(fd, xpath, attribute=None, encoding=None):
     13 	try:
     14 		parser = etree.HTMLParser(encoding=encoding)
     15 		xml = etree.parse(fd, parser)
     16 		sels = xml.xpath(xpath)
     17 	except AssertionError:
     18 		return None
     19 
     20 	if attribute != None:
     21 		return "\n".join(["".join(i.attrib[attribute]) for i in sels])
     22 
     23 	return "".join([("".join(i.itertext())).strip() for i in sels])
     24 
     25 def usage(app):
     26 	app = os.path.basename(app)
     27 	sys.stderr.write("usage: %s [-h] [-e encoding] "\
     28 			"[-a attribute] xpath\n" % (app))
     29 	sys.exit(1)
     30 
     31 def main(args):
     32 	try:
     33 		opts, largs = getopt.getopt(args[1:], "he:a:")
     34 	except getopt.GetoptError as err:
     35 		print(str(err))
     36 		usage(args[0])
     37 
     38 	encoding = None 
     39 	attribute = None
     40 	for o, a in opts:
     41 		if o == "-h":
     42 			usage(args[0])
     43 		elif o == "-e":
     44 			encoding = a
     45 		elif o == "-a":
     46 			attribute = a
     47 		else:
     48 			assert False, "unhandled option"
     49 	
     50 	if len(largs) < 1:
     51 		usage(args[0])
     52 
     53 	rpath = getxpath(sys.stdin, largs[0], attribute, encoding)
     54 	if rpath == None:
     55 		return 1
     56 
     57 	sys.stdout.write(rpath)
     58 
     59 	return 0
     60 
     61 if __name__ == "__main__":
     62 	sys.exit(main(sys.argv))
     63