'''xml-parser.py

Jeff Ondich, 5/29/09

This program is a template program for parsing XML using the expat
XML-parsing library. This particular program just counts the XML tags
in an input file.
'''

import sys
from xml.parsers import expat


class XMLFilter:
    '''Count the opening tags in an XML document using expat.

    The handler methods (startElement, endElement, characterData) are
    meant to be customized; as written, only startElement does any work.

    Attributes:
        parser: the expat parser created by the most recent call to
            parse(), or None if parse() has not been called yet.
        tagCount: the number of opening tags seen so far. It is only
            initialized in __init__, so it accumulates across multiple
            calls to parse() on the same object.
    '''

    def __init__(self):
        self.parser = None
        self.tagCount = 0

    def report(self, out):
        '''Write a one-line summary of the tag count to the file-like
        object out (anything with a write(str) method).'''
        out.write('There were %d tags.\n' % self.tagCount)

    def startElement(self, name, attributes):
        '''This method gets called for each opening tag. name is the
        tag name, and attributes is a dictionary of names and values
        for the tag attributes.'''
        self.tagCount += 1

    def endElement(self, name):
        '''This method gets called for each closing tag.'''
        pass

    def characterData(self, data):
        '''This method gets called for each portion of text between a
        tag's opening and closing. It's a bit tricky when the text is
        interspersed among other tags inside the tag the text belongs
        to. For example, for <a>bbb<c>ddd</c>eee</a>, characterData
        will be called separately for bbb, ddd, and eee. (Unless the
        parser's buffer_text attribute is set, expat may also deliver
        one run of text in several pieces.)'''
        pass

    def parse(self, inFileName):
        '''Here, we instantiate a parser object and initialize its
        handler methods for the various types of XML-parsing events,
        then parse the file named inFileName.

        Raises OSError if the file cannot be opened, and
        expat.ExpatError if the document is not well-formed.'''
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.startElement
        self.parser.EndElementHandler = self.endElement
        self.parser.CharacterDataHandler = self.characterData

        # Open in binary mode so expat decodes the bytes itself, using
        # the encoding named in the XML declaration rather than the
        # locale's default text encoding. The with-block guarantees the
        # file is closed even if parsing raises.
        with open(inFileName, 'rb') as inFile:
            self.parser.ParseFile(inFile)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.stderr.write('Usage: %s xmlFile\n' % sys.argv[0])
        sys.exit(1)

    inFileName = sys.argv[1]
    # Named xmlFilter rather than filter to avoid shadowing the builtin.
    xmlFilter = XMLFilter()
    xmlFilter.parse(inFileName)
    xmlFilter.report(sys.stdout)