modules/xmpp/simplexml.py
changeset 0 93b25987d3e5
equal deleted inserted replaced
-1:000000000000 0:93b25987d3e5
       
     1 ##   simplexml.py based on Matthew Allum's xmlstream.py
       
     2 ##
       
     3 ##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov
       
     4 ##
       
     5 ##   This program is free software; you can redistribute it and/or modify
       
     6 ##   it under the terms of the GNU General Public License as published by
       
     7 ##   the Free Software Foundation; either version 2, or (at your option)
       
     8 ##   any later version.
       
     9 ##
       
    10 ##   This program is distributed in the hope that it will be useful,
       
    11 ##   but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    12 ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       
    13 ##   GNU General Public License for more details.
       
    14 
       
    15 # $Id: simplexml.py,v 1.30 2006/06/03 12:22:34 normanr Exp $
       
    16 
       
    17 """Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams.
       
    18 I'm personally using it in many other separate projects. It is designed to be as standalone as possible."""
       
    19 
       
    20 import xml.parsers.expat
       
    21 
       
    22 def XMLescape(txt):
       
    23     """Returns provided string with symbols & < > " replaced by their respective XML entities."""
       
    24     return txt.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
       
    25 
       
    26 ENCODING='utf-8'
       
    27 def ustr(what):
       
    28     """Converts object "what" to unicode string using it's own __str__ method if accessible or unicode method otherwise."""
       
    29     if type(what) == type(u''): return what
       
    30     try: r=what.__str__()
       
    31     except AttributeError: r=str(what)
       
    32     if type(r)<>type(u''): return unicode(r,ENCODING)
       
    33     return r
       
    34 
       
    35 class Node:
       
    36     """ Node class describes syntax of separate XML Node. It have a constructor that permits node creation
       
    37         from set of "namespace name", attributes and payload of text strings and other nodes.
       
    38         It does not natively support building node from text string and uses NodeBuilder class for that purpose.
       
    39         After creation node can be mangled in many ways so it can be completely changed.
       
    40         Also node can be serialised into string in one of two modes: default (where the textual representation
       
    41         of node describes it exactly) and "fancy" - with whitespace added to make indentation and thus make
       
    42         result more readable by human.
       
    43 
       
    44         Node class have attribute FORCE_NODE_RECREATION that is defaults to False thus enabling fast node
       
    45         replication from the some other node. The drawback of the fast way is that new node shares some
       
    46         info with the "original" node that is changing the one node may influence the other. Though it is
       
    47         rarely needed (in xmpppy it is never needed at all since I'm usually never using original node after
       
    48         replication (and using replication only to move upwards on the classes tree).
       
    49     """
       
    50     FORCE_NODE_RECREATION=0
       
    51     def __init__(self, tag=None, attrs={}, payload=[], parent=None, node=None):
       
    52         """ Takes "tag" argument as the name of node (prepended by namespace, if needed and separated from it
       
    53             by a space), attrs dictionary as the set of arguments, payload list as the set of textual strings
       
    54             and child nodes that this node carries within itself and "parent" argument that is another node
       
    55             that this one will be the child of. Also the __init__ can be provided with "node" argument that is 
       
    56             either a text string containing exactly one node or another Node instance to begin with. If both
       
    57             "node" and other arguments is provided then the node initially created as replica of "node"
       
    58             provided and then modified to be compliant with other arguments."""
       
    59         if node:
       
    60             if self.FORCE_NODE_RECREATION and type(node)==type(self): node=str(node)
       
    61             if type(node)<>type(self): node=NodeBuilder(node,self)
       
    62             else:
       
    63                 self.name,self.namespace,self.attrs,self.data,self.kids,self.parent = node.name,node.namespace,{},[],[],node.parent
       
    64                 for key  in node.attrs.keys(): self.attrs[key]=node.attrs[key]
       
    65                 for data in node.data: self.data.append(data)
       
    66                 for kid  in node.kids: self.kids.append(kid)
       
    67         else: self.name,self.namespace,self.attrs,self.data,self.kids,self.parent = 'tag','',{},[],[],None
       
    68 
       
    69         if tag: self.namespace, self.name = ([self.namespace]+tag.split())[-2:]
       
    70         if parent: self.parent = parent
       
    71         if self.parent and not self.namespace: self.namespace=self.parent.namespace
       
    72         for attr in attrs.keys():
       
    73             self.attrs[attr]=attrs[attr]
       
    74         if type(payload) in (type(''),type(u'')): payload=[payload]
       
    75         for i in payload:
       
    76             if type(i)==type(self): self.addChild(node=i)
       
    77             else: self.data.append(ustr(i))
       
    78 
       
    79     def __str__(self,fancy=0):
       
    80         """ Method used to dump node into textual representation.
       
    81             if "fancy" argument is set to True produces indented output for readability."""
       
    82         s = (fancy-1) * 2 * ' ' + "<" + self.name
       
    83         if self.namespace:
       
    84             if not self.parent or self.parent.namespace!=self.namespace:
       
    85                 s = s + ' xmlns="%s"'%self.namespace
       
    86         for key in self.attrs.keys():
       
    87             val = ustr(self.attrs[key])
       
    88             s = s + ' %s="%s"' % ( key, XMLescape(val) )
       
    89         s = s + ">"
       
    90         cnt = 0 
       
    91         if self.kids:
       
    92             if fancy: s = s + "\n"
       
    93             for a in self.kids:
       
    94                 if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt])
       
    95                 elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip())
       
    96                 s = s + a.__str__(fancy and fancy+1)
       
    97                 cnt=cnt+1
       
    98         if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
       
    99         elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip())
       
   100         if not self.kids and s[-1:]=='>':
       
   101             s=s[:-1]+' />'
       
   102             if fancy: s = s + "\n"
       
   103         else:
       
   104             if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
       
   105             s = s + "</" + self.name + ">"
       
   106             if fancy: s = s + "\n"
       
   107         return s
       
   108     def getCDATA(self):
       
   109         """ Serialise node, dropping all tags and leaving CDATA intact.
       
   110             That is effectively kills all formatiing, leaving only text were contained in XML.
       
   111         """
       
   112         s =""
       
   113         cnt = 0
       
   114         if self.kids:
       
   115             for a in self.kids:
       
   116                 s=s+self.data[cnt]
       
   117                 s = s + mystr(a)
       
   118                 cnt=cnt+1
       
   119         if (len(self.data)-1) >= cnt: s = s + self.data[cnt]
       
   120         return s
       
   121     def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
       
   122         """ If "node" argument is provided, adds it as child node. Else creates new node from
       
   123             the other arguments' values and adds it as well."""
       
   124         if attrs.has_key('xmlns'):
       
   125             raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
       
   126         if namespace: name=namespace+' '+name
       
   127         if node:
       
   128             newnode=node
       
   129             node.parent = self
       
   130         else: newnode=Node(tag=name, parent=self, attrs=attrs, payload=payload)
       
   131         self.kids.append(newnode)
       
   132         return newnode
       
   133     def addData(self, data):
       
   134         """ Adds some CDATA to node. """
       
   135         self.data.append(ustr(data))
       
   136     def clearData(self):
       
   137         """ Removes all CDATA from the node. """
       
   138         self.data=[]
       
   139     def delAttr(self, key):
       
   140         """ Deletes an attribute "key" """
       
   141         del self.attrs[key]
       
   142     def delChild(self, node, attrs={}):
       
   143         """ Deletes the "node" from the node's childs list, if "node" is an instance.
       
   144             Else deletes the first node that have specified name and (optionally) attributes. """
       
   145         if type(node)<>type(self): node=self.getTag(node,attrs)
       
   146         self.kids.remove(node)
       
   147         return node
       
   148     def getAttrs(self):
       
   149         """ Returns all node's attributes as dictionary. """
       
   150         return self.attrs
       
   151     def getAttr(self, key):
       
   152         """ Returns value of specified attribute. """
       
   153         try: return self.attrs[key]
       
   154         except: return None
       
   155     def getChildren(self):
       
   156         """ Returns all node's child nodes as list. """
       
   157         return self.kids
       
   158     def getData(self):
       
   159         """ Returns all node CDATA as string (concatenated). """
       
   160         return ''.join(self.data)
       
   161     def getName(self):
       
   162         """ Returns the name of node """
       
   163         return self.name
       
   164     def getNamespace(self):
       
   165         """ Returns the namespace of node """
       
   166         return self.namespace
       
   167     def getParent(self):
       
   168         """ Returns the parent of node (if present). """
       
   169         return self.parent
       
   170     def getPayload(self):
       
   171         """ Return the payload of node i.e. list of child nodes and CDATA entries.
       
   172             F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list:
       
   173             ['text1', <nodea instance>, <nodeb instance>, ' text2']. """
       
   174         ret=[]
       
   175         for i in range(len(self.kids)+len(self.data)+1):
       
   176             try:
       
   177                 if self.data[i]: ret.append(self.data[i])
       
   178             except IndexError: pass
       
   179             try: ret.append(self.kids[i])
       
   180             except IndexError: pass
       
   181         return ret
       
   182     def getTag(self, name, attrs={}, namespace=None): 
       
   183         """ Filters all child nodes using specified arguments as filter.
       
   184             Returns the first found or None if not found. """
       
   185         return self.getTags(name, attrs, namespace, one=1)
       
   186     def getTagAttr(self,tag,attr):
       
   187         """ Returns attribute value of the child with specified name (or None if no such attribute)."""
       
   188         try: return self.getTag(tag).attrs[attr]
       
   189         except: return None
       
   190     def getTagData(self,tag):
       
   191         """ Returns cocatenated CDATA of the child with specified name."""
       
   192         try: return self.getTag(tag).getData()
       
   193         except: return None
       
   194     def getTags(self, name, attrs={}, namespace=None, one=0):
       
   195         """ Filters all child nodes using specified arguments as filter.
       
   196             Returns the list of nodes found. """
       
   197         nodes=[]
       
   198         for node in self.kids:
       
   199             if namespace and namespace<>node.getNamespace(): continue
       
   200             if node.getName() == name:
       
   201                 for key in attrs.keys():
       
   202                    if not node.attrs.has_key(key) or node.attrs[key]<>attrs[key]: break
       
   203                 else: nodes.append(node)
       
   204             if one and nodes: return nodes[0]
       
   205         if not one: return nodes
       
   206     def setAttr(self, key, val):
       
   207         """ Sets attribute "key" with the value "val". """
       
   208         self.attrs[key]=val
       
   209     def setData(self, data):
       
   210         """ Sets node's CDATA to provided string. Resets all previous CDATA!"""
       
   211         self.data=[ustr(data)]
       
   212     def setName(self,val):
       
   213         """ Changes the node name. """
       
   214         self.name = val
       
   215     def setNamespace(self, namespace):
       
   216         """ Changes the node namespace. """
       
   217         self.namespace=namespace
       
   218     def setParent(self, node): 
       
   219         """ Sets node's parent to "node". WARNING: do not checks if the parent already present 
       
   220             and not removes the node from the list of childs of previous parent. """
       
   221         self.parent = node
       
   222     def setPayload(self,payload,add=0):
       
   223         """ Sets node payload according to the list specified. WARNING: completely replaces all node's
       
   224             previous content. If you wish just to add child or CDATA - use addData or addChild methods. """
       
   225         if type(payload) in (type(''),type(u'')): payload=[payload]
       
   226         if add: self.kids+=payload
       
   227         else: self.kids=payload
       
   228     def setTag(self, name, attrs={}, namespace=None):
       
   229         """ Same as getTag but if the node with specified namespace/attributes not found, creates such
       
   230             node and returns it. """
       
   231         node=self.getTags(name, attrs, namespace=namespace, one=1)
       
   232         if node: return node
       
   233         else: return self.addChild(name, attrs, namespace=namespace)
       
   234     def setTagAttr(self,tag,attr,val):
       
   235         """ Creates new node (if not already present) with name "tag"
       
   236             and sets it's attribute "attr" to value "val". """
       
   237         try: self.getTag(tag).attrs[attr]=val
       
   238         except: self.addChild(tag,attrs={attr:val})
       
   239     def setTagData(self,tag,val,attrs={}):
       
   240         """ Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs"
       
   241             and sets it's CDATA to string "val". """
       
   242         try: self.getTag(tag,attrs).setData(ustr(val))
       
   243         except: self.addChild(tag,attrs,payload=[ustr(val)])
       
   244     def has_attr(self,key):
       
   245         """ Checks if node have attribute "key"."""
       
   246         return self.attrs.has_key(key)
       
   247     def __getitem__(self,item):
       
   248         """ Returns node's attribute "item" value. """
       
   249         return self.getAttr(item)
       
   250     def __setitem__(self,item,val):
       
   251         """ Sets node's attribute "item" value. """
       
   252         return self.setAttr(item,val)
       
   253     def __delitem__(self,item):
       
   254         """ Deletes node's attribute "item". """
       
   255         return self.delAttr(item,val)
       
   256     def __getattr__(self,attr):
       
   257         """ Reduce memory usage caused by T/NT classes - use memory only when needed. """
       
   258         if attr=='T':
       
   259             self.T=T(self)
       
   260             return self.T
       
   261         if attr=='NT':
       
   262             self.NT=NT(self)
       
   263             return self.NT
       
   264         raise AttributeError
       
   265 
       
   266 class T:
       
   267     """ Auxiliary class used to quick access to node's child nodes. """
       
   268     def __init__(self,node): self.__dict__['node']=node
       
   269     def __getattr__(self,attr): return self.node.getTag(attr)
       
   270     def __setattr__(self,attr,val):
       
   271         if isinstance(val,Node): Node.__init__(self.node.setTag(attr),node=val)
       
   272         else: return self.node.setTagData(attr,val)
       
   273     def __delattr__(self,attr): return self.node.delChild(attr)
       
   274 
       
   275 class NT(T):
       
   276     """ Auxiliary class used to quick create node's child nodes. """
       
   277     def __getattr__(self,attr): return self.node.addChild(attr)
       
   278     def __setattr__(self,attr,val):
       
   279         if isinstance(val,Node): self.node.addChild(attr,node=val)
       
   280         else: return self.node.addChild(attr,payload=[val])
       
   281 
       
   282 DBG_NODEBUILDER = 'nodebuilder'
       
   283 class NodeBuilder:
       
   284     """ Builds a Node class minidom from data parsed to it. This class used for two purposes:
       
   285         1. Creation an XML Node from a textual representation. F.e. reading a config file. See an XML2Node method.
       
   286         2. Handling an incoming XML stream. This is done by mangling 
       
   287            the __dispatch_depth parameter and redefining the dispatch method.
       
   288         You do not need to use this class directly if you do not designing your own XML handler."""
       
   289     def __init__(self,data=None,initial_node=None):
       
   290         """ Takes two optional parameters: "data" and "initial_node".
       
   291             By default class initialised with empty Node class instance.
       
   292             Though, if "initial_node" is provided it used as "starting point".
       
   293             You can think about it as of "node upgrade".
       
   294             "data" (if provided) feeded to parser immidiatedly after instance init.
       
   295             """
       
   296         self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start')
       
   297         self._parser = xml.parsers.expat.ParserCreate(namespace_separator=' ')
       
   298         self._parser.StartElementHandler       = self.starttag
       
   299         self._parser.EndElementHandler         = self.endtag
       
   300         self._parser.CharacterDataHandler      = self.handle_data
       
   301         self._parser.StartNamespaceDeclHandler = self.handle_namespace_start
       
   302         self.Parse = self._parser.Parse
       
   303 
       
   304         self.__depth = 0
       
   305         self._dispatch_depth = 1
       
   306         self._document_attrs = None
       
   307         self._mini_dom=initial_node
       
   308         self.last_is_data = 1
       
   309         self._ptr=None
       
   310         self.namespaces={"http://www.w3.org/XML/1998/namespace":'xml:'}
       
   311         self.xmlns="http://www.w3.org/XML/1998/namespace"
       
   312 
       
   313         if data: self._parser.Parse(data,1)
       
   314 
       
   315     def destroy(self):
       
   316         """ Method used to allow class instance to be garbage-collected. """
       
   317         self._parser.StartElementHandler       = None
       
   318         self._parser.EndElementHandler         = None
       
   319         self._parser.CharacterDataHandler      = None
       
   320         self._parser.StartNamespaceDeclHandler = None
       
   321 
       
   322     def starttag(self, tag, attrs):
       
   323         """XML Parser callback. Used internally"""
       
   324         attlist=attrs.keys()       #
       
   325         for attr in attlist:       # FIXME: Crude hack. And it also slows down the whole library considerably.
       
   326             sp=attr.rfind(" ")     #
       
   327             if sp==-1: continue    #
       
   328             ns=attr[:sp]           #
       
   329             attrs[self.namespaces[ns]+attr[sp+1:]]=attrs[attr]
       
   330             del attrs[attr]        #
       
   331         self.__depth += 1
       
   332         self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`), 'down')
       
   333         if self.__depth == self._dispatch_depth:
       
   334             if not self._mini_dom : self._mini_dom = Node(tag=tag, attrs=attrs)
       
   335             else: Node.__init__(self._mini_dom,tag=tag, attrs=attrs)
       
   336             self._ptr = self._mini_dom
       
   337         elif self.__depth > self._dispatch_depth:
       
   338             self._ptr.kids.append(Node(tag=tag,parent=self._ptr,attrs=attrs))
       
   339             self._ptr = self._ptr.kids[-1]
       
   340         if self.__depth == 1:
       
   341             self._document_attrs = attrs
       
   342             ns, name = (['']+tag.split())[-2:]
       
   343             self.stream_header_received(ns, name, attrs)
       
   344         if not self.last_is_data and self._ptr.parent: self._ptr.parent.data.append('')
       
   345         self.last_is_data = 0
       
   346 
       
   347     def endtag(self, tag ):
       
   348         """XML Parser callback. Used internally"""
       
   349         self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up')
       
   350         if self.__depth == self._dispatch_depth:
       
   351             self.dispatch(self._mini_dom)
       
   352         elif self.__depth > self._dispatch_depth:
       
   353             self._ptr = self._ptr.parent
       
   354         else:
       
   355             self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop')
       
   356         self.__depth -= 1
       
   357         self.last_is_data = 0
       
   358         if self.__depth == 0: self.stream_footer_received()
       
   359 
       
   360     def handle_data(self, data):
       
   361         """XML Parser callback. Used internally"""
       
   362         self.DEBUG(DBG_NODEBUILDER, data, 'data')
       
   363         if not self._ptr: return
       
   364         if self.last_is_data:
       
   365             self._ptr.data[-1] += data
       
   366         else:
       
   367             self._ptr.data.append(data)
       
   368             self.last_is_data = 1
       
   369 
       
   370     def handle_namespace_start(self, prefix, uri):
       
   371         """XML Parser callback. Used internally"""
       
   372         if prefix: self.namespaces[uri]=prefix+':'
       
   373         else: self.xmlns=uri
       
   374     def DEBUG(self, level, text, comment=None):
       
   375         """ Gets all NodeBuilder walking events. Can be used for debugging if redefined."""
       
   376     def getDom(self):
       
   377         """ Returns just built Node. """
       
   378         return self._mini_dom
       
   379     def dispatch(self,stanza):
       
   380         """ Gets called when the NodeBuilder reaches some level of depth on it's way up with the built
       
   381             node as argument. Can be redefined to convert incoming XML stanzas to program events. """
       
   382     def stream_header_received(self,ns,tag,attrs):
       
   383         """ Method called when stream just opened. """
       
   384     def stream_footer_received(self):
       
   385         """ Method called when stream just closed. """
       
   386 
       
   387 def XML2Node(xml):
       
   388     """ Converts supplied textual string into XML node. Handy f.e. for reading configuration file.
       
   389         Raises xml.parser.expat.parsererror if provided string is not well-formed XML. """
       
   390     return NodeBuilder(xml).getDom()
       
   391 
       
   392 def BadXML2Node(xml):
       
   393     """ Converts supplied textual string into XML node. Survives if xml data is cutted half way round.
       
   394         I.e. "<html>some text <br>some more text". Will raise xml.parser.expat.parsererror on misplaced
       
   395         tags though. F.e. "<b>some text <br>some more text</b>" will not work."""
       
   396     return NodeBuilder(xml).getDom()