# # SimpleXMLWriter # $Id: SimpleXMLWriter.py 2312 2005-03-02 18:13:39Z fredrik $ # # a simple XML writer # # history: # 2001-12-28 fl created # 2002-11-25 fl fixed attribute encoding # 2002-12-02 fl minor fixes for 1.5.2 # 2004-06-17 fl added pythondoc markup # 2004-07-23 fl added flush method (from Jay Graves) # 2004-10-03 fl added declaration method # # Copyright (c) 2001-2004 by Fredrik Lundh # # fredrik@pythonware.com # http://www.pythonware.com # # -------------------------------------------------------------------- # The SimpleXMLWriter module is # # Copyright (c) 2001-2004 by Fredrik Lundh # # By obtaining, using, and/or copying this software and/or its # associated documentation, you agree that you have read, understood, # and will comply with the following terms and conditions: # # Permission to use, copy, modify, and distribute this software and # its associated documentation for any purpose and without fee is # hereby granted, provided that the above copyright notice appears in # all copies, and that both that copyright notice and this permission # notice appear in supporting documentation, and that the name of # Secret Labs AB or the author not be used in advertising or publicity # pertaining to distribution of the software without specific, written # prior permission. # # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE # OF THIS SOFTWARE. # -------------------------------------------------------------------- ## # Tools to write XML files, without having to deal with encoding # issues, well-formedness, etc. #
# The current version does not provide built-in support for # namespaces. To create files using namespaces, you have to provide # "xmlns" attributes and explicitly add prefixes to tags and # attributes. # #
#
# from elementtree.SimpleXMLWriter import XMLWriter
# import sys
#
# w = XMLWriter(sys.stdout)
#
# html = w.start("html")
#
# w.start("head")
# w.element("title", "my document")
# w.element("meta", name="generator", value="my application 1.0")
# w.end()
#
# w.start("body")
# w.element("h1", "this is a heading")
# w.element("p", "this is a paragraph")
#
# w.start("p")
# w.data("this is ")
# w.element("b", "bold")
# w.data(" and ")
# w.element("i", "italic")
# w.data(".")
# w.end("p")
#
# w.close(html)
#
##

import re
import string
import sys

try:
    unicode("")
except NameError:
    ##
    # Checks that a string can be represented in the given encoding.
    # This variant is used when there is no separate <b>unicode</b>
    # type (Python 3), where every string is already Unicode text.
    # The text is round-tripped through the codec, so unencodable
    # characters still raise UnicodeError, but the result stays a
    # text string so callers can keep using "%" formatting on it.
    #
    # @param s Text string.
    # @param encoding Target encoding name.
    # @return The text, unchanged.
    # @exception UnicodeError If the text cannot be encoded.

    def encode(s, encoding):
        return s.encode(encoding).decode(encoding)
else:
    ##
    # Encodes a string using the given encoding.
    #
    # @param s 8-bit string or Unicode string.
    # @param encoding Target encoding name.
    # @return An encoded 8-bit string.
    # @exception UnicodeError If the string cannot be encoded.

    def encode(s, encoding):
        return s.encode(encoding)

# reserved characters plus everything outside the ASCII range
_escape = re.compile(r'(?:[&<>"]|[^\x00-\x7f])+')

# characters outside the ASCII range only; used on text whose reserved
# characters have already been replaced by named entities
_non_ascii = re.compile(r"[^\x00-\x7f]+")

##
# Maps characters to numerical character references ("&#NNN;").
#
# @param text Text to process.
# @param pattern Compiled pattern selecting the characters to replace.
#    Defaults to the reserved characters (&, <, >, ") and all
#    non-ASCII characters.
# @return The processed text, passed through encode() as "ascii".

def encode_entity(text, pattern=_escape):
    def escape_entities(m):
        # a match may span several characters; each gets its own reference
        return "".join(["&#%d;" % ord(char) for char in m.group()])
    return encode(pattern.sub(escape_entities, text), "ascii")

del _escape

def _str_replace(s, old, new):
    # default "replace" hook: use the string's own method (the
    # string.replace function is not available on Python 3)
    return s.replace(old, new)

#
# the following functions assume an ascii-compatible encoding
# (or "utf-16")

##
# Escapes character data for use as element content.
#
# @param s Text to escape.
# @param encoding Optional target encoding.  If given, the escaped text
#    is passed through encode(); characters that cannot be encoded are
#    written as numerical character references instead.
# @param replace Function called as replace(string, old, new).
# @return The escaped text.

def escape_cdata(s, encoding=None, replace=_str_replace):
    # "&" must be handled first, or the entities added below would
    # themselves get escaped
    escaped = replace(s, "&", "&amp;")
    escaped = replace(escaped, "<", "&lt;")
    escaped = replace(escaped, ">", "&gt;")
    if encoding:
        try:
            return encode(escaped, encoding)
        except UnicodeError:
            # only the non-ASCII characters still need attention here;
            # running the full reserved-character pattern over already
            # escaped text would turn "&amp;" into "&#38;amp;"
            return encode_entity(escaped, _non_ascii)
    return escaped

##
# Escapes text for use as an attribute value.  Same as escape_cdata,
# but also replaces single and double quotes.
#
# @param s Text to escape.
# @param encoding Optional target encoding.  If given, the escaped text
#    is passed through encode(); characters that cannot be encoded are
#    written as numerical character references instead.
# @param replace Function called as replace(string, old, new).
# @return The escaped text.

def escape_attrib(s, encoding=None, replace=_str_replace):
    # "&" must be handled first (see escape_cdata)
    escaped = replace(s, "&", "&amp;")
    escaped = replace(escaped, "'", "&apos;")
    escaped = replace(escaped, "\"", "&quot;")
    escaped = replace(escaped, "<", "&lt;")
    escaped = replace(escaped, ">", "&gt;")
    if encoding:
        try:
            return encode(escaped, encoding)
        except UnicodeError:
            # see escape_cdata: avoid escaping the entities twice
            return encode_entity(escaped, _non_ascii)
    return escaped

##
# XML writer class.
#
# @param file A file or file-like object. This object must implement
#    a write method that takes an 8-bit string.
# @param encoding Optional encoding.

class XMLWriter:

    def __init__(self, file, encoding="us-ascii"):
        if not hasattr(file, "write"):
            # anything without a write method is treated as a file name.
            # NOTE(review): the file opened here is never closed by this
            # class; callers that pass a name cannot close it either.
            file = open(file, "w")
        self.__write = file.write
        if hasattr(file, "flush"):
            # shadow the no-op flush method with the real one
            self.flush = file.flush
        self.__open = 0 # true if start tag is open
        self.__tags = [] # escaped names of the currently open elements
        self.__data = [] # character data not yet written
        self.__encoding = encoding

    def __flush(self):
        # flush internal buffers: finish a pending start tag, then write
        # any buffered character data
        if self.__open:
            self.__write(">")
            self.__open = 0
        if self.__data:
            data = "".join(self.__data)
            self.__write(escape_cdata(data, self.__encoding))
            self.__data = []

    ##
    # Writes an XML declaration.

    def declaration(self):
        encoding = self.__encoding
        if encoding == "us-ascii" or encoding == "utf-8":
            # no encoding attribute needed: XML parsers assume UTF-8 by
            # default, and ASCII is a subset of UTF-8
            self.__write("<?xml version='1.0'?>\n")
        else:
            self.__write("<?xml version='1.0' encoding='%s'?>\n" % encoding)

    ##
    # Opens a new element.  Attributes can be given as keyword
    # arguments, or as a string/string dictionary. You can pass in
    # 8-bit strings or Unicode strings; the former are assumed to use
    # the encoding passed to the constructor.  The method returns an
    # opaque identifier that can be passed to the close method,
    # to close all open elements up to and including this one.
    #
    # @param tag Element tag.
    # @param attrib Attribute dictionary.  Alternatively, attributes
    #    can be given as keyword arguments.
    # @return An element identifier.

    def start(self, tag, attrib=None, **extra):
        self.__flush()
        tag = escape_cdata(tag, self.__encoding)
        self.__data = []
        self.__tags.append(tag)
        self.__write("<%s" % tag)
        if attrib or extra:
            # merge into a fresh dictionary so the caller's dictionary is
            # left alone; keyword arguments win over dictionary entries
            merged = dict(attrib or {})
            merged.update(extra)
            # attributes are written in sorted order
            for k, v in sorted(merged.items()):
                k = escape_cdata(k, self.__encoding)
                v = escape_attrib(v, self.__encoding)
                self.__write(" %s=\"%s\"" % (k, v))
        # leave the tag open, so an empty element can be closed with " />"
        self.__open = 1
        return len(self.__tags)-1

    ##
    # Adds a comment to the output stream.
    #
    # @param comment Comment text, as an 8-bit string or Unicode string.

    def comment(self, comment):
        self.__flush()
        self.__write("<!-- %s -->\n" % escape_cdata(comment, self.__encoding))

    ##
    # Adds character data to the output stream.
    #
    # @param text Character data, as an 8-bit string or Unicode string.

    def data(self, text):
        # buffered; escaped and written by the next flush
        self.__data.append(text)

    ##
    # Closes the current element (opened by the most recent call to
    # start).
    #
    # @param tag Element tag.  If given, the tag must match the start
    #    tag.  If omitted, the current element is closed.
    # @exception AssertionError If no element is open, or if the given
    #    tag does not match the current element.

    def end(self, tag=None):
        # raised explicitly (rather than via assert statements), so the
        # checks are not stripped when running under -O
        if tag:
            if not self.__tags:
                raise AssertionError("unbalanced end(%s)" % tag)
            if escape_cdata(tag, self.__encoding) != self.__tags[-1]:
                raise AssertionError(
                    "expected end(%s), got %s" % (self.__tags[-1], tag)
                    )
        elif not self.__tags:
            raise AssertionError("unbalanced end()")
        tag = self.__tags.pop()
        if self.__data:
            self.__flush()
        elif self.__open:
            # start tag still open and no content: write an empty element
            self.__open = 0
            self.__write(" />")
            return
        self.__write("</%s>" % tag)

    ##
    # Closes open elements, up to (and including) the element identified
    # by the given identifier.
    #
    # @param id Element identifier, as returned by the start method.

    def close(self, id):
        while len(self.__tags) > id:
            self.end()

    ##
    # Adds an entire element.  This is the same as calling start,
    # data, and end in sequence. The text argument
    # can be omitted.
    #
    # @param tag Element tag.
    # @param text Optional character data.  Skipped if empty.
    # @param attrib Attribute dictionary.  Alternatively, attributes
    #    can be given as keyword arguments.

    def element(self, tag, text=None, attrib=None, **extra):
        self.start(tag, attrib, **extra)
        if text:
            self.data(text)
        self.end()

    ##
    # Flushes the output stream.

    def flush(self):
        pass # replaced by the constructor