180 likes | 332 Views
Leonidas Fegaras. XML Tools. XML Processing. Well-formedness checks Reference expansion. document parser. document validator. application. XML infoset. XML infoset (annotated). XML document. DTD or XML schema. storage system. DOM.
E N D
Leonidas Fegaras XML Tools
XML Processing Well-formedness checks Reference expansion document parser document validator application XML infoset XML infoset (annotated) XML document DTD or XML schema storage system
DOM The Document Object Model (DOM) is a platform- and language-neutral interface that allows programs and scripts to dynamically access and update the content and structure of XML documents. The following is part of the DOM interface: public interface Node { public String getNodeName (); public String getNodeValue (); public NodeList getChildNodes (); public NamedNodeMap getAttributes (); } public interface Element extends Node { public NodeList getElementsByTagName ( String name ); } public interface Document extends Node { public Element getDocumentElement (); } public interface NodeList { public int getLength (); public Node item ( int index ); }
// DOM Example
import java.io.File;
import javax.xml.parsers.*;
import org.w3c.dom.*;

/**
 * Parses {@code depts.xml} into a DOM tree and prints, with no separator,
 * the first-child value of every {@code tel} element found below each
 * {@code dept} element whose first child has the value {@code cse}.
 * Only {@code dept} elements that are grandchildren of the document
 * element are considered.
 */
class Test {

    /**
     * Runs the query against {@code depts.xml} in the working directory.
     *
     * @param args ignored
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main ( String args[] ) throws Exception {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(new File("depts.xml"));
        NodeList nodes = doc.getDocumentElement().getChildNodes();
        for (int i = 0; i < nodes.getLength(); i++) {
            NodeList ndl = nodes.item(i).getChildNodes();
            for (int k = 0; k < ndl.getLength(); k++) {
                Node m = ndl.item(k);
                // Strings must be compared by content; == only tests identity.
                if (!"dept".equals(m.getNodeName())) {
                    continue;
                }
                // An empty <dept/> has no first child, so guard before dereferencing.
                Node first = m.getFirstChild();
                if (first == null || !"cse".equals(first.getNodeValue())) {
                    continue;
                }
                NodeList ncl = ((Element) m).getElementsByTagName("tel");
                for (int j = 0; j < ncl.getLength(); j++) {
                    Node content = ncl.item(j).getFirstChild();
                    // Skip empty <tel/> elements instead of failing on them.
                    if (content != null) {
                        System.out.print(content.getNodeValue());
                    }
                }
            }
        }
    }
}
// Better Programming
import java.io.File;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import java.util.Vector;

/**
 * A vector of DOM nodes that supports step-by-step navigation: each call to
 * {@link #child(String)} yields a new sequence holding the matching children
 * of every node in the current one.
 */
class Sequence extends Vector {

    /** Creates an empty sequence. */
    Sequence () {
        super();
    }

    /**
     * Creates a sequence whose only member is the document element of the
     * XML file with the given name.
     *
     * @param filename path of the XML file to parse
     * @throws Exception if the file cannot be read or parsed
     */
    Sequence ( String filename ) throws Exception {
        super();
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document parsed = builder.parse(new File(filename));
        Element root = parsed.getDocumentElement();
        add((Object) root);
    }

    /**
     * Collects, in order, every child of every node in this sequence whose
     * node name equals {@code tagname}.
     *
     * @param tagname the node name to match
     * @return a new sequence with the matching children
     */
    Sequence child ( String tagname ) {
        Sequence matches = new Sequence();
        for (Object entry : this) {
            NodeList kids = ((Node) entry).getChildNodes();
            int count = kids.getLength();
            for (int pos = 0; pos < count; pos++) {
                Node kid = kids.item(pos);
                if (kid.getNodeName().equals(tagname)) {
                    matches.add((Object) kid);
                }
            }
        }
        return matches;
    }

    /** Prints the {@code toString()} form of each member on its own line. */
    void print () {
        for (Object entry : this) {
            System.out.println(entry.toString());
        }
    }
}

/** Driver: prints the name nodes of the gradstudent children in cs.xml. */
class DOM {
    public static void main ( String args[] ) throws Exception {
        Sequence root = new Sequence("cs.xml");
        Sequence names = root.child("gradstudent").child("name");
        names.print();
    }
}
SAX • SAX is the Simple API for XML that allows you to process a document as it's being read • (in contrast to DOM, which requires the entire document to be read before it takes any action) • The SAX API is event based • The XML parser sends events, such as the start or the end of an element, to an event handler, which processes the information
Parser Events • Receive notification of the beginning of a document void startDocument () • Receive notification of the end of a document void endDocument () • Receive notification of the beginning of an element void startElement ( String namespace, String localName, String qName, Attributes atts ) • Receive notification of the end of an element void endElement ( String namespace, String localName, String qName ) • Receive notification of character data void characters ( char[] ch, int start, int length )
// SAX Example: a Printer
import java.io.FileReader;
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;

/**
 * SAX handler that writes the events it receives to standard output as
 * markup: an opening tag for each element start, a closing tag for each
 * element end, and character data as-is. Attributes are not printed.
 */
class Printer extends DefaultHandler {

    public Printer () {
        super();
    }

    /** Nothing is printed at the start of the document. */
    @Override
    public void startDocument () {}

    /** Terminates the output with a line break. */
    @Override
    public void endDocument () {
        System.out.println();
    }

    /** Prints {@code <tag>}; the uri, name and atts arguments are ignored. */
    @Override
    public void startElement ( String uri, String name, String tag, Attributes atts ) {
        // Plain ASCII quotes: typographic quotes are not valid Java string delimiters.
        System.out.print("<" + tag + ">");
    }

    /** Prints {@code </tag>}; the uri and name arguments are ignored. */
    @Override
    public void endElement ( String uri, String name, String tag ) {
        System.out.print("</" + tag + ">");
    }

    /** Prints the {@code length} characters of {@code text} beginning at {@code start}. */
    @Override
    public void characters ( char text[], int start, int length ) {
        System.out.print(new String(text, start, length));
    }
}
// The Child Handler
/**
 * SAX filter that passes on to the next handler only the subtrees rooted at
 * elements named {@code ptag} that sit one level below the first element it
 * sees open. Document start/end events are always passed on.
 */
class Child extends DefaultHandler {
    DefaultHandler next;    // the next handler in the pipeline
    String ptag;            // the tagname of the child
    boolean keep;           // are we keeping or skipping events?
    short level;            // the depth level of the current element

    /**
     * @param s the tag name to let through
     * @param n the handler that receives the events that are kept
     */
    public Child ( String s, DefaultHandler n ) {
        super();
        ptag = s;
        next = n;
        level = 0;
        keep = false;
    }

    public void startDocument () throws SAXException {
        next.startDocument();
    }

    public void endDocument () throws SAXException {
        next.endDocument();
    }

    // The Child Handler (cont.)

    /**
     * Decides, for an element opened at depth 1, whether its subtree is kept,
     * then forwards the event if events are currently being kept.
     */
    public void startElement ( String nm, String ln, String qn, Attributes a ) throws SAXException {
        boolean openedAtDepthOne = (level == 1);
        level++;
        if (openedAtDepthOne) {
            keep = ptag.equals(qn);
        }
        if (keep) {
            next.startElement(nm, ln, qn, a);
        }
    }

    /**
     * Forwards the event if events are being kept, then stops keeping once
     * the depth has dropped back to 1.
     */
    public void endElement ( String nm, String ln, String qn ) throws SAXException {
        if (keep) {
            next.endElement(nm, ln, qn);
        }
        level--;
        if (level == 1) {
            keep = false;
        }
    }

    /** Forwards character data only while events are being kept. */
    public void characters ( char[] text, int start, int length ) throws SAXException {
        if (!keep) {
            return;
        }
        next.characters(text, start, length);
    }
}
Forming the Pipeline class SAX { public static void main ( String args[] ) throws Exception { SAXParserFactory pf = SAXParserFactory.newInstance(); SAXParser parser = pf.newSAXParser(); DefaultHandler handler = new Child("gradstudent", new Child("name", new Printer())); parser.parse(new InputSource(new FileReader("cs.xml")), handler); } } Child:name SAX parser Printer Child:gradstudent
Example Input Stream <department> <deptname> Computer Science </deptname> <gradstudent> <name> <lastname> Smith </lastname> <firstname> John </firstname> </name> </gradstudent> ... </department> SAX Events SD: SE: department SE: deptname C: Computer Science EE: deptname SE: gradstudent SE: name SE: lastname C: Smith EE: lastname SE: firstname C: John EE: firstname EE: name EE: gradstudent ... EE: department ED: Child: gradstudent Child: name Printer
XSL Transformation A stylesheet specification language for converting XML documents into various forms (XML, HTML, plain text, etc). • Can transform each XML element into another element, add new elements into the output file, or remove elements. • Can rearrange and sort elements, test and make decisions about which elements to display, and much more. • Based on XPath: <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="/"> <students> <xsl:copy-of select="//student/name"/> </students> </xsl:template> </xsl:stylesheet>
XSLT Templates • XSL uses XPath to define parts of the source document that match one or more predefined templates. • When a match is found, XSLT will transform the matching part of the source document into the result document. • The parts of the source document that do not match any explicit template are processed by the default templates, so only their text content ends up in the result document. Form: <xsl:template match="XPath expression"> … </xsl:template> The default (implicit) templates visit all nodes and strip out all tags: <xsl:template match="*|/"> <xsl:apply-templates/> </xsl:template> <xsl:template match="text()|@*"> <xsl:value-of select="."/> </xsl:template>
Other XSLT Elements <xsl:value-of select="XPath expression"/> select the value of an XML element and add it to the output stream of the transformation, e.g. <xsl:value-of select="//books/book/author"/>. <xsl:copy-of select="XPath expression"/> copy the entire XML element to the output stream of the transformation. <xsl:apply-templates select="XPath expression"/> apply the template rules to the elements selected by the XPath expression. <xsl:element name="{XPath expression}"> … </xsl:element> add an element to the output with a tag-name derived from the XPath (the name attribute is an attribute value template, so the expression goes inside braces). Example: <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="employee"> <b> <xsl:apply-templates select="node()"/> </b> </xsl:template> <xsl:template match="surname"> <i> <xsl:value-of select="."/> </i> </xsl:template> </xsl:stylesheet>
Copy the Entire Document <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="/"> <xsl:apply-templates/> </xsl:template> <xsl:template match="text()"> <xsl:value-of select="."/> </xsl:template> <xsl:template match="*"> <xsl:element name="{name(.)}"> <xsl:apply-templates/> </xsl:element> </xsl:template> </xsl:stylesheet>
More on XSLT • Conflict resolution: more specific templates override more general templates. Templates are assigned default priorities, but these can be overridden using priority="n" in a template. • Modes can be used to group together templates. No mode is an empty mode. <xsl:template match="…" mode="A"> <xsl:apply-templates mode="B"/> </xsl:template> • Conditional and loop statements: <xsl:if test="XPath predicate"> body </xsl:if> <xsl:for-each select="XPath"> body </xsl:for-each> • Variables can be used to name data: <xsl:variable name="x"> value </xsl:variable> Variables are referenced as $x in XPath expressions (written {$x} inside attribute value templates).
// Using XSLT
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.w3c.dom.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.io.*;

/**
 * Applies the stylesheet {@code x.xsl} to the document {@code a.xml} and
 * writes the result of the transformation to standard output.
 */
class XSLT {

    /**
     * @param argv ignored
     * @throws Exception if either file cannot be read or parsed, or the
     *                   transformation fails
     */
    public static void main ( String argv[] ) throws Exception {
        File stylesheet = new File("x.xsl");
        File xmlfile = new File("a.xml");

        // Load the input document as a DOM tree.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document document = db.parse(xmlfile);

        // Build a transformer from the stylesheet.
        StreamSource stylesource = new StreamSource(stylesheet);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer(stylesource);

        // Transform the DOM tree, sending the output to stdout.
        DOMSource source = new DOMSource(document);
        StreamResult result = new StreamResult(System.out);
        transformer.transform(source, result);
    }
}