CIT 597 Second SAX Example
Fall 2002, David Matuszek

The following program reads in and prints out an XML file. Although basically a trivial program, it actually exercises all the most important features of the SAX parser. This code is available as a zip file containing a BlueJ package.

EchoXmlFile.java
/**
 * Program to read in and display an XML file. This code is "inspired by"
 * Sun Microsystems' tutorial example, <a href =
 * "http://java.sun.com/xml/jaxp/dist/1.1/docs/tutorial/sax/2a_echo.html">
 * Echoing an XML File with the SAX Parser</a>. Refer to that file for
 * a copyright notice; it is not copied here because the resultant
 * program bears little resemblance to the program used as a model.
 * 
 * @author David Matuszek
 * @version 1.0
 */
import java.io.*;
import java.awt.*;

import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.SAXParserFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;

/**
 * SAX event handler that echoes every parser callback it receives
 * (document start/end, elements, character data, ignorable whitespace,
 * processing instructions, errors and warnings) as one line of text,
 * indented according to the current element nesting depth.
 * <p>
 * The output writer, the document locator and the indentation level are
 * kept in static fields, so they are shared by all instances of this
 * class; parse one document at a time.
 */
public class EchoXmlFile extends DefaultHandler {

    /** Character encoding used for the echoed output. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /** Number of characters of indentation per nesting level. */
    private static final int INDENT_STEP = 4;

    /**
     * Indentation guide. Nesting deeper than this string allows is
     * displayed using the whole string.
     */
    private static final String INDENT_GUIDE =
        "|   |   |   |   |   |   |   |   |   |   |   |   |   |   ";

    /** Destination of the echoed events; created on first use if unset. */
    static private Writer out;
    static private String nl = System.getProperty("line.separator");
    /** Locator handed over by the parser, or null before parsing starts. */
    static private Locator documentLocator;
    /** Current element nesting depth; never negative. */
    static private int indentLevel = 0;

    /**
     * Main method to read in and display an XML file. Asks the user for
     * a file, parses it with a validating SAX parser, and echoes the
     * parser events to standard output.
     *
     * @param args Ignored.
     */
    public static void main(String args[]) {

        File inputFile = getFile("Get what XML file?",
                                 FileDialog.LOAD);
        if (inputFile == null) {
            // getFile has already reported the cancellation. Exit
            // explicitly (same status as before) so that a lingering
            // AWT thread cannot keep the JVM alive.
            System.exit(-1);
            return;
        }

        // Use an instance of ourselves as the SAX event handler
        DefaultHandler handler = new EchoXmlFile();

        // Use the validating parser.
        // NOTE: validation needs DTD processing, so DTDs stay enabled;
        // this tool is meant for local, trusted documents only.
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setValidating(true);

        try {
            // Set up output stream
            out = new OutputStreamWriter(System.out, OUTPUT_ENCODING);

            // Parse the input
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(inputFile, handler);
        }
        catch (SAXParseException e) {
            printSaxParseException("caught in 'main' method", e);
        }
        catch (Exception t) {
            // The locator may not exist yet (for example when the parser
            // could not even be created), so look it up null-safely.
            SAXException e = new SAXException(currentSystemId(), t);
            e.printStackTrace();
        }
        System.exit(0);
    }

    //===========================================================
    // SAX ContentHandler methods
    //===========================================================

    /**
     * Overrides <code>setDocumentLocator(Locator <i>locator</i>)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * This method is used to save the document location in case
     * it is ever needed (for example, in an error message).
     *
     * @param locator The locator supplied by the parser.
     */
    public void setDocumentLocator(Locator locator) {
        try {
            documentLocator = locator;
            write(nl + "setDocumentLocator");
            write(nl + "    (Saving \"" + locator.getSystemId() + "\")");
        } catch (SAXException e) {
            // This callback is not allowed to throw; report and go on.
            e.printStackTrace();
        }
    }

    /**
     * Overrides <code>processingInstruction(String <i>target</i>,
     * String <i>data</i>)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Called whenever a processing instruction (PI) is encountered.
     *
     * @param target The processing instruction target.
     * @param data The processing instruction data.
     * @throws SAXException If the output cannot be written.
     */
    public void processingInstruction(String target, String data)
                throws SAXException {
        write(nl + "processingInstruction");
        write(nl + "     <?" + target + " " + data + "?>");
    }

    /**
     * Overrides <code>startDocument()</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Called once at the beginning of document processing, but
     * after setDocumentLocator has been called.
     *
     * @throws SAXException If the output cannot be written.
     */
    public void startDocument()
                throws SAXException {
        // Start every document at the left margin, even if an earlier
        // parse was aborted in the middle of an element.
        indentLevel = 0;
        write(nl + "startDocument ");
    }

    /**
     * Overrides <code>endDocument()</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Called once at the end of document processing; no further
     * callbacks will occur.
     *
     * @throws SAXException If the output cannot be written.
     */
    public void endDocument()
                throws SAXException {
        // write() flushes after every call, so no extra flush is needed.
        write(nl + "endDocument" + nl);
    }

    /**
     * Overrides <code>startElement(...)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Called for each start tag encountered.
     *
     * @param namespaceURI Required if the namespaces property is true.
     * @param localName The local name (without prefix), or the empty
     *        string if Namespace processing is not being performed.
     * @param qualifiedName The qualified name (with prefix), or the
     *        empty string if qualified names are not available.
     * @param attributes The specified or defaulted attributes;
     *        <code>null</code> is treated as "no attributes".
     * @throws SAXException If the output cannot be written.
     */
    public void startElement(String namespaceURI,
                             String localName,
                             String qualifiedName,
                             Attributes attributes)
                throws SAXException {
        // Emit element name
        write(nl + "startElement " + indent(+1) + "<" +
             getName(localName, qualifiedName));

        // Emit each attribute name/value pair in this element
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                String attributeName = getName(attributes.getLocalName(i),
                                               attributes.getQName(i));
                write(" " + attributeName + "=\"" +
                     attributes.getValue(i) + "\"");
            }
        }
        write(">");
    }

    /**
     * Overrides <code>endElement(...)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Called for each end tag encountered.
     *
     * @param namespaceUri Required if the namespaces property is true.
     * @param localName The local name (without prefix), or the empty
     *        string if Namespace processing is not being performed.
     * @param qualifiedName The qualified name (with prefix), or the
     *        empty string if qualified names are not available.
     * @throws SAXException If the output cannot be written.
     */
    public void endElement(String namespaceUri,
                           String localName,
                           String qualifiedName)
                throws SAXException {
        write(nl + "endElement   " + indent(-1) +
             "</" + getName(localName, qualifiedName) + ">");
    }

    /**
     * Overrides <code>characters(char[] <i>ch</i>,
     * int <i>start</i>, int <i>length</i>)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Called one <i>or more</i> times for the content characters
     * of each element. The particular characters may be accessed
     * with <code>new String(<i>buf</i>, <i>offset</i>, <i>len</i>)</code>.
     * The text is echoed with leading and trailing whitespace removed.
     *
     * @param buf The buffer holding the characters.
     * @param offset Index of the first character to use.
     * @param len Number of characters to use.
     * @throws SAXException If the output cannot be written.
     */
    public void characters(char buf[], int offset, int len)
                throws SAXException {
        String s = new String(buf, offset, len);
        write(nl + "characters   " + indent(0) + "[" + s.trim() + "]");
    }

    /**
     * Overrides <code>ignorableWhitespace(char[] <i>ch</i>,
     * int <i>start</i>, int <i>length</i>)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ContentHandler</code>.
     * Can only be called when validation is being performed.
     * Only the fact that whitespace was seen is echoed; the whitespace
     * itself (available as
     * <code>new String(<i>buf</i>, <i>offset</i>, <i>len</i>)</code>)
     * is not printed.
     *
     * @param buf The buffer holding the whitespace characters.
     * @param offset Index of the first character.
     * @param len Number of characters.
     * @throws SAXException If the output cannot be written.
     */
    public void ignorableWhitespace(char buf[], int offset, int len)
                throws SAXException {
        write(nl + "ignorableW..." + indent(0));
    }

    //===========================================================
    // SAX ErrorHandler methods
    //===========================================================

    /**
     * Overrides <code>error(SAXParseException <i>exception</i>)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ErrorHandler</code>.
     * Called when a recoverable parser error occurs.
     *
     * @param exception Description of the error.
     * @throws SAXException Never thrown by this implementation.
     */
    public void error(SAXParseException exception)
                throws SAXException {
        printSaxParseException("caught in 'error' method", exception);
    }

    /**
     * Overrides <code>warning(SAXParseException <i>exception</i>)</code> in
     * <code>org.xml.sax.helpers.DefaultHandler</code>,
     * which in turn implements <code>org.xml.sax.ErrorHandler</code>.
     * Called when a parser warning occurs.
     *
     * @param exception Description of the warning.
     * @throws SAXException Never thrown by this implementation.
     */
    public void warning(SAXParseException exception)
                throws SAXException {
        printSaxParseException("caught in 'warning' method", exception);
    }

    //===========================================================
    // Utility Methods ...
    //===========================================================

    /**
     * Use a FileDialog to locate a file for input or output.
     *
     * @param message The message to display in the title bar of
     *        the FileDialog
     * @param mode Should be <code>FileDialog.LOAD</code> for reading,
     *        or <code>FileDialog.SAVE</code> for saving.
     * @return The File that has been located, or <code>null</code>
     *         if the operation was cancelled (a message is printed
     *         to <code>System.err</code> in that case).
     */
    public static File getFile(String message, int mode) {

        Frame owner = new Frame();
        FileDialog dialog = new FileDialog(owner, message, mode);
        String dir;
        String file;
        try {
            // create and display a file dialog
            dialog.setVisible(true);

            // get the directory and name of the selected file
            dir = dialog.getDirectory();
            file = dialog.getFile();
        } finally {
            // release the native window resources in every case
            dialog.dispose();
            owner.dispose();
        }

        // make sure we got a file name
        if (dir == null || file == null) {
            System.err.println("No file selected.");
            return null;
        }
        // let File join the two parts, so it does not matter whether
        // the directory ends with a separator
        return new File(dir, file);
    }

    /**
     * If the first String parameter is nonempty, return it,
     * else return the second string parameter.
     *
     * @param s1 The string to be tested.
     * @param s2 The alternate String.
     * @return s1 if it isn't null or empty, else s2.
     */
    private String getName(String s1, String s2) {
        if (s1 == null || "".equals(s1)) return s2;
        return s1;
    }

    /**
     * Returns a String to use for indentation and changes the static
     * variable <code>indentLevel</code> according to the input parameter
     * <code>change</code> as follows:
     * <ul>
     *   <li>If <code>change</code> &lt; 0, reduce the indentation
     *       level (but never below zero) and return a string at the
     *       new level</li>
     *   <li>If <code>change</code> = 0, return a string at the
     *       current level</li>
     *   <li>If <code>change</code> &gt; 0, increase the indentation,
     *       but return a string at the previous level</li>
     * </ul>
     *
     * @param change Whether to decrease, leave alone, or increase
     *      the indentation level.
     * @return A string to use for indentation.
     */
    private String indent(int change) {
        int returnLevel;

        if (change > 0) {
            returnLevel = indentLevel;
            indentLevel++;
        } else if (change == 0) {
            returnLevel = indentLevel;
        } else {
            // An end tag without a matching start tag must not drive the
            // level negative; substring() below would then fail.
            if (indentLevel > 0) {
                indentLevel--;
            }
            returnLevel = indentLevel;
        }

        int width = returnLevel * INDENT_STEP;
        if (width < INDENT_GUIDE.length()) {
            return INDENT_GUIDE.substring(0, width);
        }
        // deeper than the guide: use all of it
        return INDENT_GUIDE;
    }

    /**
     * Writes a string to the output and flushes it immediately. The
     * writer is created on first use (wrapping <code>System.out</code>)
     * if <code>main</code> has not set one up. I/O exceptions are
     * wrapped in SAX exceptions to suit the handler signatures.
     *
     * @param s The text to write.
     * @throws SAXException If an I/O error occurs.
     */
    private void write(String s)
                 throws SAXException {
        try {
            if (out == null) {
                out = new OutputStreamWriter(System.out, OUTPUT_ENCODING);
            }
            out.write(s);
            out.flush();
        } catch (IOException ioException) {
            throw new SAXParseException("I/O error",
                                        documentLocator,
                                        ioException);
        }
    }

    /**
     * Returns the system identifier of the document being parsed.
     *
     * @return The system identifier, or <code>null</code> if the parser
     *         has not supplied a locator (yet).
     */
    private static String currentSystemId() {
        if (documentLocator == null) {
            return null;
        }
        return documentLocator.getSystemId();
    }

    /**
     * Utility method to print information about a SAXException.
     * Safe to call before any document locator has been set.
     *
     * @param message A message to be included in the error output.
     * @param e The exception to be printed.
     */
    static void printSaxException(String message, SAXException e) {
        System.err.println();
        System.err.println("*** SAX Exception -- " + message);
        System.err.println("      SystemId = \"" +
                           currentSystemId() + "\"");
        e.printStackTrace(System.err);
    }

    /**
     * Utility method to print information about a SAXParseException.
     *
     * @param message A message to be included in the error output.
     * @param e The exception to be printed.
     */
    static void printSaxParseException(String message,
                                       SAXParseException e) {
        System.err.println();
        System.err.println("*** SAX Parse Exception -- " + message);
        System.err.println("      SystemId = \"" + e.getSystemId() + "\"");
        System.err.println("      PublicId = \"" + e.getPublicId() + "\"");
        System.err.println("      line number " + e.getLineNumber());
        e.printStackTrace(System.err);
    }
}

Example input
<?xml version="1.0"?>
<!DOCTYPE novel [
<!ELEMENT novel (foreword, chapter+)>
<!ELEMENT foreword (paragraph+)>
<!ELEMENT chapter (paragraph+)>
<!ELEMENT paragraph (#PCDATA)>
]>
<?xml-stylesheet href="styles.css" type="text/css"?>
<novel>
   <foreword>
      <paragraph>
         This is the great American novel.
      </paragraph>
   </foreword>
   <chapter>
      <paragraph>
         It was a dark and stormy night.
      </paragraph>
      <paragraph>
         Suddenly, a shot rang out!
      </paragraph>
   </chapter>
</novel>

Example output
setDocumentLocator
    (Saving "file:F:/Teaching/cit597-2002/Examples/novel-int-dtd.xml")
startDocument 
processingInstruction
     <?xml-stylesheet href="styles.css" type="text/css"?>
startElement <novel>
ignorableW...|   
ignorableW...|   
ignorableW...|   
startElement |   <foreword>
ignorableW...|   |   
ignorableW...|   |   
ignorableW...|   |   
startElement |   |   <paragraph>
characters   |   |   |   []
characters   |   |   |   []
characters   |   |   |   [This is the great American novel.]
characters   |   |   |   []
characters   |   |   |   []
endElement   |   |   </paragraph>
ignorableW...|   |   
ignorableW...|   |   
ignorableW...|   |   
endElement   |   </foreword>
ignorableW...|   
ignorableW...|   
ignorableW...|   
startElement |   <chapter>
ignorableW...|   |   
ignorableW...|   |   
ignorableW...|   |   
startElement |   |   <paragraph>
characters   |   |   |   []
characters   |   |   |   []
characters   |   |   |   [It was a dark and stormy night.]
characters   |   |   |   []
characters   |   |   |   []
endElement   |   |   </paragraph>
ignorableW...|   |   
ignorableW...|   |   
ignorableW...|   |   
startElement |   |   <paragraph>
characters   |   |   |   []
characters   |   |   |   []
characters   |   |   |   [Suddenly, a shot rang out!]
characters   |   |   |   []
characters   |   |   |   []
endElement   |   |   </paragraph>
ignorableW...|   |   
ignorableW...|   |   
ignorableW...|   |   
endElement   |   </chapter>
ignorableW...|   
ignorableW...|   
ignorableW...|   
endElement   </novel>
endDocument