View Javadoc

1   /*
2    * Copyright (c) 2003-2008, by Henrik Arro and Contributors
3    *
4    * This file is part of JSeq, a tool to automatically create
5    * sequence diagrams by tracing program execution.
6    *
7    * See <http://jseq.sourceforge.net> for more information.
8    *
9    * JSeq is free software: you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser General Public License as
11   * published by the Free Software Foundation, either version 3 of
12   * the License, or (at your option) any later version.
13   *
14   * JSeq is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17   * GNU Lesser General Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser General Public License
20   * along with JSeq. If not, see <http://www.gnu.org/licenses/>.
21   */
22  
23  package th.co.edge.jseq.util;
24  
25  import java.util.ArrayList;
26  
27  import org.w3c.dom.DocumentType;
28  import org.w3c.dom.NamedNodeMap;
29  import org.w3c.dom.Node;
30  import org.w3c.dom.NodeList;
31  
/**
 * A utility class to work with <code>org.w3c.dom.Node</code> objects:
 * depth-first traversal and search, text and attribute lookup, and a simple
 * string serialization of a node tree.
 */
public class XMLUtil {
    /**
     * This class only contains static methods, so we hide the constructor.
     */
    private XMLUtil() {
    }

    /**
     * Traverses the given <code>Node</code> and its children depth-first,
     * calling the given <code>Visitor</code> for each node.
     *
     * <p>
     * If the traversal should be stopped at a certain node, the
     * <code>Visitor</code> should return <code>true</code> from the
     * <code>visit</code> method for that node. If the entire tree should be
     * traversed, the <code>visit</code> method should always return
     * <code>false</code>. Note that <code>afterVisit</code> is still called
     * for the node that stopped the traversal and for each of its ancestors.
     *
     * @param node
     *            the root <code>Node</code> at which to start the traversal
     * @param visitor
     *            the <code>Visitor</code>
     *
     * @see Visitor#visit(Node)
     */
    public static void traverse(Node node, Visitor visitor) {
        // A traversal is a search whose result is ignored.
        find(node, visitor);
    }

    /**
     * Searches the given <code>Node</code> and its children depth-first,
     * looking for the first <code>Node</code> that satisfies the given
     * <code>Visitor</code>.
     *
     * <p>
     * The children of a node are only searched if <code>visit</code> returned
     * <code>false</code> for the node itself.
     * <code>Visitor.afterVisit</code> is called for every node for which
     * <code>visit</code> was called, whether or not a match was found.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param visitor
     *            the <code>Visitor</code> that is called for every node, and
     *            determines if the right node has been found by returning
     *            <code>true</code> from the <code>visit</code> method
     *
     * @return the first <code>Node</code> in a depth-first search that
     *         satisfies <code>visitor</code>, or <code>null</code> if
     *         there is none.
     *
     * @see Visitor#visit(Node)
     */
    public static Node find(Node node, Visitor visitor) {
        Node result = null;
        if (visitor.visit(node)) {
            result = node;
        } else {
            NodeList children = node.getChildNodes();
            // Stop descending into siblings as soon as one subtree matched.
            for (int i = 0; result == null && i < children.getLength(); i++) {
                result = find(children.item(i), visitor);
            }
        }
        visitor.afterVisit(node);
        return result;
    }

    /**
     * Searches a <code>Node</code> and its children depth-first for an
     * element node with a given name.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tag to search for
     *
     * @return the <code>Node</code> found, or <code>null</code>
     */
    public static Node findTag(Node node, String tag) {
        return findTag(node, tag, null);
    }

    /**
     * Searches a <code>Node</code> and its children depth-first for an
     * element node with a given name whose first text sub-node has a given
     * text.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tag to search for
     * @param text
     *            the string that the first text sub-node of the found node
     *            must be equal to, after leading and trailing whitespace has
     *            been trimmed from the text sub-node, or <code>null</code>
     *            if only the tag name should be used to determine if a node
     *            matches
     *
     * @return the <code>Node</code> found, or <code>null</code>
     */
    public static Node findTag(Node node, String tag, String text) {
        return find(node, new TagFinder(tag, text));
    }

    /**
     * Searches a <code>Node</code> and its children depth-first, returning an
     * array with all element nodes with a given name.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tags to search for
     *
     * @return a <code>Node</code> array with all element nodes named
     *         <code>tag</code>, starting from <code>node</code>. If no
     *         such nodes exist, an empty array is returned
     */
    public static Node[] findAllTags(Node node, String tag) {
        return findAllTags(node, tag, null);
    }

    /**
     * Searches a <code>Node</code> and its children depth-first, returning an
     * array with all element nodes with a given name whose first text
     * sub-node has a given text.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tags to search for
     * @param text
     *            the string that the first text sub-node of a found node
     *            must be equal to, after leading and trailing whitespace has
     *            been trimmed from the text sub-node, or <code>null</code>
     *            if only the tag name should be used to determine if a node
     *            matches
     *
     * @return a <code>Node</code> array with all element nodes named
     *         <code>tag</code> and matching <code>text</code>, starting
     *         from <code>node</code>. If no such nodes exist, an empty
     *         array is returned
     */
    public static Node[] findAllTags(Node node, String tag, String text) {
        TagCollector tagCollector = new TagCollector(tag, text);
        traverse(node, tagCollector);
        return tagCollector.getNodes();
    }

    /**
     * Returns the trimmed text of the first text sub-node of the given
     * <code>Node</code>.
     *
     * @param node
     *            the <code>Node</code> whose children to search for the first
     *            text node
     *
     * @return the text of the first text sub-node of <code>node</code> with
     *         leading and trailing whitespace removed, or <code>null</code>
     *         if there is none
     */
    public static String getText(Node node) {
        return getText(node, 0);
    }

    /**
     * Returns the trimmed text of the nth text sub-node of the given
     * <code>Node</code>. Only direct children of <code>node</code> are
     * considered.
     *
     * @param node
     *            the <code>Node</code> whose children to search for the
     *            <code>nth</code> text node
     * @param nth
     *            the zero-based index of the text child node to return,
     *            counting only text nodes
     *
     * @return the text of the <code>nth</code> text sub-node of
     *         <code>node</code> with leading and trailing whitespace
     *         removed, or <code>null</code> if <code>node</code> does not
     *         have more than <code>nth</code> text nodes as children
     */
    public static String getText(Node node, int nth) {
        int textNodesSeen = 0;
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.TEXT_NODE) {
                if (textNodesSeen == nth) {
                    return child.getNodeValue().trim();
                }
                textNodesSeen++;
            }
        }
        return null;
    }

    /**
     * Returns the named attribute in a <code>Node</code>.
     *
     * @param node
     *            the <code>Node</code> in which to look for attributes
     * @param attributeName
     *            the name of the attribute to look up
     *
     * @return the attribute node for the attribute named
     *         <code>attributeName</code> in <code>node</code>, or
     *         <code>null</code> if there is no attribute with that name,
     *         including when <code>node</code> is of a type that has no
     *         attributes at all
     */
    public static Node getAttribute(Node node, String attributeName) {
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            // Node.getAttributes() returns null for anything that is not an
            // element (documents, text nodes, ...).
            return null;
        }
        return attributes.getNamedItem(attributeName);
    }

    /**
     * Returns the string value of a named attribute in a <code>Node</code>.
     * The value is read with {@link #getText(Node)}, so it is the trimmed
     * text of the first text sub-node of the attribute node.
     *
     * @param node
     *            the <code>Node</code> in which to look for attributes
     * @param attributeName
     *            the name of the attribute to look up
     *
     * @return the string value of the attribute named
     *         <code>attributeName</code> in <code>node</code>, or
     *         <code>null</code> if there is no attribute with that name
     */
    public static String getAttributeText(Node node, String attributeName) {
        Node attribute = getAttribute(node, attributeName);
        return (attribute == null) ? null : getText(attribute);
    }

    /**
     * Returns a string representation of a <code>Node</code> and its
     * children, pretty-printed so as to include new-lines and with each
     * sub-node indented.
     *
     * @param node
     *            the <code>Node</code> to represent as a string
     *
     * @return a pretty-printed string representation of <code>node</code>
     *
     * @see NodePrinter
     */
    public static String toString(Node node) {
        return toString(node, true);
    }

    /**
     * Returns a string representation of a <code>Node</code> and its
     * children.
     *
     * @param node
     *            the <code>Node</code> to represent as a string
     * @param prettyPrint
     *            if <code>true</code>, the string representation of
     *            <code>node</code> will be pretty-printed, so as to include
     *            new-lines and with each sub-node indented
     *
     * @return a string representation of <code>node</code>
     *
     * @see NodePrinter
     */
    public static String toString(Node node, boolean prettyPrint) {
        NodePrinter nodePrinter = new NodePrinter(prettyPrint);
        traverse(node, nodePrinter);
        return nodePrinter.toString();
    }

    /**
     * Returns a copy of a string where all special XML characters have been
     * replaced by the corresponding predefined entity reference:
     * <code>&lt;</code> by <code>&amp;lt;</code>, <code>&gt;</code> by
     * <code>&amp;gt;</code>, <code>&amp;</code> by <code>&amp;amp;</code>,
     * <code>"</code> by <code>&amp;quot;</code> and <code>'</code> by
     * <code>&amp;apos;</code>. The result is safe to use both as element
     * content and inside a quoted attribute value.
     *
     * @param original
     *            the string to make safe to use in an XML document, must not
     *            be <code>null</code>
     *
     * @return a copy of <code>original</code> with all special XML characters
     *         replaced by the corresponding entity reference
     */
    public static String makeXMLSafe(String original) {
        StringBuilder safe = new StringBuilder(original.length());
        for (int i = 0; i < original.length(); i++) {
            char c = original.charAt(i);
            switch (c) {
            case '<':
                safe.append("&lt;");
                break;
            case '>':
                safe.append("&gt;");
                break;
            case '&':
                safe.append("&amp;");
                break;
            case '"':
                safe.append("&quot;");
                break;
            case '\'':
                safe.append("&apos;");
                break;
            default:
                safe.append(c);
                break;
            }
        }
        return safe.toString();
    }

    //
    // Nested top-level classes
    //

    /**
     * The <code>Visitor</code> interface is used when traversing XML
     * documents. To use it, create a class that implements the interface and
     * give it to the <code>traverse</code> and <code>find</code> methods.
     *
     * @see XMLUtil#traverse(Node, th.co.edge.jseq.util.XMLUtil.Visitor)
     * @see XMLUtil#find(Node, th.co.edge.jseq.util.XMLUtil.Visitor)
     */
    public interface Visitor {
        /**
         * Called for every <code>Node</code> that is traversed, before the
         * children are visited.
         *
         * @param node
         *            the <code>Node</code> currently being visited.
         *
         * @return <code>true</code> if you want to stop the traversal, or if
         *         you have found the <code>Node</code> you are looking for,
         *         <code>false</code> otherwise
         */
        boolean visit(Node node);

        /**
         * Called after the children of the Node have been traversed. It is
         * also called when the children were skipped because
         * <code>visit</code> returned <code>true</code> for the node.
         *
         * @param node
         *            the <code>Node</code> whose children have just been
         *            traversed
         */
        void afterVisit(Node node);
    }

    /**
     * An implementation of <code>Visitor</code> that creates a string
     * representation of a <code>Node</code> and its children, or in other
     * words, of (a part of) an XML document.
     *
     * <p>
     * To use this class, create a new <code>NodePrinter</code> instance, call
     * <code>XMLUtil.traverse</code> using it, and then call the
     * <code>toString</code> method on the <code>NodePrinter</code>.
     *
     * <p>
     * Only document, document type, element and text nodes produce output;
     * nodes of any other type are skipped (their children are still
     * traversed).
     *
     * <p>
     * NOTE(review): text node values and attribute values are written
     * verbatim, without replacing special XML characters. Values containing
     * <code>&lt;</code>, <code>&amp;</code> or <code>"</code> therefore
     * produce output that is not well-formed unless they were escaped before
     * being put into the DOM. Confirm against callers before adding escaping
     * here, since already-escaped values would otherwise be escaped twice.
     */
    public static class NodePrinter implements XMLUtil.Visitor {
        private static final String NEW_LINE =
                System.getProperty("line.separator");

        /** Number of spaces added per nesting level when pretty-printing. */
        private static final int INDENT_STEP = 2;

        private final StringBuilder output = new StringBuilder();
        private final boolean prettyPrint;
        private int indent = 0;

        /**
         * Creates a new <code>NodePrinter</code>.
         *
         * @param prettyPrint
         *            if <code>true</code> string representation of an XML
         *            document will contain new-lines, with each sub-node
         *            indented
         */
        public NodePrinter(boolean prettyPrint) {
            this.prettyPrint = prettyPrint;
        }

        /**
         * Adds the string representation appropriate at the beginning of the
         * given <code>Node</code>. For example, for an element node, this
         * would add the start tag with its attributes; for a document node,
         * this would add the XML declaration, and so on.
         *
         * @param node
         *            the <code>Node</code> for which to generate a string
         *            representation
         *
         * @return <code>false</code>, so as never to stop the traversal
         */
        public boolean visit(Node node) {
            switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE:
                output.append("<?xml version=\"1.0\"?>");
                newline();
                break;
            case Node.DOCUMENT_TYPE_NODE:
                appendDocType((DocumentType) node);
                break;
            case Node.ELEMENT_NODE:
                appendStartTag(node);
                break;
            case Node.TEXT_NODE:
                output.append(node.getNodeValue());
                break;
            default:
                // Do nothing
                break;
            }
            return false;
        }

        /**
         * For an element node, adds the end tag for the given <code>Node</code>
         * to the string representation of the XML document, if necessary.
         * Empty elements were already closed by the start tag and get no end
         * tag.
         *
         * @param node
         *            the <code>Node</code> being visited
         */
        public void afterVisit(Node node) {
            if (node.getNodeType() != Node.ELEMENT_NODE ||
                    node.getChildNodes().getLength() == 0) {
                return;
            }
            // Indent unless the only child is text, printed inline.
            // For example "<foo>This is text</foo>".
            if (!hasSingleTextChild(node)) {
                indent -= INDENT_STEP;
                indent();
            }
            output.append("</").append(node.getNodeName()).append(">");
            newline();
        }

        /**
         * Adds the DOCTYPE declaration, if the document type has a public or
         * a system identifier. A missing system identifier is left out
         * rather than printed as the string "null".
         */
        private void appendDocType(DocumentType docType) {
            String publicId = docType.getPublicId();
            String systemId = docType.getSystemId();
            if (publicId == null && systemId == null) {
                return;
            }
            output.append("<!DOCTYPE ").append(docType.getName());
            if (publicId != null) {
                output.append(" PUBLIC \"").append(publicId).append("\"");
                if (systemId != null) {
                    output.append(" \"").append(systemId).append("\"");
                }
            } else {
                output.append(" SYSTEM \"").append(systemId).append("\"");
            }
            output.append(">");
            newline();
        }

        /**
         * Adds the start tag of an element, including its attributes, and
         * adjusts the indentation for the children of the element.
         */
        private void appendStartTag(Node element) {
            indent();
            output.append("<").append(element.getNodeName());
            output.append(getAttributes(element));
            if (element.getChildNodes().getLength() == 0) {
                // An empty element is closed immediately, for example
                // "<br/>", and does not change the indentation level.
                output.append("/>");
                newline();
            } else {
                output.append(">");
                // Print a newline unless the only child is text, to be
                // printed inline. For example "<foo>This is text</foo>".
                if (!hasSingleTextChild(element)) {
                    newline();
                    indent += INDENT_STEP;
                }
            }
        }

        /**
         * Returns <code>true</code> if the given node has exactly one child
         * and that child is a text node.
         */
        private static boolean hasSingleTextChild(Node node) {
            return node.getChildNodes().getLength() == 1 &&
                    node.getFirstChild().getNodeType() == Node.TEXT_NODE;
        }

        private void indent() {
            if (prettyPrint) {
                for (int i = 0; i < indent; i++) {
                    output.append(" ");
                }
            }
        }

        private void newline() {
            if (prettyPrint) {
                output.append(NEW_LINE);
            }
        }

        /**
         * Returns the attributes of the given node formatted as
         * <code> name="value"</code> pairs, each preceded by a space, or the
         * empty string if the node has no attributes.
         */
        private String getAttributes(Node node) {
            StringBuilder sb = new StringBuilder();
            NamedNodeMap attributes = node.getAttributes();
            if (attributes != null) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    Node attribute = attributes.item(i);
                    sb.append(" ").append(attribute.getNodeName());
                    sb.append("=\"").append(attribute.getNodeValue());
                    sb.append("\"");
                }
            }
            return sb.toString();
        }

        /**
         * Returns a string representation of the <code>Node</code> that has
         * been traversed using this <code>NodePrinter</code>, or the empty
         * string if the <code>Node</code> has not yet been traversed.
         *
         * @return a string representation of the <code>Node</code> that has
         *         been traversed
         */
        @Override
        public String toString() {
            return output.toString();
        }
    }

    /**
     * A <code>Visitor</code> that matches element nodes with a given name
     * and, optionally, a given text.
     */
    private static class TagFinder implements XMLUtil.Visitor {
        private final String tag;
        private final String text;

        /**
         * @param tag
         *            the element name to match
         * @param text
         *            the text that the trimmed first text sub-node of the
         *            element must be equal to, or <code>null</code> to match
         *            on the element name only
         */
        public TagFinder(String tag, String text) {
            this.tag = tag;
            this.text = text;
        }

        /**
         * Returns <code>true</code> if the given node is an element named
         * <code>tag</code> and, when a text was given, its trimmed first
         * text sub-node equals that text.
         */
        public boolean visit(Node node) {
            if (node.getNodeType() != Node.ELEMENT_NODE ||
                    !node.getNodeName().equals(tag)) {
                return false;
            }
            if (text == null) {
                return true;
            }
            String nodeText = XMLUtil.getText(node);
            return nodeText != null && nodeText.equals(text);
        }

        public void afterVisit(Node node) {
        }
    }

    /**
     * A <code>Visitor</code> that collects every node matched by
     * <code>TagFinder</code>, without ever stopping the traversal.
     */
    private static class TagCollector extends TagFinder {
        private final ArrayList<Node> nodes = new ArrayList<Node>();

        public TagCollector(String tag, String text) {
            super(tag, text);
        }

        /**
         * Remembers the node if it matches, and always returns
         * <code>false</code> so that the whole tree is traversed.
         */
        @Override
        public boolean visit(Node node) {
            if (super.visit(node)) {
                nodes.add(node);
            }
            return false;
        }

        /**
         * Returns the matching nodes collected so far, in the order they
         * were visited.
         */
        public Node[] getNodes() {
            return nodes.toArray(new Node[nodes.size()]);
        }
    }
}