1 /*
2 * Copyright (c) 2003-2008, by Henrik Arro and Contributors
3 *
4 * This file is part of JSeq, a tool to automatically create
5 * sequence diagrams by tracing program execution.
6 *
7 * See <http://jseq.sourceforge.net> for more information.
8 *
9 * JSeq is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU Lesser General Public License as
11 * published by the Free Software Foundation, either version 3 of
12 * the License, or (at your option) any later version.
13 *
14 * JSeq is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public License
20 * along with JSeq. If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 package th.co.edge.jseq.util;
24
25 import java.util.ArrayList;
26
27 import org.w3c.dom.DocumentType;
28 import org.w3c.dom.NamedNodeMap;
29 import org.w3c.dom.Node;
30 import org.w3c.dom.NodeList;
31
/**
 * A utility class to work with <code>org.w3c.dom.Node</code> objects:
 * depth-first traversal and search, text and attribute look-up, conversion of
 * a node tree to a string, and escaping of XML special characters.
 */
public final class XMLUtil {
    /**
     * This class only contains static methods, so we hide the constructor.
     */
    private XMLUtil() {
    }

    /**
     * Traverses the given <code>Node</code> and its children depth-first,
     * calling the given <code>Visitor</code> for each node.
     *
     * <p>
     * If the traversal should be stopped at a certain node, the
     * <code>Visitor</code> should return <code>true</code> from the
     * <code>visit</code> method for that node; no further nodes are then
     * visited, but <code>afterVisit</code> is still called for that node and
     * for each of its ancestors up to <code>node</code>. If the entire tree
     * should be traversed, the <code>visit</code> method should always return
     * <code>false</code>.
     *
     * @param node
     *            the root <code>Node</code> at which to start the traversal
     * @param visitor
     *            the <code>Visitor</code>
     *
     * @see Visitor#visit(Node)
     */
    public static void traverse(Node node, Visitor visitor) {
        // A traversal is a search whose result is simply discarded.
        find(node, visitor);
    }

    /**
     * Searches the given <code>Node</code> and its children depth-first,
     * looking for the first <code>Node</code> that satisfies the given
     * <code>Visitor</code>.
     *
     * <p>
     * <code>visit</code> is called on a node before its children are
     * searched, and <code>afterVisit</code> is called on every node for which
     * <code>visit</code> was called, once the search below it has finished.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param visitor
     *            the <code>Visitor</code> that is called for every node, and
     *            determines if the right node has been found by returning
     *            <code>true</code> from the <code>visit</code> method
     *
     * @return the first <code>Node</code> in a depth-first search that
     *         satisfies <code>visitor</code>, or <code>null</code> if
     *         there is none.
     *
     * @see Visitor#visit(Node)
     */
    public static Node find(Node node, Visitor visitor) {
        Node result = null;
        if (visitor.visit(node)) {
            result = node;
        } else {
            NodeList children = node.getChildNodes();
            // Stop descending into further siblings as soon as a match is
            // found in one of the sub-trees.
            for (int i = 0; i < children.getLength() && result == null; i++) {
                result = find(children.item(i), visitor);
            }
        }
        visitor.afterVisit(node);
        return result;
    }

    /**
     * Searches a <code>Node</code> and its children depth-first for an
     * element node with a given name.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tag to search for
     *
     * @return the <code>Node</code> found, or <code>null</code>
     */
    public static Node findTag(Node node, String tag) {
        return findTag(node, tag, null);
    }

    /**
     * Searches a <code>Node</code> and its children depth-first for an
     * element node with a given name whose first text sub-node has a given
     * text.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tag to search for
     * @param text
     *            a string that the trimmed content of the first text sub-node
     *            of the found node must be equal to, or <code>null</code> if
     *            only the tag name should be used to determine if a node
     *            matches
     *
     * @return the <code>Node</code> found, or <code>null</code>
     */
    public static Node findTag(Node node, String tag, String text) {
        return find(node, new TagFinder(tag, text));
    }

    /**
     * Searches a <code>Node</code> and its children depth-first, returning an
     * array with all element nodes with a given name.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tags to search for
     *
     * @return a <code>Node</code> array with all element nodes named
     *         <code>tag</code>, starting from <code>node</code>. If no
     *         such nodes exist, an empty array is returned
     */
    public static Node[] findAllTags(Node node, String tag) {
        return findAllTags(node, tag, null);
    }

    /**
     * Searches a <code>Node</code> and its children depth-first, returning an
     * array with all element nodes with a given name whose first text
     * sub-node has a given text.
     *
     * @param node
     *            the root <code>Node</code> at which to start the search
     * @param tag
     *            the name of the tags to search for
     * @param text
     *            a string that the trimmed content of the first text sub-node
     *            of a found node must be equal to, or <code>null</code> if
     *            only the tag name should be used to determine if a node
     *            matches
     *
     * @return a <code>Node</code> array with all element nodes named
     *         <code>tag</code> and matching <code>text</code>, starting
     *         from <code>node</code>, in depth-first order. If no such
     *         nodes exist, an empty array is returned
     */
    public static Node[] findAllTags(Node node, String tag, String text) {
        TagCollector tagCollector = new TagCollector(tag, text);
        traverse(node, tagCollector);
        return tagCollector.getNodes();
    }

    /**
     * Returns the trimmed content of the first text sub-node of the given
     * <code>Node</code>.
     *
     * @param node
     *            the <code>Node</code> whose children to search for the first
     *            text node
     *
     * @return the trimmed content of the first text sub-node of
     *         <code>node</code>, or <code>null</code> if there is none
     */
    public static String getText(Node node) {
        return getText(node, 0);
    }

    /**
     * Returns the trimmed content of the nth text sub-node of the given
     * <code>Node</code>. Only direct children of type
     * <code>Node.TEXT_NODE</code> are counted.
     *
     * @param node
     *            the <code>Node</code> whose children to search for the
     *            <code>nth</code> text node
     * @param nth
     *            the zero-based index of the text child node to return
     *
     * @return the trimmed content of the <code>nth</code> text sub-node of
     *         <code>node</code>, or <code>null</code> if
     *         <code>node</code> has no text child with that index
     */
    public static String getText(Node node, int nth) {
        NodeList children = node.getChildNodes();
        int textIndex = 0;
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.TEXT_NODE) {
                if (textIndex == nth) {
                    return child.getNodeValue().trim();
                }
                textIndex++;
            }
        }
        return null;
    }

    /**
     * Returns the named attribute in a <code>Node</code>.
     *
     * @param node
     *            the <code>Node</code> in which to look for attributes
     * @param attributeName
     *            the name of the attribute to look up
     *
     * @return the attribute node for the attribute named
     *         <code>attributeName</code> in <code>Node</code>, or
     *         <code>null</code> if there is no attribute with that name,
     *         or if <code>node</code> is of a type that has no attributes
     */
    public static Node getAttribute(Node node, String attributeName) {
        NamedNodeMap attributes = node.getAttributes();
        // getAttributes() returns null for every node that is not an
        // element, for example document and text nodes.
        if (attributes == null) {
            return null;
        }
        return attributes.getNamedItem(attributeName);
    }

    /**
     * Returns the string value of a named attribute in a <code>Node</code>.
     * The value is read with {@link #getText(Node)} from the attribute node,
     * so it is the trimmed content of the attribute's first text sub-node.
     *
     * @param node
     *            the <code>Node</code> in which to look for attributes
     * @param attributeName
     *            the name of the attribute to look up
     *
     * @return the string value of the attribute named
     *         <code>attributeName</code> in <code>Node</code>, or
     *         <code>null</code> if there is no attribute with that name
     */
    public static String getAttributeText(Node node, String attributeName) {
        Node attribute = getAttribute(node, attributeName);
        return (attribute == null) ? null : getText(attribute);
    }

    /**
     * Returns a string representation of a <code>Node</code> and its
     * children, pretty-printed so as to include new-lines and with each
     * sub-node indented.
     *
     * @param node
     *            the <code>Node</code> to represent as a string
     *
     * @return a pretty-printed string representation of <code>Node</code>
     */
    public static String toString(Node node) {
        return toString(node, true);
    }

    /**
     * Returns a string representation of a <code>Node</code> and its
     * children.
     *
     * @param node
     *            the <code>Node</code> to represent as a string
     * @param prettyPrint
     *            if <code>true</code>, the string representation of
     *            <code>Node</code> will be pretty-printed, so as to include
     *            new-lines and with each sub-node indented
     *
     * @return a string representation of <code>Node</code>
     */
    public static String toString(Node node, boolean prettyPrint) {
        NodePrinter nodePrinter = new NodePrinter(prettyPrint);
        traverse(node, nodePrinter);
        return nodePrinter.toString();
    }

    /**
     * Returns a copy of a string where the XML markup characters
     * {@code <}, {@code >} and {@code &} have been replaced by the
     * corresponding character entity reference, for example, {@code "<"} is
     * replaced by {@code "&lt;"}. Quote characters are left unchanged.
     *
     * @param original
     *            the string to make safe to use in an XML document
     *
     * @return a copy of <code>original</code> with the characters above
     *         replaced by the corresponding character entity reference
     *
     * @throws NullPointerException
     *             if <code>original</code> is <code>null</code>
     */
    public static String makeXMLSafe(String original) {
        StringBuilder safe = new StringBuilder(original.length());
        for (int i = 0; i < original.length(); i++) {
            char c = original.charAt(i);
            switch (c) {
            case '<':
                safe.append("&lt;");
                break;
            case '>':
                safe.append("&gt;");
                break;
            case '&':
                safe.append("&amp;");
                break;
            default:
                safe.append(c);
                break;
            }
        }
        return safe.toString();
    }

    //
    // Nested top-level classes
    //

    /**
     * The <code>Visitor</code> interface is used when traversing XML
     * documents. To use it, create a class that implements the interface and
     * give it to the <code>traverse</code> and <code>find</code> methods.
     *
     * @see XMLUtil#traverse(Node, XMLUtil.Visitor)
     * @see XMLUtil#find(Node, XMLUtil.Visitor)
     */
    public interface Visitor {
        /**
         * Called for every <code>Node</code> that is traversed, before the
         * children are visited.
         *
         * @param node
         *            the <code>Node</code> currently being visited.
         *
         * @return <code>true</code> if you want to stop the traversal, or if
         *         you have found the <code>Node</code> you are looking for,
         *         <code>false</code> otherwise
         */
        public boolean visit(Node node);

        /**
         * Called after the children of the Node have been traversed, or
         * directly after <code>visit</code> if that call stopped the
         * traversal at this node.
         *
         * @param node
         *            the <code>Node</code> whose children have just been
         *            traversed
         */
        public void afterVisit(Node node);
    }

    /**
     * An implementation of <code>Visitor</code> that creates a string
     * representation of a <code>Node</code> and its children, or in other
     * words, of (a part of) an XML document.
     *
     * <p>
     * To use this class, create a new <code>NodePrinter</code> instance, call
     * <code>XMLUtil.traverse</code> using it, and then call the
     * <code>toString</code> method on the <code>NodePrinter</code>.
     *
     * <p>
     * Text content and attribute values are written exactly as stored in the
     * nodes; no entity escaping is applied by this class.
     */
    public static class NodePrinter implements XMLUtil.Visitor {
        private static final String NEW_LINE =
                System.getProperty("line.separator");

        /** Number of indentation units added per nesting level. */
        private static final int INDENT_STEP = 2;

        private final StringBuilder buffer = new StringBuilder();
        private final boolean prettyPrint;
        private int indent = 0;

        /**
         * Creates a new <code>NodePrinter</code>.
         *
         * @param prettyPrint
         *            if <code>true</code> string representation of an XML
         *            document will contain new-lines, with each sub-node
         *            indented
         */
        public NodePrinter(boolean prettyPrint) {
            this.prettyPrint = prettyPrint;
        }

        /**
         * Adds the string representation appropriate at the beginning of the
         * given <code>Node</code>. For example, for an element node, this
         * would add the start tag with its attributes; for a document node,
         * this would add the XML declaration, and so on. Node types other
         * than document, document type, element and text produce no output.
         *
         * @param node
         *            the <code>Node</code> for which to generate a string
         *            representation
         *
         * @return <code>false</code>, so as never to stop the traversal
         */
        public boolean visit(Node node) {
            switch (node.getNodeType()) {
            case Node.DOCUMENT_NODE:
                buffer.append("<?xml version=\"1.0\"?>");
                newline();
                break;
            case Node.DOCUMENT_TYPE_NODE:
                appendDocType((DocumentType) node);
                break;
            case Node.ELEMENT_NODE:
                appendStartTag(node);
                break;
            case Node.TEXT_NODE:
                buffer.append(node.getNodeValue());
                break;
            default:
                // Do nothing
                break;
            }
            return false;
        }

        /**
         * For an element node, adds the end tag for the given <code>Node</code>
         * to the string representation of the XML document, if necessary.
         * Empty elements were already closed by <code>visit</code>, so no end
         * tag is added for them.
         *
         * @param node
         *            the <code>Node</code> being visited
         */
        public void afterVisit(Node node) {
            if (node.getNodeType() != Node.ELEMENT_NODE ||
                    node.getChildNodes().getLength() == 0) {
                return;
            }
            // Indent unless the only child is text, printed inline.
            // For example "<foo>This is text</foo>".
            if (!hasOnlyTextChild(node)) {
                indent -= INDENT_STEP;
                indent();
            }
            buffer.append("</" + node.getNodeName() + ">");
            newline();
        }

        /**
         * Appends a DOCTYPE declaration, but only if the document type has a
         * public or a system identifier.
         */
        private void appendDocType(DocumentType docType) {
            String publicId = docType.getPublicId();
            String systemId = docType.getSystemId();
            if (publicId == null && systemId == null) {
                return;
            }
            buffer.append("<!DOCTYPE " + docType.getName());
            if (publicId != null) {
                buffer.append(" PUBLIC \"" + publicId + "\" ");
                buffer.append("\"" + systemId + "\">");
            } else {
                buffer.append(" SYSTEM \"" + systemId + "\">");
            }
            newline();
        }

        /**
         * Appends the start tag of an element, or the complete tag if the
         * element has no children.
         */
        private void appendStartTag(Node element) {
            indent();
            buffer.append("<" + element.getNodeName());
            buffer.append(getAttributes(element));
            if (element.getChildNodes().getLength() == 0) {
                // Empty element: close it right away, for example "<br/>".
                // The nesting level is unchanged since there is no end tag.
                buffer.append("/>");
                newline();
            } else {
                buffer.append(">");
                // Print a newline unless the only child is text, to be
                // printed inline. For example "<foo>This is text</foo>".
                if (!hasOnlyTextChild(element)) {
                    newline();
                    indent += INDENT_STEP;
                }
            }
        }

        /**
         * Returns <code>true</code> if the node has exactly one child and
         * that child is a text node.
         */
        private static boolean hasOnlyTextChild(Node node) {
            return node.getChildNodes().getLength() == 1 &&
                    node.getFirstChild().getNodeType() == Node.TEXT_NODE;
        }

        /** Appends the current indentation, when pretty-printing. */
        private void indent() {
            if (prettyPrint) {
                for (int i = 0; i < indent; i++) {
                    buffer.append(" ");
                }
            }
        }

        /** Appends a line separator, when pretty-printing. */
        private void newline() {
            if (prettyPrint) {
                buffer.append(NEW_LINE);
            }
        }

        /**
         * Returns the attributes of the node formatted as
         * <code> name="value"</code> pairs, each preceded by a space, or the
         * empty string if the node has no attributes.
         */
        private String getAttributes(Node node) {
            StringBuilder sb = new StringBuilder();
            NamedNodeMap attributes = node.getAttributes();
            if (attributes != null) {
                for (int i = 0; i < attributes.getLength(); i++) {
                    Node attribute = attributes.item(i);
                    sb.append(" " + attribute.getNodeName() + "=\"" +
                            attribute.getNodeValue() + "\"");
                }
            }
            return sb.toString();
        }

        /**
         * Returns a string representation of the <code>Node</code> that has
         * been traversed using this <code>NodePrinter</code>, or the empty
         * string if the <code>Node</code> has not yet been traversed.
         *
         * @return a string representation of the <code>Node</code> that has
         *         been traversed
         */
        @Override
        public String toString() {
            return buffer.toString();
        }
    }

    /**
     * A <code>Visitor</code> that matches element nodes with a given name
     * and, optionally, a given text in their first text sub-node.
     */
    private static class TagFinder implements XMLUtil.Visitor {
        private final String tag;
        private final String text;

        /**
         * @param tag
         *            the element name to match
         * @param text
         *            the text to match, or <code>null</code> to match on the
         *            element name only
         */
        public TagFinder(String tag, String text) {
            this.tag = tag;
            this.text = text;
        }

        /**
         * Returns <code>true</code> if the node is an element named
         * <code>tag</code> and either no text was given, or the trimmed
         * content of the node's first text sub-node equals the given text.
         */
        public boolean visit(Node node) {
            if (node.getNodeType() != Node.ELEMENT_NODE ||
                    !node.getNodeName().equals(tag)) {
                return false;
            }
            if (text == null) {
                return true;
            }
            // getText returns null when there is no text sub-node, in which
            // case equals yields false.
            return text.equals(XMLUtil.getText(node));
        }

        public void afterVisit(Node node) {
        }
    }

    /**
     * A <code>TagFinder</code> that never stops the traversal, but instead
     * collects every matching node in the order visited.
     */
    private static class TagCollector extends TagFinder {
        private final ArrayList<Node> nodes = new ArrayList<Node>();

        public TagCollector(String tag, String text) {
            super(tag, text);
        }

        /**
         * Records the node if it matches, and always returns
         * <code>false</code> so that the whole tree is traversed.
         */
        @Override
        public boolean visit(Node node) {
            if (super.visit(node)) {
                nodes.add(node);
            }
            return false;
        }

        /**
         * Returns the nodes collected so far, as a new array.
         */
        public Node[] getNodes() {
            return nodes.toArray(new Node[nodes.size()]);
        }
    }
}