1 /* 2 * Copyright (c) 2003-2008, by Henrik Arro and Contributors 3 * 4 * This file is part of JSeq, a tool to automatically create 5 * sequence diagrams by tracing program execution. 6 * 7 * See <http://jseq.sourceforge.net> for more information. 8 * 9 * JSeq is free software: you can redistribute it and/or modify 10 * it under the terms of the GNU Lesser General Public License as 11 * published by the Free Software Foundation, either version 3 of 12 * the License, or (at your option) any later version. 13 * 14 * JSeq is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public License 20 * along with JSeq. If not, see <http://www.gnu.org/licenses/>. 21 */ 22 23 package th.co.edge.jseq.util; 24 25 import java.util.ArrayList; 26 27 import org.w3c.dom.DocumentType; 28 import org.w3c.dom.NamedNodeMap; 29 import org.w3c.dom.Node; 30 import org.w3c.dom.NodeList; 31 32 /** 33 * A utility class to work with <code>org.w3c.dom.Node</code> objects. 34 */ 35 public class XMLUtil { 36 /** 37 * This class only contains static methods, so we hide the constructor. 38 */ 39 private XMLUtil() { 40 } 41 42 /** 43 * Traverses the given <code>Node</code> and its children depth-first, 44 * calling the given <code>Visitor</code> for each node. 45 * 46 * <p> 47 * If the traversal should be stopped at a certain node, the 48 * <code>Visitor</code> should return <code>true</code> from the 49 * <code>visit</code> method for that node. Obviously, if the entire tree 50 * should be traversed, the <code>visit</code> method should always return 51 * <code>false</code>. 52 * 53 * @param node 54 * the root <code>Node</code> at which to start the traversal 55 * @param visitor 56 * the <code>Visitor</code> 57 * 58 * @see Visitor#visit(Node) 59 */ 60 public static void traverse(Node node, Visitor visitor) { 61 find(node, visitor); 62 } 63 64 /** 65 * Searches the given <code>Node</code> and its children depth-first, 66 * looking for the first <code>Node</code> that satisfies the given 67 * <code>Visitor</code>. 68 * 69 * @param node 70 * the root <code>Node</code> at which to start the search 71 * @param visitor 72 * the <code>Visitor</code> that is called for every node, and 73 * determines if the right node has been found by returning 74 * <code>true</code> from the <code>visit</code> method 75 * 76 * @return the first <code>Node</code> in a depth-first search that 77 * satisfies <code>visitor</code>, or <code>null</code> if 78 * there is none. 79 * 80 * @see Visitor#visit(Node) 81 */ 82 public static Node find(Node node, Visitor visitor) { 83 Node result = null; 84 if (visitor.visit(node)) { 85 result = node; 86 } else { 87 NodeList children = node.getChildNodes(); 88 for (int i = 0; i < children.getLength(); i++) { 89 Node child = children.item(i); 90 result = find(child, visitor); 91 if (result != null) { 92 break; 93 } 94 } 95 } 96 visitor.afterVisit(node); 97 return result; 98 } 99 100 /** 101 * Searches a <code>Node</code> and its children depth-first for an 102 * element node with a given name. 103 * 104 * @param node 105 * the root <code>Node</code> at which to start the search 106 * @param tag 107 * the name of the tag to search for 108 * 109 * @return the <code>Node</code> found, or <code>null</code> 110 */ 111 public static Node findTag(Node node, String tag) { 112 return findTag(node, tag, null); 113 } 114 115 /** 116 * Searches a <code>Node</code> and its children depth-first for an 117 * element node with a given name, and also containing a text sub-node with 118 * a given text. 119 * 120 * @param node 121 * the root <code>Node</code> at which to start the search 122 * @param tag 123 * the name of the tag to search for 124 * @param text 125 * a string that must be included in the first text sub-node of 126 * the found node, or <code>null</code> if only the tag name 127 * should be used to determine if a node matches 128 * 129 * @return the <code>Node</code> found, or <code>null</code> 130 */ 131 public static Node findTag(Node node, String tag, String text) { 132 Node result = find(node, new TagFinder(tag, text)); 133 return result; 134 } 135 136 /** 137 * Searches a <code>Node</code> and its children depth-first, returning an 138 * array with all element nodes with a given name. 139 * 140 * @param node 141 * the root <code>Node</code> at which to start the search 142 * @param tag 143 * the name of the tags to search for 144 * 145 * @return a <code>Node</code> array with all element nodes with the given 146 * <code>name</code> starting from <code>node</code>. If no 147 * such nodes exist, an empty array is returned 148 */ 149 public static Node[] findAllTags(Node node, String tag) { 150 return findAllTags(node, tag, null); 151 } 152 153 /** 154 * Searches a <code>Node</code> and its children depth-first, returning an 155 * array with all element nodes with a given name, and also containing a 156 * text sub-node with a given text. 157 * 158 * @param node 159 * the root <code>Node</code> at which to start the search 160 * @param tag 161 * the name of the tags to search for 162 * @param text 163 * a string that must be included in the first text sub-node of 164 * the found node, or <code>null</code> if only the tag name 165 * should be used to determine if a node matches 166 * 167 * @return a <code>Node</code> array with all element nodes with the given 168 * <code>name</code> and containing a text node with 169 * <code>text</code>, starting from <code>node</code>. If no 170 * such nodes exist, an empty array is returned 171 * 172 */ 173 public static Node[] findAllTags(Node node, String tag, String text) { 174 TagCollector tagCollector = new TagCollector(tag, text); 175 traverse(node, tagCollector); 176 return tagCollector.getNodes(); 177 } 178 179 /** 180 * Returns the first text sub-node of the given <code>Node</code>. 181 * 182 * @param node 183 * the <code>Node</code> whose children to search for the first 184 * text node 185 * 186 * @return the first text sub-node of <code>node</code>, or 187 * <code>null</code> if there is none 188 */ 189 public static String getText(Node node) { 190 String text = getText(node, 0); 191 return text; 192 } 193 194 /** 195 * Returns the nth text sub-node of the given <code>Node</code>. 196 * 197 * @param node 198 * the <code>Node</code> whose children to search for the 199 * <code>nth</code> text node 200 * @param nth 201 * the index of the text childe node to return 202 * 203 * @return the <code>nth</code> text sub-node of <code>node</code>, or 204 * <code>null</code> if <code>node</code> has fewer than 205 * <code>nth</code> text nodes as children 206 */ 207 public static String getText(Node node, int nth) { 208 String text = null; 209 int numFound = -1; 210 NodeList children = node.getChildNodes(); 211 for (int i = 0; i < children.getLength(); i++) { 212 Node child = children.item(i); 213 if (child.getNodeType() == Node.TEXT_NODE) { 214 numFound++; 215 if (numFound == nth) { 216 text = child.getNodeValue().trim(); 217 break; 218 } 219 } 220 } 221 return text; 222 } 223 224 /** 225 * Returns the named attribute in a <code>Node</code>. 226 * 227 * @param node 228 * the <code>Node</code> in which to look for attributes 229 * @param attributeName 230 * the name of the attribute to look up 231 * 232 * @return the attribute node for the attribute named 233 * <code>attributeName</code> in <code>Node</code>, or 234 * <code>null</code> if there is no attribute with that name 235 */ 236 public static Node getAttribute(Node node, String attributeName) { 237 NamedNodeMap attributes = node.getAttributes(); 238 Node attributeValue = attributes.getNamedItem(attributeName); 239 return attributeValue; 240 } 241 242 /** 243 * Returns the string value of a named attribute in a <code>Node</code>. 244 * 245 * @param node 246 * the <code>Node</code> in which to look for attributes 247 * @param attributeName 248 * the name of the attribute to look up 249 * 250 * @return the string value of the attribute named 251 * <code>attributeName</code> in <code>Node</code>, or 252 * <code>null</code> if there is no attribute with that name 253 */ 254 public static String getAttributeText(Node node, String attributeName) { 255 String attributeText = null; 256 Node attribute = getAttribute(node, attributeName); 257 if (attribute != null) { 258 attributeText = getText(attribute); 259 } 260 return attributeText; 261 } 262 263 /** 264 * Returns a string representation of a <code>Node</code> and its 265 * children, pretty-printed so as to include new-lines and with each 266 * sub-node indented. 267 * 268 * @param node 269 * the <code>Node</code> to represent as a string 270 * 271 * @return a pretty-printed string representation of <code>Node</code> 272 */ 273 public static String toString(Node node) { 274 return toString(node, true); 275 } 276 277 /** 278 * Returns a string representation of a <code>Node</code> and its 279 * children. 280 * 281 * @param node 282 * the <code>Node</code> to represent as a string 283 * @param prettyPrint 284 * if <code>true</code>, the string representation of 285 * <code>Node</code> will be pretty-printed, so as to include 286 * new-lines and with each sub-node indented 287 * 288 * @return a string representation of <code>Node</code> 289 */ 290 public static String toString(Node node, boolean prettyPrint) { 291 NodePrinter nodePrinter = new XMLUtil.NodePrinter(prettyPrint); 292 traverse(node, nodePrinter); 293 return nodePrinter.toString(); 294 } 295 296 /** 297 * Returns a copy of a string where all special XML characters have been 298 * replaced by the corresponding character entity reference, for example, "<" 299 * is replaced by "<". 300 * 301 * @param original 302 * the string to make safe to use in an XML document 303 * 304 * @return a copy of <code>original</code> with all special XML characters 305 * replaced by the corresponding character entity reference 306 */ 307 public static String makeXMLSafe(String original) { 308 StringBuffer s = new StringBuffer(); 309 for (int i = 0; i < original.length(); i++) { 310 if (original.charAt(i) == '<') { 311 s.append("<"); 312 } else if (original.charAt(i) == '>') { 313 s.append(">"); 314 } else if (original.charAt(i) == '&') { 315 s.append("&"); 316 } else { 317 s.append(original.charAt(i)); 318 } 319 } 320 return s.toString(); 321 } 322 323 // 324 // Nested top-level classes 325 // 326 327 /** 328 * The <code>Visitor</code> interface is used when traversing XML 329 * documents. To use it, create a class that implements the interface and 330 * give it to the <code>traverse</code> and <code>find</code> methods. 331 * 332 * @see XMLUtil#traverse(Node, th.co.edge.jseq.util.XMLUtil.Visitor) 333 * @see XMLUtil#find(Node, th.co.edge.jseq.util.XMLUtil.Visitor) 334 */ 335 public interface Visitor { 336 /** 337 * Called for every <code>Node</code> that is traversed, before the 338 * children are visited. 339 * 340 * @param node 341 * the <code>Node</code> currently being visited. 342 * 343 * @return <code>true</code> if you want to stop the traversal, or if 344 * you have found the <code>Node</code> you are looking for, 345 * <code>false</code> otherwise 346 */ 347 public boolean visit(Node node); 348 349 /** 350 * Called after the children of the Node have been traversed. 351 * 352 * @param node 353 * the <code>Node</code> whose children have just been 354 * traversed 355 */ 356 public void afterVisit(Node node); 357 } 358 359 /** 360 * An implementation of <code>Visitor</code> that creates a string 361 * representation of a <code>Node</code> and its children, or in other 362 * words, to a (part of) an XML document. 363 * 364 * <p> 365 * To use this class, create a new <code>NodePrinter</code> instance, call 366 * <code>XMLUtil.traverse</code> using it, and then call the 367 * <code>toString</code> method on the <code>NodePrinter</code>. 368 */ 369 public static class NodePrinter implements XMLUtil.Visitor { 370 private static final String NEW_LINE = 371 System.getProperty("line.separator"); 372 373 private final StringBuffer stringBuffer = new StringBuffer(); 374 private final boolean prettyPrint; 375 private int indent = 0; 376 377 /** 378 * Creates a new <code>NodePrinter</code>. 379 * 380 * @param prettyPrint 381 * if <code>true</code> string representation of an XML 382 * document will contain new-lines, with each sub-node 383 * indented 384 */ 385 public NodePrinter(boolean prettyPrint) { 386 this.prettyPrint = prettyPrint; 387 } 388 389 /** 390 * Adds the string representation appropriate at the beginning of the 391 * given <code>Node</code>. For example, for an element node, this 392 * would add the start tag with its attributes; for a document node, 393 * this would add the DOCTYPE declaration, and so on. 394 * 395 * @param node 396 * the <code>Node</code> for which to generate a string 397 * representation 398 * 399 * @return <code>false</code>, so as never to stop the traversal 400 */ 401 public boolean visit(Node node) { 402 switch (node.getNodeType()) { 403 case Node.DOCUMENT_NODE: 404 stringBuffer.append("<?xml version=\"1.0\"?>"); 405 newline(); 406 break; 407 case Node.DOCUMENT_TYPE_NODE: 408 DocumentType docType = (DocumentType) node; 409 if (docType.getPublicId() != null || 410 docType.getSystemId() != null) { 411 stringBuffer.append("<!DOCTYPE " + docType.getName()); 412 if (docType.getPublicId() != null) { 413 stringBuffer.append(" PUBLIC \"" + 414 docType.getPublicId() + "\" "); 415 stringBuffer.append("\"" + docType.getSystemId() + 416 "\">"); 417 } else { 418 stringBuffer.append(" SYSTEM \"" + 419 docType.getSystemId() + "\">"); 420 } 421 newline(); 422 } 423 break; 424 case Node.ELEMENT_NODE: 425 indent(); 426 stringBuffer.append("<" + node.getNodeName()); 427 stringBuffer.append(getAttributes(node)); 428 // If this node is empty, add the trailing slash. 429 // For example, "<br/>". 430 if (node.getChildNodes().getLength() == 0) { 431 stringBuffer.append("/"); 432 indent -= 2; 433 } 434 stringBuffer.append(">"); 435 // Print a newline unless the only child is text, to be printed 436 // inline. 437 // For example "<foo>This is text</foo>". 438 if (node.getChildNodes().getLength() != 1 || 439 node.getFirstChild().getNodeType() != Node.TEXT_NODE) { 440 newline(); 441 indent += 2; 442 } 443 break; 444 case Node.TEXT_NODE: 445 stringBuffer.append(node.getNodeValue()); 446 break; 447 default: 448 // Do nothing 449 break; 450 } 451 return false; 452 } 453 454 /** 455 * For an element node, adds the end tag for the given <code>Node</code> 456 * to the string representation of the XML document, if necessary. 457 * 458 * @param node 459 * the <code>Node</code> being visited 460 */ 461 public void afterVisit(Node node) { 462 if (node.getNodeType() == Node.ELEMENT_NODE && 463 node.getChildNodes().getLength() != 0) { 464 // Indent unless the only child is text, printed inline. 465 // For example "<foo>This is text</foo>". 466 if (node.getChildNodes().getLength() != 1 || 467 node.getFirstChild().getNodeType() != Node.TEXT_NODE) { 468 indent -= 2; 469 indent(); 470 } 471 stringBuffer.append("</" + node.getNodeName() + ">"); 472 newline(); 473 } 474 } 475 476 private void indent() { 477 if (prettyPrint) { 478 for (int i = 0; i < indent; i++) { 479 stringBuffer.append(" "); 480 } 481 } 482 } 483 484 private void newline() { 485 if (prettyPrint) { 486 stringBuffer.append(NEW_LINE); 487 } 488 } 489 490 private String getAttributes(Node node) { 491 StringBuffer sb = new StringBuffer(); 492 NamedNodeMap attributes = node.getAttributes(); 493 if (attributes != null) { 494 for (int i = 0; i < attributes.getLength(); i++) { 495 Node attribute = attributes.item(i); 496 sb.append(" " + attribute.getNodeName() + "=\"" + 497 attribute.getNodeValue() + "\""); 498 } 499 } 500 return sb.toString(); 501 } 502 503 /** 504 * Returns a string representation of the <code>Node</code> that has 505 * been traversed using this <code>NodePrinter</code>, or the empty 506 * string if the <code>Node</code> has not yet been traversed. 507 * 508 * @return a string representation of the <code>Node</code> that has 509 * been traversed 510 */ 511 @Override 512 public String toString() { 513 return stringBuffer.toString(); 514 } 515 } 516 517 private static class TagFinder implements XMLUtil.Visitor { 518 private String tag; 519 private String text; 520 521 public TagFinder(String tag, String text) { 522 this.tag = tag; 523 this.text = text; 524 } 525 526 public boolean visit(Node node) { 527 boolean found = false; 528 if (node.getNodeType() == Node.ELEMENT_NODE && 529 node.getNodeName().equals(tag)) { 530 if (text == null) { 531 found = true; 532 } else { 533 String nodeText = XMLUtil.getText(node); 534 if (nodeText != null) { 535 found = nodeText.equals(text); 536 } 537 } 538 } 539 return found; 540 } 541 542 public void afterVisit(Node node) { 543 } 544 } 545 546 private static class TagCollector extends TagFinder { 547 private ArrayList<Node> nodes = new ArrayList<Node>(); 548 549 public TagCollector(String tag, String text) { 550 super(tag, text); 551 } 552 553 @Override 554 public boolean visit(Node node) { 555 if (super.visit(node)) { 556 nodes.add(node); 557 } 558 return false; 559 } 560 561 public Node[] getNodes() { 562 Node[] nodeArray = new Node[nodes.size()]; 563 nodes.toArray(nodeArray); 564 return nodeArray; 565 } 566 } 567 }