001 /* 002 * Copyright 2009 - 2010 JEuclid, http://jeuclid.sf.net 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 /* $Id $ */ 018 019 package net.sourceforge.jeuclid.biparser; 020 021 import java.io.StringReader; 022 023 import org.apache.commons.logging.Log; 024 import org.apache.commons.logging.LogFactory; 025 import org.xml.sax.Attributes; 026 import org.xml.sax.InputSource; 027 import org.xml.sax.Locator; 028 import org.xml.sax.SAXException; 029 import org.xml.sax.helpers.DefaultHandler; 030 031 /** 032 * this class is used for SAX parsing. it builds a BiTree out of a text while 033 * parsing 034 * 035 * @version $Revision: 006d990a217c $ 036 * 037 */ 038 public class JEuclidSAXHandler extends DefaultHandler { 039 040 /** 041 * Logger for this class 042 */ 043 private static final Log LOGGER = LogFactory 044 .getLog(JEuclidSAXHandler.class); 045 046 /** stores characters while parsing (text of TextNodes). */ 047 private StringBuffer textBuffer; 048 049 /** locater for X&Y-position in inputtext. */ 050 private Locator locator; 051 052 /** BiTreeCreationHelper. */ 053 private final BiTreeCreationHelper treeHelper; 054 055 /** inputtext to parse. */ 056 private final String content; 057 058 /** current position in inputtext. */ 059 private int position; 060 061 /** previous position in inputtext. */ 062 private int previousPosition; 063 064 /** last line (y-position) in inputtext. */ 065 private int lastLine; 066 067 /** last column (x-position) in inputtext. */ 068 private int lastColumn; 069 070 /** BiTree to construct. */ 071 private final BiTree tree; 072 073 /** 074 * create a new SAX-Handler for parsing and creating a BiTree. 075 * 076 * @param c 077 * inputtext to parse 078 * @param t 079 * BiTree to construct 080 */ 081 public JEuclidSAXHandler(final String c, final BiTree t) { 082 this.position = 0; 083 this.previousPosition = 0; 084 this.lastLine = 1; 085 this.lastColumn = 1; 086 087 this.content = c; 088 this.tree = t; 089 this.treeHelper = new BiTreeCreationHelper(); 090 } 091 092 /** 093 * set the document locator. 094 * 095 * @param l 096 * locator 097 */ 098 @Override 099 public final void setDocumentLocator(final Locator l) { 100 this.locator = l; 101 } 102 103 /** 104 * stop resolving of entities (dtd). 105 * 106 * @param publicId 107 * publicId 108 * @param systemId 109 * systemId 110 * @return empty InputSource 111 */ 112 @Override 113 public final InputSource resolveEntity(final String publicId, 114 final String systemId) { 115 return new InputSource(new StringReader("")); 116 } 117 118 // =========================================================== 119 // SAX DocumentHandler methods 120 // =========================================================== 121 /** 122 * start document. 123 */ 124 @Override 125 public final void startDocument() { 126 JEuclidSAXHandler.LOGGER.debug("SAX start document, length=" 127 + this.content.length()); 128 } 129 130 /** 131 * end document. 132 * 133 * @throws SAXException 134 * if a sax parse exception occurs 135 */ 136 @Override 137 public final void endDocument() throws SAXException { 138 this.tree.setRoot(this.treeHelper.getRoot()); 139 JEuclidSAXHandler.LOGGER.debug("SAX end document"); 140 } 141 142 /** 143 * start element, called at end of every new open tag. 144 * 145 * @param namespaceURI 146 * namespace 147 * @param sName 148 * simple name 149 * @param qName 150 * qualified name 151 * @param attrs 152 * attributes of node 153 * @throws SAXException 154 * if a sax parse exception occurs 155 */ 156 @Override 157 public final void startElement(final String namespaceURI, 158 final String sName, final String qName, final Attributes attrs) 159 throws SAXException { 160 161 int startPosition; 162 int length; 163 // element name 164 String eName; 165 166 eName = sName; 167 if ("".equals(eName)) { 168 // not namespaceAware 169 eName = qName; 170 } 171 172 // get current position in inputtext 173 this.contentPosition(); 174 175 // get startposition of tag 176 startPosition = this.content.lastIndexOf("<" + eName, 177 this.position - 1); 178 179 if (this.textBuffer == null) { 180 length = 0; 181 } else { 182 length = this.textBuffer.length(); 183 } 184 185 JEuclidSAXHandler.LOGGER.debug("tag-start=" + startPosition 186 + " tag-end=" + this.position + " buffer=" 187 + (startPosition - this.previousPosition) + " textbuffer=" 188 + length); 189 190 // create a EmptyNode if text is before this element 191 if (startPosition - this.previousPosition > 0) { 192 JEuclidSAXHandler.LOGGER.debug("empty length=" 193 + (startPosition - this.previousPosition)); 194 195 this.treeHelper.createEmtpyNode(startPosition 196 - this.previousPosition); 197 this.textBuffer = null; 198 } 199 200 this.printElement(namespaceURI, eName, true, startPosition, attrs); 201 202 // create new BiNode 203 this.treeHelper.createBiNode(startPosition, this.position 204 - startPosition, namespaceURI, eName, attrs); 205 } 206 207 /** 208 * end element, called at end of every close tag. 209 * 210 * @param namespaceURI 211 * namespace 212 * @param sName 213 * simple name 214 * @param qName 215 * qulified name 216 */ 217 @Override 218 public final void endElement(final String namespaceURI, 219 final String sName, final String qName) { 220 // element name 221 String eName = sName; 222 // text of a TextNode before close tag 223 String text; 224 // length of TextNode of EmptyNode 225 int textLength; 226 final String apo = "'"; 227 228 if ("".equals(eName)) { 229 // not namespaceAware 230 eName = qName; 231 } 232 233 // get current position in inputtext (end-position of close tag) 234 this.contentPosition(); 235 236 // length of text before close tag 237 textLength = this.content.lastIndexOf("</", this.position - 1) 238 - this.previousPosition; 239 240 // create a new TextNode 241 if (this.textBuffer != null && this.textBuffer.length() > 0 242 && this.treeHelper.allowNewTextNode()) { 243 244 text = this.textBuffer.toString(); 245 this.treeHelper.createTextNode(textLength, text); 246 this.textBuffer = null; 247 248 JEuclidSAXHandler.LOGGER.debug(apo 249 + text.replaceAll(this.nl(), "#") + apo); 250 251 } else if (!this.treeHelper.allowNewTextNode() && textLength > 0) { 252 // or create a new EmptyNode 253 this.treeHelper.createEmtpyNode(textLength); 254 } 255 256 /** close current BiNode in tree (set length of node) */ 257 this.treeHelper.closeBiNode(this.position); 258 259 this.printElement(namespaceURI, eName, false, this.position, null); 260 } 261 262 /** 263 * concat characters while parsing. 264 * 265 * @param buf 266 * inputtext 267 * @param offset 268 * offset of characters to inputtext 269 * @param len 270 * number of characters 271 * @throws SAXException 272 * if a sax parse exception occurs 273 */ 274 @Override 275 public final void characters(final char[] buf, final int offset, 276 final int len) throws SAXException { 277 final String s = new String(buf, offset, len); 278 279 if (this.textBuffer == null) { 280 this.textBuffer = new StringBuffer(s); 281 } else { 282 this.textBuffer.append(s); 283 } 284 } 285 286 // =========================================================== 287 // Utility Methods ... 288 // =========================================================== 289 /** 290 * calculate current position in inputtext. 291 */ 292 private void contentPosition() { 293 final int line = this.locator.getLineNumber(); 294 final int column = this.locator.getColumnNumber(); 295 int l; 296 297 this.previousPosition = this.position; 298 299 JEuclidSAXHandler.LOGGER.debug("old line=" + this.lastLine); 300 for (l = this.lastLine; l < line; l = l + 1) { 301 this.position = 1 + this.content 302 .indexOf(this.nl(), this.position); 303 // System.out.println(" position = " + position + " "); 304 } 305 306 if (this.lastLine == line) { 307 // tag is in same line as previous 308 this.position += column - this.lastColumn; 309 } else { 310 // position += column - 1; 311 // position += column - nl().length(); 312 this.position += column - 2 + this.nl().length(); 313 } 314 315 this.lastLine = line; 316 this.lastColumn = column; 317 JEuclidSAXHandler.LOGGER.debug(" - new line=" + this.lastLine 318 + " - old pos=" + this.previousPosition + " new pos=" 319 + this.position); 320 } 321 322 /** 323 * print information about an elment. 324 * 325 * @param namespaceURI 326 * namespace 327 * @param name 328 * of tag 329 * @param open 330 * if true output an open tag, else close tag 331 * @param pos 332 * position of tag 333 * @param attrs 334 * attributes of tag 335 * 336 */ 337 private void printElement(final String namespaceURI, final String name, 338 final boolean open, final int pos, final Attributes attrs) { 339 final StringBuffer sb = new StringBuffer(32); 340 341 sb.append(this.position()); 342 sb.append(" - "); 343 sb.append(pos); 344 345 if (open) { 346 sb.append(" <"); 347 } else { 348 sb.append(" </"); 349 } 350 351 sb.append(name); 352 353 if (attrs != null) { 354 for (int i = 0; i < attrs.getLength(); i = i + 1) { 355 // Attr name 356 String aName = attrs.getLocalName(i); 357 358 if ("".equals(aName)) { 359 aName = attrs.getQName(i); 360 } 361 362 sb.append(' '); 363 sb.append(aName + "=\"" + attrs.getValue(i) + "\""); 364 } 365 } 366 367 if (namespaceURI != null && namespaceURI.length() > 0) { 368 sb.append(' '); 369 sb.append(namespaceURI); 370 } 371 sb.append('>'); 372 sb.append(this.nl()); 373 374 JEuclidSAXHandler.LOGGER.debug(sb.toString()); 375 } 376 377 /** 378 * get newline character. 379 * 380 * @return newline 381 */ 382 private String nl() { 383 // workaround for some problems with OS dependency 384 return "\n"; 385 // return System.getProperty("line.separator"); 386 } 387 388 /** 389 * print current x/y-position. 390 * 391 * @return current x/y-position 392 */ 393 private String position() { 394 final int line = this.locator.getLineNumber(); 395 final int column = this.locator.getColumnNumber(); 396 final StringBuffer sb = new StringBuffer(); 397 final int dez = 10; 398 399 if (line < dez) { 400 sb.append('0'); 401 } 402 403 sb.append(line); 404 sb.append('/'); 405 if (column < dez) { 406 sb.append('0'); 407 } 408 409 sb.append(column); 410 sb.append(':'); 411 412 return sb.toString(); 413 } 414 }