001 /*
002 * Copyright 2009 - 2010 JEuclid, http://jeuclid.sf.net
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 /* $Id $ */
018
019 package net.sourceforge.jeuclid.biparser;
020
021 import java.io.StringReader;
022
023 import org.apache.commons.logging.Log;
024 import org.apache.commons.logging.LogFactory;
025 import org.xml.sax.Attributes;
026 import org.xml.sax.InputSource;
027 import org.xml.sax.Locator;
028 import org.xml.sax.SAXException;
029 import org.xml.sax.helpers.DefaultHandler;
030
031 /**
032 * this class is used for SAX parsing. it builds a BiTree out of a text while
033 * parsing
034 *
035 * @version $Revision: 006d990a217c $
036 *
037 */
038 public class JEuclidSAXHandler extends DefaultHandler {
039
040 /**
041 * Logger for this class
042 */
043 private static final Log LOGGER = LogFactory
044 .getLog(JEuclidSAXHandler.class);
045
046 /** stores characters while parsing (text of TextNodes). */
047 private StringBuffer textBuffer;
048
049 /** locater for X&Y-position in inputtext. */
050 private Locator locator;
051
052 /** BiTreeCreationHelper. */
053 private final BiTreeCreationHelper treeHelper;
054
055 /** inputtext to parse. */
056 private final String content;
057
058 /** current position in inputtext. */
059 private int position;
060
061 /** previous position in inputtext. */
062 private int previousPosition;
063
064 /** last line (y-position) in inputtext. */
065 private int lastLine;
066
067 /** last column (x-position) in inputtext. */
068 private int lastColumn;
069
070 /** BiTree to construct. */
071 private final BiTree tree;
072
073 /**
074 * create a new SAX-Handler for parsing and creating a BiTree.
075 *
076 * @param c
077 * inputtext to parse
078 * @param t
079 * BiTree to construct
080 */
081 public JEuclidSAXHandler(final String c, final BiTree t) {
082 this.position = 0;
083 this.previousPosition = 0;
084 this.lastLine = 1;
085 this.lastColumn = 1;
086
087 this.content = c;
088 this.tree = t;
089 this.treeHelper = new BiTreeCreationHelper();
090 }
091
092 /**
093 * set the document locator.
094 *
095 * @param l
096 * locator
097 */
098 @Override
099 public final void setDocumentLocator(final Locator l) {
100 this.locator = l;
101 }
102
103 /**
104 * stop resolving of entities (dtd).
105 *
106 * @param publicId
107 * publicId
108 * @param systemId
109 * systemId
110 * @return empty InputSource
111 */
112 @Override
113 public final InputSource resolveEntity(final String publicId,
114 final String systemId) {
115 return new InputSource(new StringReader(""));
116 }
117
118 // ===========================================================
119 // SAX DocumentHandler methods
120 // ===========================================================
121 /**
122 * start document.
123 */
124 @Override
125 public final void startDocument() {
126 JEuclidSAXHandler.LOGGER.debug("SAX start document, length="
127 + this.content.length());
128 }
129
130 /**
131 * end document.
132 *
133 * @throws SAXException
134 * if a sax parse exception occurs
135 */
136 @Override
137 public final void endDocument() throws SAXException {
138 this.tree.setRoot(this.treeHelper.getRoot());
139 JEuclidSAXHandler.LOGGER.debug("SAX end document");
140 }
141
142 /**
143 * start element, called at end of every new open tag.
144 *
145 * @param namespaceURI
146 * namespace
147 * @param sName
148 * simple name
149 * @param qName
150 * qualified name
151 * @param attrs
152 * attributes of node
153 * @throws SAXException
154 * if a sax parse exception occurs
155 */
156 @Override
157 public final void startElement(final String namespaceURI,
158 final String sName, final String qName, final Attributes attrs)
159 throws SAXException {
160
161 int startPosition;
162 int length;
163 // element name
164 String eName;
165
166 eName = sName;
167 if ("".equals(eName)) {
168 // not namespaceAware
169 eName = qName;
170 }
171
172 // get current position in inputtext
173 this.contentPosition();
174
175 // get startposition of tag
176 startPosition = this.content.lastIndexOf("<" + eName,
177 this.position - 1);
178
179 if (this.textBuffer == null) {
180 length = 0;
181 } else {
182 length = this.textBuffer.length();
183 }
184
185 JEuclidSAXHandler.LOGGER.debug("tag-start=" + startPosition
186 + " tag-end=" + this.position + " buffer="
187 + (startPosition - this.previousPosition) + " textbuffer="
188 + length);
189
190 // create a EmptyNode if text is before this element
191 if (startPosition - this.previousPosition > 0) {
192 JEuclidSAXHandler.LOGGER.debug("empty length="
193 + (startPosition - this.previousPosition));
194
195 this.treeHelper.createEmtpyNode(startPosition
196 - this.previousPosition);
197 this.textBuffer = null;
198 }
199
200 this.printElement(namespaceURI, eName, true, startPosition, attrs);
201
202 // create new BiNode
203 this.treeHelper.createBiNode(startPosition, this.position
204 - startPosition, namespaceURI, eName, attrs);
205 }
206
207 /**
208 * end element, called at end of every close tag.
209 *
210 * @param namespaceURI
211 * namespace
212 * @param sName
213 * simple name
214 * @param qName
215 * qulified name
216 */
217 @Override
218 public final void endElement(final String namespaceURI,
219 final String sName, final String qName) {
220 // element name
221 String eName = sName;
222 // text of a TextNode before close tag
223 String text;
224 // length of TextNode of EmptyNode
225 int textLength;
226 final String apo = "'";
227
228 if ("".equals(eName)) {
229 // not namespaceAware
230 eName = qName;
231 }
232
233 // get current position in inputtext (end-position of close tag)
234 this.contentPosition();
235
236 // length of text before close tag
237 textLength = this.content.lastIndexOf("</", this.position - 1)
238 - this.previousPosition;
239
240 // create a new TextNode
241 if (this.textBuffer != null && this.textBuffer.length() > 0
242 && this.treeHelper.allowNewTextNode()) {
243
244 text = this.textBuffer.toString();
245 this.treeHelper.createTextNode(textLength, text);
246 this.textBuffer = null;
247
248 JEuclidSAXHandler.LOGGER.debug(apo
249 + text.replaceAll(this.nl(), "#") + apo);
250
251 } else if (!this.treeHelper.allowNewTextNode() && textLength > 0) {
252 // or create a new EmptyNode
253 this.treeHelper.createEmtpyNode(textLength);
254 }
255
256 /** close current BiNode in tree (set length of node) */
257 this.treeHelper.closeBiNode(this.position);
258
259 this.printElement(namespaceURI, eName, false, this.position, null);
260 }
261
262 /**
263 * concat characters while parsing.
264 *
265 * @param buf
266 * inputtext
267 * @param offset
268 * offset of characters to inputtext
269 * @param len
270 * number of characters
271 * @throws SAXException
272 * if a sax parse exception occurs
273 */
274 @Override
275 public final void characters(final char[] buf, final int offset,
276 final int len) throws SAXException {
277 final String s = new String(buf, offset, len);
278
279 if (this.textBuffer == null) {
280 this.textBuffer = new StringBuffer(s);
281 } else {
282 this.textBuffer.append(s);
283 }
284 }
285
286 // ===========================================================
287 // Utility Methods ...
288 // ===========================================================
289 /**
290 * calculate current position in inputtext.
291 */
292 private void contentPosition() {
293 final int line = this.locator.getLineNumber();
294 final int column = this.locator.getColumnNumber();
295 int l;
296
297 this.previousPosition = this.position;
298
299 JEuclidSAXHandler.LOGGER.debug("old line=" + this.lastLine);
300 for (l = this.lastLine; l < line; l = l + 1) {
301 this.position = 1 + this.content
302 .indexOf(this.nl(), this.position);
303 // System.out.println(" position = " + position + " ");
304 }
305
306 if (this.lastLine == line) {
307 // tag is in same line as previous
308 this.position += column - this.lastColumn;
309 } else {
310 // position += column - 1;
311 // position += column - nl().length();
312 this.position += column - 2 + this.nl().length();
313 }
314
315 this.lastLine = line;
316 this.lastColumn = column;
317 JEuclidSAXHandler.LOGGER.debug(" - new line=" + this.lastLine
318 + " - old pos=" + this.previousPosition + " new pos="
319 + this.position);
320 }
321
322 /**
323 * print information about an elment.
324 *
325 * @param namespaceURI
326 * namespace
327 * @param name
328 * of tag
329 * @param open
330 * if true output an open tag, else close tag
331 * @param pos
332 * position of tag
333 * @param attrs
334 * attributes of tag
335 *
336 */
337 private void printElement(final String namespaceURI, final String name,
338 final boolean open, final int pos, final Attributes attrs) {
339 final StringBuffer sb = new StringBuffer(32);
340
341 sb.append(this.position());
342 sb.append(" - ");
343 sb.append(pos);
344
345 if (open) {
346 sb.append(" <");
347 } else {
348 sb.append(" </");
349 }
350
351 sb.append(name);
352
353 if (attrs != null) {
354 for (int i = 0; i < attrs.getLength(); i = i + 1) {
355 // Attr name
356 String aName = attrs.getLocalName(i);
357
358 if ("".equals(aName)) {
359 aName = attrs.getQName(i);
360 }
361
362 sb.append(' ');
363 sb.append(aName + "=\"" + attrs.getValue(i) + "\"");
364 }
365 }
366
367 if (namespaceURI != null && namespaceURI.length() > 0) {
368 sb.append(' ');
369 sb.append(namespaceURI);
370 }
371 sb.append('>');
372 sb.append(this.nl());
373
374 JEuclidSAXHandler.LOGGER.debug(sb.toString());
375 }
376
377 /**
378 * get newline character.
379 *
380 * @return newline
381 */
382 private String nl() {
383 // workaround for some problems with OS dependency
384 return "\n";
385 // return System.getProperty("line.separator");
386 }
387
388 /**
389 * print current x/y-position.
390 *
391 * @return current x/y-position
392 */
393 private String position() {
394 final int line = this.locator.getLineNumber();
395 final int column = this.locator.getColumnNumber();
396 final StringBuffer sb = new StringBuffer();
397 final int dez = 10;
398
399 if (line < dez) {
400 sb.append('0');
401 }
402
403 sb.append(line);
404 sb.append('/');
405 if (column < dez) {
406 sb.append('0');
407 }
408
409 sb.append(column);
410 sb.append(':');
411
412 return sb.toString();
413 }
414 }