View Javadoc

1   /*
2    * Copyright 2009 - 2010 JEuclid, http://jeuclid.sf.net
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  /* $Id $ */
18  
19  package net.sourceforge.jeuclid.biparser;
20  
21  import java.io.StringReader;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.xml.sax.Attributes;
26  import org.xml.sax.InputSource;
27  import org.xml.sax.Locator;
28  import org.xml.sax.SAXException;
29  import org.xml.sax.helpers.DefaultHandler;
30  
31  /**
32   * this class is used for SAX parsing. it builds a BiTree out of a text while
33   * parsing
34   * 
35   * @version $Revision: 006d990a217c $
36   * 
37   */
38  public class JEuclidSAXHandler extends DefaultHandler {
39  
40      /**
41       * Logger for this class
42       */
43      private static final Log LOGGER = LogFactory
44              .getLog(JEuclidSAXHandler.class);
45  
46      /** stores characters while parsing (text of TextNodes). */
47      private StringBuffer textBuffer;
48  
49      /** locater for X&Y-position in inputtext. */
50      private Locator locator;
51  
52      /** BiTreeCreationHelper. */
53      private final BiTreeCreationHelper treeHelper;
54  
55      /** inputtext to parse. */
56      private final String content;
57  
58      /** current position in inputtext. */
59      private int position;
60  
61      /** previous position in inputtext. */
62      private int previousPosition;
63  
64      /** last line (y-position) in inputtext. */
65      private int lastLine;
66  
67      /** last column (x-position) in inputtext. */
68      private int lastColumn;
69  
70      /** BiTree to construct. */
71      private final BiTree tree;
72  
73      /**
74       * create a new SAX-Handler for parsing and creating a BiTree.
75       * 
76       * @param c
77       *            inputtext to parse
78       * @param t
79       *            BiTree to construct
80       */
81      public JEuclidSAXHandler(final String c, final BiTree t) {
82          this.position = 0;
83          this.previousPosition = 0;
84          this.lastLine = 1;
85          this.lastColumn = 1;
86  
87          this.content = c;
88          this.tree = t;
89          this.treeHelper = new BiTreeCreationHelper();
90      }
91  
92      /**
93       * set the document locator.
94       * 
95       * @param l
96       *            locator
97       */
98      @Override
99      public final void setDocumentLocator(final Locator l) {
100         this.locator = l;
101     }
102 
103     /**
104      * stop resolving of entities (dtd).
105      * 
106      * @param publicId
107      *            publicId
108      * @param systemId
109      *            systemId
110      * @return empty InputSource
111      */
112     @Override
113     public final InputSource resolveEntity(final String publicId,
114             final String systemId) {
115         return new InputSource(new StringReader(""));
116     }
117 
118     // ===========================================================
119     // SAX DocumentHandler methods
120     // ===========================================================
121     /**
122      * start document.
123      */
124     @Override
125     public final void startDocument() {
126         JEuclidSAXHandler.LOGGER.debug("SAX start document, length="
127                 + this.content.length());
128     }
129 
130     /**
131      * end document.
132      * 
133      * @throws SAXException
134      *             if a sax parse exception occurs
135      */
136     @Override
137     public final void endDocument() throws SAXException {
138         this.tree.setRoot(this.treeHelper.getRoot());
139         JEuclidSAXHandler.LOGGER.debug("SAX end document");
140     }
141 
142     /**
143      * start element, called at end of every new open tag.
144      * 
145      * @param namespaceURI
146      *            namespace
147      * @param sName
148      *            simple name
149      * @param qName
150      *            qualified name
151      * @param attrs
152      *            attributes of node
153      * @throws SAXException
154      *             if a sax parse exception occurs
155      */
156     @Override
157     public final void startElement(final String namespaceURI,
158             final String sName, final String qName, final Attributes attrs)
159             throws SAXException {
160 
161         int startPosition;
162         int length;
163         // element name
164         String eName;
165 
166         eName = sName;
167         if ("".equals(eName)) {
168             // not namespaceAware
169             eName = qName;
170         }
171 
172         // get current position in inputtext
173         this.contentPosition();
174 
175         // get startposition of tag
176         startPosition = this.content.lastIndexOf("<" + eName,
177                 this.position - 1);
178 
179         if (this.textBuffer == null) {
180             length = 0;
181         } else {
182             length = this.textBuffer.length();
183         }
184 
185         JEuclidSAXHandler.LOGGER.debug("tag-start=" + startPosition
186                 + " tag-end=" + this.position + " buffer="
187                 + (startPosition - this.previousPosition) + " textbuffer="
188                 + length);
189 
190         // create a EmptyNode if text is before this element
191         if (startPosition - this.previousPosition > 0) {
192             JEuclidSAXHandler.LOGGER.debug("empty length="
193                     + (startPosition - this.previousPosition));
194 
195             this.treeHelper.createEmtpyNode(startPosition
196                     - this.previousPosition);
197             this.textBuffer = null;
198         }
199 
200         this.printElement(namespaceURI, eName, true, startPosition, attrs);
201 
202         // create new BiNode
203         this.treeHelper.createBiNode(startPosition, this.position
204                 - startPosition, namespaceURI, eName, attrs);
205     }
206 
207     /**
208      * end element, called at end of every close tag.
209      * 
210      * @param namespaceURI
211      *            namespace
212      * @param sName
213      *            simple name
214      * @param qName
215      *            qulified name
216      */
217     @Override
218     public final void endElement(final String namespaceURI,
219             final String sName, final String qName) {
220         // element name
221         String eName = sName;
222         // text of a TextNode before close tag
223         String text;
224         // length of TextNode of EmptyNode
225         int textLength;
226         final String apo = "'";
227 
228         if ("".equals(eName)) {
229             // not namespaceAware
230             eName = qName;
231         }
232 
233         // get current position in inputtext (end-position of close tag)
234         this.contentPosition();
235 
236         // length of text before close tag
237         textLength = this.content.lastIndexOf("</", this.position - 1)
238                 - this.previousPosition;
239 
240         // create a new TextNode
241         if (this.textBuffer != null && this.textBuffer.length() > 0
242                 && this.treeHelper.allowNewTextNode()) {
243 
244             text = this.textBuffer.toString();
245             this.treeHelper.createTextNode(textLength, text);
246             this.textBuffer = null;
247 
248             JEuclidSAXHandler.LOGGER.debug(apo
249                     + text.replaceAll(this.nl(), "#") + apo);
250 
251         } else if (!this.treeHelper.allowNewTextNode() && textLength > 0) {
252             // or create a new EmptyNode
253             this.treeHelper.createEmtpyNode(textLength);
254         }
255 
256         /** close current BiNode in tree (set length of node) */
257         this.treeHelper.closeBiNode(this.position);
258 
259         this.printElement(namespaceURI, eName, false, this.position, null);
260     }
261 
262     /**
263      * concat characters while parsing.
264      * 
265      * @param buf
266      *            inputtext
267      * @param offset
268      *            offset of characters to inputtext
269      * @param len
270      *            number of characters
271      * @throws SAXException
272      *             if a sax parse exception occurs
273      */
274     @Override
275     public final void characters(final char[] buf, final int offset,
276             final int len) throws SAXException {
277         final String s = new String(buf, offset, len);
278 
279         if (this.textBuffer == null) {
280             this.textBuffer = new StringBuffer(s);
281         } else {
282             this.textBuffer.append(s);
283         }
284     }
285 
286     // ===========================================================
287     // Utility Methods ...
288     // ===========================================================
289     /**
290      * calculate current position in inputtext.
291      */
292     private void contentPosition() {
293         final int line = this.locator.getLineNumber();
294         final int column = this.locator.getColumnNumber();
295         int l;
296 
297         this.previousPosition = this.position;
298 
299         JEuclidSAXHandler.LOGGER.debug("old line=" + this.lastLine);
300         for (l = this.lastLine; l < line; l = l + 1) {
301             this.position = 1 + this.content
302                     .indexOf(this.nl(), this.position);
303             // System.out.println(" position = " + position + " ");
304         }
305 
306         if (this.lastLine == line) {
307             // tag is in same line as previous
308             this.position += column - this.lastColumn;
309         } else {
310             // position += column - 1;
311             // position += column - nl().length();
312             this.position += column - 2 + this.nl().length();
313         }
314 
315         this.lastLine = line;
316         this.lastColumn = column;
317         JEuclidSAXHandler.LOGGER.debug(" - new line=" + this.lastLine
318                 + " - old pos=" + this.previousPosition + " new pos="
319                 + this.position);
320     }
321 
322     /**
323      * print information about an elment.
324      * 
325      * @param namespaceURI
326      *            namespace
327      * @param name
328      *            of tag
329      * @param open
330      *            if true output an open tag, else close tag
331      * @param pos
332      *            position of tag
333      * @param attrs
334      *            attributes of tag
335      * 
336      */
337     private void printElement(final String namespaceURI, final String name,
338             final boolean open, final int pos, final Attributes attrs) {
339         final StringBuffer sb = new StringBuffer(32);
340 
341         sb.append(this.position());
342         sb.append(" - ");
343         sb.append(pos);
344 
345         if (open) {
346             sb.append(" <");
347         } else {
348             sb.append(" </");
349         }
350 
351         sb.append(name);
352 
353         if (attrs != null) {
354             for (int i = 0; i < attrs.getLength(); i = i + 1) {
355                 // Attr name
356                 String aName = attrs.getLocalName(i);
357 
358                 if ("".equals(aName)) {
359                     aName = attrs.getQName(i);
360                 }
361 
362                 sb.append(' ');
363                 sb.append(aName + "=\"" + attrs.getValue(i) + "\"");
364             }
365         }
366 
367         if (namespaceURI != null && namespaceURI.length() > 0) {
368             sb.append(' ');
369             sb.append(namespaceURI);
370         }
371         sb.append('>');
372         sb.append(this.nl());
373 
374         JEuclidSAXHandler.LOGGER.debug(sb.toString());
375     }
376 
377     /**
378      * get newline character.
379      * 
380      * @return newline
381      */
382     private String nl() {
383         // workaround for some problems with OS dependency
384         return "\n";
385         // return System.getProperty("line.separator");
386     }
387 
388     /**
389      * print current x/y-position.
390      * 
391      * @return current x/y-position
392      */
393     private String position() {
394         final int line = this.locator.getLineNumber();
395         final int column = this.locator.getColumnNumber();
396         final StringBuffer sb = new StringBuffer();
397         final int dez = 10;
398 
399         if (line < dez) {
400             sb.append('0');
401         }
402 
403         sb.append(line);
404         sb.append('/');
405         if (column < dez) {
406             sb.append('0');
407         }
408 
409         sb.append(column);
410         sb.append(':');
411 
412         return sb.toString();
413     }
414 }