001    /*
002     * Copyright 2009 - 2010 JEuclid, http://jeuclid.sf.net
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    /* $Id $ */
018    
019    package net.sourceforge.jeuclid.biparser;
020    
021    import java.io.StringReader;
022    
023    import org.apache.commons.logging.Log;
024    import org.apache.commons.logging.LogFactory;
025    import org.xml.sax.Attributes;
026    import org.xml.sax.InputSource;
027    import org.xml.sax.Locator;
028    import org.xml.sax.SAXException;
029    import org.xml.sax.helpers.DefaultHandler;
030    
031    /**
032     * this class is used for SAX parsing. it builds a BiTree out of a text while
033     * parsing
034     * 
035     * @version $Revision: 006d990a217c $
036     * 
037     */
038    public class JEuclidSAXHandler extends DefaultHandler {
039    
040        /**
041         * Logger for this class
042         */
043        private static final Log LOGGER = LogFactory
044                .getLog(JEuclidSAXHandler.class);
045    
046        /** stores characters while parsing (text of TextNodes). */
047        private StringBuffer textBuffer;
048    
049        /** locater for X&Y-position in inputtext. */
050        private Locator locator;
051    
052        /** BiTreeCreationHelper. */
053        private final BiTreeCreationHelper treeHelper;
054    
055        /** inputtext to parse. */
056        private final String content;
057    
058        /** current position in inputtext. */
059        private int position;
060    
061        /** previous position in inputtext. */
062        private int previousPosition;
063    
064        /** last line (y-position) in inputtext. */
065        private int lastLine;
066    
067        /** last column (x-position) in inputtext. */
068        private int lastColumn;
069    
070        /** BiTree to construct. */
071        private final BiTree tree;
072    
073        /**
074         * create a new SAX-Handler for parsing and creating a BiTree.
075         * 
076         * @param c
077         *            inputtext to parse
078         * @param t
079         *            BiTree to construct
080         */
081        public JEuclidSAXHandler(final String c, final BiTree t) {
082            this.position = 0;
083            this.previousPosition = 0;
084            this.lastLine = 1;
085            this.lastColumn = 1;
086    
087            this.content = c;
088            this.tree = t;
089            this.treeHelper = new BiTreeCreationHelper();
090        }
091    
092        /**
093         * set the document locator.
094         * 
095         * @param l
096         *            locator
097         */
098        @Override
099        public final void setDocumentLocator(final Locator l) {
100            this.locator = l;
101        }
102    
103        /**
104         * stop resolving of entities (dtd).
105         * 
106         * @param publicId
107         *            publicId
108         * @param systemId
109         *            systemId
110         * @return empty InputSource
111         */
112        @Override
113        public final InputSource resolveEntity(final String publicId,
114                final String systemId) {
115            return new InputSource(new StringReader(""));
116        }
117    
118        // ===========================================================
119        // SAX DocumentHandler methods
120        // ===========================================================
121        /**
122         * start document.
123         */
124        @Override
125        public final void startDocument() {
126            JEuclidSAXHandler.LOGGER.debug("SAX start document, length="
127                    + this.content.length());
128        }
129    
130        /**
131         * end document.
132         * 
133         * @throws SAXException
134         *             if a sax parse exception occurs
135         */
136        @Override
137        public final void endDocument() throws SAXException {
138            this.tree.setRoot(this.treeHelper.getRoot());
139            JEuclidSAXHandler.LOGGER.debug("SAX end document");
140        }
141    
142        /**
143         * start element, called at end of every new open tag.
144         * 
145         * @param namespaceURI
146         *            namespace
147         * @param sName
148         *            simple name
149         * @param qName
150         *            qualified name
151         * @param attrs
152         *            attributes of node
153         * @throws SAXException
154         *             if a sax parse exception occurs
155         */
156        @Override
157        public final void startElement(final String namespaceURI,
158                final String sName, final String qName, final Attributes attrs)
159                throws SAXException {
160    
161            int startPosition;
162            int length;
163            // element name
164            String eName;
165    
166            eName = sName;
167            if ("".equals(eName)) {
168                // not namespaceAware
169                eName = qName;
170            }
171    
172            // get current position in inputtext
173            this.contentPosition();
174    
175            // get startposition of tag
176            startPosition = this.content.lastIndexOf("<" + eName,
177                    this.position - 1);
178    
179            if (this.textBuffer == null) {
180                length = 0;
181            } else {
182                length = this.textBuffer.length();
183            }
184    
185            JEuclidSAXHandler.LOGGER.debug("tag-start=" + startPosition
186                    + " tag-end=" + this.position + " buffer="
187                    + (startPosition - this.previousPosition) + " textbuffer="
188                    + length);
189    
190            // create a EmptyNode if text is before this element
191            if (startPosition - this.previousPosition > 0) {
192                JEuclidSAXHandler.LOGGER.debug("empty length="
193                        + (startPosition - this.previousPosition));
194    
195                this.treeHelper.createEmtpyNode(startPosition
196                        - this.previousPosition);
197                this.textBuffer = null;
198            }
199    
200            this.printElement(namespaceURI, eName, true, startPosition, attrs);
201    
202            // create new BiNode
203            this.treeHelper.createBiNode(startPosition, this.position
204                    - startPosition, namespaceURI, eName, attrs);
205        }
206    
207        /**
208         * end element, called at end of every close tag.
209         * 
210         * @param namespaceURI
211         *            namespace
212         * @param sName
213         *            simple name
214         * @param qName
215         *            qulified name
216         */
217        @Override
218        public final void endElement(final String namespaceURI,
219                final String sName, final String qName) {
220            // element name
221            String eName = sName;
222            // text of a TextNode before close tag
223            String text;
224            // length of TextNode of EmptyNode
225            int textLength;
226            final String apo = "'";
227    
228            if ("".equals(eName)) {
229                // not namespaceAware
230                eName = qName;
231            }
232    
233            // get current position in inputtext (end-position of close tag)
234            this.contentPosition();
235    
236            // length of text before close tag
237            textLength = this.content.lastIndexOf("</", this.position - 1)
238                    - this.previousPosition;
239    
240            // create a new TextNode
241            if (this.textBuffer != null && this.textBuffer.length() > 0
242                    && this.treeHelper.allowNewTextNode()) {
243    
244                text = this.textBuffer.toString();
245                this.treeHelper.createTextNode(textLength, text);
246                this.textBuffer = null;
247    
248                JEuclidSAXHandler.LOGGER.debug(apo
249                        + text.replaceAll(this.nl(), "#") + apo);
250    
251            } else if (!this.treeHelper.allowNewTextNode() && textLength > 0) {
252                // or create a new EmptyNode
253                this.treeHelper.createEmtpyNode(textLength);
254            }
255    
256            /** close current BiNode in tree (set length of node) */
257            this.treeHelper.closeBiNode(this.position);
258    
259            this.printElement(namespaceURI, eName, false, this.position, null);
260        }
261    
262        /**
263         * concat characters while parsing.
264         * 
265         * @param buf
266         *            inputtext
267         * @param offset
268         *            offset of characters to inputtext
269         * @param len
270         *            number of characters
271         * @throws SAXException
272         *             if a sax parse exception occurs
273         */
274        @Override
275        public final void characters(final char[] buf, final int offset,
276                final int len) throws SAXException {
277            final String s = new String(buf, offset, len);
278    
279            if (this.textBuffer == null) {
280                this.textBuffer = new StringBuffer(s);
281            } else {
282                this.textBuffer.append(s);
283            }
284        }
285    
286        // ===========================================================
287        // Utility Methods ...
288        // ===========================================================
289        /**
290         * calculate current position in inputtext.
291         */
292        private void contentPosition() {
293            final int line = this.locator.getLineNumber();
294            final int column = this.locator.getColumnNumber();
295            int l;
296    
297            this.previousPosition = this.position;
298    
299            JEuclidSAXHandler.LOGGER.debug("old line=" + this.lastLine);
300            for (l = this.lastLine; l < line; l = l + 1) {
301                this.position = 1 + this.content
302                        .indexOf(this.nl(), this.position);
303                // System.out.println(" position = " + position + " ");
304            }
305    
306            if (this.lastLine == line) {
307                // tag is in same line as previous
308                this.position += column - this.lastColumn;
309            } else {
310                // position += column - 1;
311                // position += column - nl().length();
312                this.position += column - 2 + this.nl().length();
313            }
314    
315            this.lastLine = line;
316            this.lastColumn = column;
317            JEuclidSAXHandler.LOGGER.debug(" - new line=" + this.lastLine
318                    + " - old pos=" + this.previousPosition + " new pos="
319                    + this.position);
320        }
321    
322        /**
323         * print information about an elment.
324         * 
325         * @param namespaceURI
326         *            namespace
327         * @param name
328         *            of tag
329         * @param open
330         *            if true output an open tag, else close tag
331         * @param pos
332         *            position of tag
333         * @param attrs
334         *            attributes of tag
335         * 
336         */
337        private void printElement(final String namespaceURI, final String name,
338                final boolean open, final int pos, final Attributes attrs) {
339            final StringBuffer sb = new StringBuffer(32);
340    
341            sb.append(this.position());
342            sb.append(" - ");
343            sb.append(pos);
344    
345            if (open) {
346                sb.append(" <");
347            } else {
348                sb.append(" </");
349            }
350    
351            sb.append(name);
352    
353            if (attrs != null) {
354                for (int i = 0; i < attrs.getLength(); i = i + 1) {
355                    // Attr name
356                    String aName = attrs.getLocalName(i);
357    
358                    if ("".equals(aName)) {
359                        aName = attrs.getQName(i);
360                    }
361    
362                    sb.append(' ');
363                    sb.append(aName + "=\"" + attrs.getValue(i) + "\"");
364                }
365            }
366    
367            if (namespaceURI != null && namespaceURI.length() > 0) {
368                sb.append(' ');
369                sb.append(namespaceURI);
370            }
371            sb.append('>');
372            sb.append(this.nl());
373    
374            JEuclidSAXHandler.LOGGER.debug(sb.toString());
375        }
376    
377        /**
378         * get newline character.
379         * 
380         * @return newline
381         */
382        private String nl() {
383            // workaround for some problems with OS dependency
384            return "\n";
385            // return System.getProperty("line.separator");
386        }
387    
388        /**
389         * print current x/y-position.
390         * 
391         * @return current x/y-position
392         */
393        private String position() {
394            final int line = this.locator.getLineNumber();
395            final int column = this.locator.getColumnNumber();
396            final StringBuffer sb = new StringBuffer();
397            final int dez = 10;
398    
399            if (line < dez) {
400                sb.append('0');
401            }
402    
403            sb.append(line);
404            sb.append('/');
405            if (column < dez) {
406                sb.append('0');
407            }
408    
409            sb.append(column);
410            sb.append(':');
411    
412            return sb.toString();
413        }
414    }