001    /*
002     * Copyright 2007 - 2007 JEuclid, http://jeuclid.sf.net
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    /* $Id: Parser.java,v 2bab6eb875e8 2010/08/11 16:45:50 max $ */
018    
019    package net.sourceforge.jeuclid.parser;
020    
021    import java.io.BufferedInputStream;
022    import java.io.FilterInputStream;
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.io.Reader;
026    import java.lang.ref.Reference;
027    import java.lang.ref.SoftReference;
028    import java.util.Map;
029    import java.util.concurrent.ConcurrentHashMap;
030    import java.util.zip.ZipEntry;
031    import java.util.zip.ZipInputStream;
032    
033    import javax.annotation.concurrent.ThreadSafe;
034    import javax.xml.parsers.DocumentBuilder;
035    import javax.xml.parsers.DocumentBuilderFactory;
036    import javax.xml.parsers.ParserConfigurationException;
037    import javax.xml.transform.Source;
038    import javax.xml.transform.Transformer;
039    import javax.xml.transform.TransformerException;
040    import javax.xml.transform.TransformerFactory;
041    import javax.xml.transform.dom.DOMResult;
042    import javax.xml.transform.dom.DOMSource;
043    import javax.xml.transform.stream.StreamSource;
044    
045    import net.sourceforge.jeuclid.ResourceEntityResolver;
046    
047    import org.apache.commons.logging.Log;
048    import org.apache.commons.logging.LogFactory;
049    import org.apache.xmlgraphics.image.loader.ImageSource;
050    import org.w3c.dom.Document;
051    import org.w3c.dom.Node;
052    import org.xml.sax.ErrorHandler;
053    import org.xml.sax.InputSource;
054    import org.xml.sax.SAXException;
055    import org.xml.sax.SAXParseException;
056    
057    /**
058     * A JAXP compatible approach to MathML Parsing.
059     * 
060     * @version $Revision: 2bab6eb875e8 $
061     */
062    // CHECKSTYLE:OFF
063    // This class is too complex.
064    @ThreadSafe
065    public final class Parser {
066        // CHECKSTYLE:ON
067    
068        private static final class LoggerErrorHandler implements ErrorHandler {
069            public LoggerErrorHandler() {
070                // Empty on purpose
071            }
072    
073            public void error(final SAXParseException exception)
074                    throws SAXException {
075                Parser.LOGGER.warn(exception);
076            }
077    
078            public void fatalError(final SAXParseException exception)
079                    throws SAXException {
080                throw exception;
081            }
082    
083            public void warning(final SAXParseException exception)
084                    throws SAXException {
085                Parser.LOGGER.debug(exception);
086            }
087        }
088    
089        private static final class UnclosableInputStream extends FilterInputStream {
090            protected UnclosableInputStream(final InputStream in) {
091                super(in);
092            }
093    
094            @Override
095            public void close() throws IOException {
096                // Do Nothing.
097            }
098        }
099    
100        /**
101         * Detection buffer size. Rationale: After the first 128 bytes a XML file
102         * and a ZIP file should be distinguishable.
103         */
104        private static final int DETECTION_BUFFER_SIZE = 128;
105    
106        private static final String BAD_STREAM_SOURCE = "Bad StreamSource: ";
107    
108        private static final String CONTENT_XML = "content.xml";
109    
110        private static final String CANNOT_HANDLE_SOURCE = "Cannot handle Source: ";
111    
112        private static final class SingletonHolder {
113            private static final Parser INSTANCE = new Parser();
114    
115            private SingletonHolder() {
116            }
117        }
118    
119        /**
120         * Logger for this class.
121         */
122        private static final Log LOGGER = LogFactory.getLog(Parser.class);
123    
124        private final Map<Long, Reference<DocumentBuilder>> builders;
125    
126        /**
127         * Default constructor.
128         */
129        protected Parser() {
130            this.builders = new ConcurrentHashMap<Long, Reference<DocumentBuilder>>();
131        }
132    
133        private DocumentBuilder createDocumentBuilder() {
134            DocumentBuilder documentBuilder;
135            try {
136                try {
137                    documentBuilder = this.tryCreateDocumentBuilder(true);
138                } catch (final UnsupportedOperationException uoe) {
139                    Parser.LOGGER.debug("Unsupported Operation: "
140                            + uoe.getMessage());
141                    documentBuilder = this.tryCreateDocumentBuilder(false);
142                } catch (final ParserConfigurationException pce) {
143                    Parser.LOGGER.debug("ParserConfigurationException: "
144                            + pce.getMessage());
145                    documentBuilder = this.tryCreateDocumentBuilder(false);
146                }
147                documentBuilder.setEntityResolver(new ResourceEntityResolver());
148                documentBuilder.setErrorHandler(new LoggerErrorHandler());
149            } catch (final ParserConfigurationException pce2) {
150                Parser.LOGGER.warn("Could not create Parser: " + pce2.getMessage());
151                assert false : "Could not create Parser";
152                documentBuilder = null;
153            }
154            return documentBuilder;
155        }
156    
157        private DocumentBuilder tryCreateDocumentBuilder(final boolean xinclude)
158                throws ParserConfigurationException {
159            final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory
160                    .newInstance();
161            documentBuilderFactory.setNamespaceAware(true);
162            if (xinclude) {
163                documentBuilderFactory.setXIncludeAware(true);
164            }
165            final DocumentBuilder documentBuilder = documentBuilderFactory
166                    .newDocumentBuilder();
167            return documentBuilder;
168        }
169    
170        /**
171         * Retrieve the singleton Parser instance.
172         * 
173         * @return a Parser object.
174         */
175        public static Parser getInstance() {
176            return Parser.SingletonHolder.INSTANCE;
177        }
178    
179        /**
180         * use {@link #getInstance()} instead.
181         * 
182         * @return see {@link #getInstance()}
183         * @throws ParserConfigurationException
184         *             see {@link #getInstance()}
185         * @deprecated use {@link #getInstance()} instead.
186         */
187        @Deprecated
188        public static Parser getParser() throws ParserConfigurationException {
189            return Parser.getInstance();
190        }
191    
192        /**
193         * Parse a StreamSource and return its Document.
194         * <p>
195         * This method will auto-detect ODF or XML format and load an appropriate
196         * parser.
197         * 
198         * @param streamSource
199         *            A StreamSource.
200         * @return A DOM Document representation for this source.
201         * @throws SAXException
202         *             if a parse error occurred.
203         * @throws IOException
204         *             if an I/O error occurred.
205         */
206        public Document parseStreamSource(final StreamSource streamSource)
207                throws SAXException, IOException {
208            Document retVal = null;
209            InputStream inputStream = streamSource.getInputStream();
210            if (inputStream != null) {
211    
212                // Alternative 1: Parse as XML, and fall back to ODF
213                if (!inputStream.markSupported()) {
214                    inputStream = new BufferedInputStream(inputStream);
215                }
216                final InputStream filterInput = new UnclosableInputStream(
217                        inputStream);
218                filterInput.mark(Parser.DETECTION_BUFFER_SIZE);
219                try {
220                    retVal = this.parseStreamSourceAsXml(new StreamSource(
221                            filterInput));
222                    inputStream.close();
223                } catch (final SAXParseException se) {
224                    filterInput.reset();
225                    try {
226                        retVal = this.parseStreamSourceAsOdf(new StreamSource(
227                                filterInput));
228                    } catch (final IOException io) {
229                        throw se;
230                    }
231                    inputStream.close();
232                }
233    
234                // Alternative 2: peek for ZIP magic and call matching parser.
235    
236                // final PushbackInputStream pi = new PushbackInputStream(
237                // inputStream, 4);
238                // final byte[] magic = new byte[4];
239                // pi.read(magic);
240                // pi.unread(magic);
241                // if ((magic[0] == 'P') && (magic[1] == 'K') && (magic[2] == 3)
242                // && (magic[3] == 4)) {
243                // retVal = this.parseStreamSourceAsOdf(streamSource);
244                // }
245            }
246            if (retVal == null) {
247                retVal = this.parseStreamSourceAsXml(streamSource);
248            }
249            return retVal;
250        }
251    
252        /**
253         * Parse a given StreamSource which represents an ODF document.
254         * 
255         * @param streamSource
256         *            StreamSource to parse.
257         * @return the Document contained within.
258         * @throws SAXException
259         *             if a parse error occurred.
260         * @throws IOException
261         *             if an I/O error occurred.
262         */
263        public Document parseStreamSourceAsOdf(final StreamSource streamSource)
264                throws IOException, SAXException {
265            final InputStream is = streamSource.getInputStream();
266            if (is == null) {
267                throw new IllegalArgumentException(Parser.BAD_STREAM_SOURCE
268                        + streamSource);
269            }
270            final ZipInputStream zipStream = new ZipInputStream(is);
271            Document document = null;
272            ZipEntry entry = zipStream.getNextEntry();
273            while (entry != null) {
274                if (Parser.CONTENT_XML.equals(entry.getName())) {
275                    document = this.getDocumentBuilder().parse(zipStream);
276                    entry = null;
277                } else {
278                    entry = zipStream.getNextEntry();
279                }
280            }
281            return document;
282        }
283    
284        /**
285         * Parse a given StreamSource which represents an XML document.
286         * 
287         * @param streamSource
288         *            StreamSource to parse.
289         * @return the Document contained within.
290         * @throws SAXException
291         *             if a parse error occurred.
292         * @throws IOException
293         *             if an I/O error occurred.
294         */
295        public Document parseStreamSourceAsXml(final StreamSource streamSource)
296                throws SAXException, IOException {
297            InputSource inp = null;
298            final String systemId = streamSource.getSystemId();
299            if (systemId != null) {
300                inp = new InputSource(systemId);
301            }
302            final InputStream is = streamSource.getInputStream();
303            if ((inp == null) && (is != null)) {
304                inp = new InputSource(is);
305            }
306            final Reader ir = streamSource.getReader();
307            if ((inp == null) && (ir != null)) {
308                inp = new InputSource(ir);
309            }
310    
311            if (inp == null) {
312                throw new IllegalArgumentException(Parser.BAD_STREAM_SOURCE
313                        + streamSource);
314            }
315    
316            return this.getDocumentBuilder().parse(inp);
317        }
318    
319        /**
320         * Retrieve a DocumentBuilder suitable for MathML parsing.
321         * <p>
322         * Please note:
323         * <ul>
324         * <li>There is one instance of the builder per thread.</li>
325         * <li>The builder instance is not thread safe, so it may not be passed
326         * among threads.</li>
327         * <li>Multiple Threads may call getDocumentBuilder concurrently</li>
328         * </ul>
329         * 
330         * @return a DocumentBuilder
331         */
332        public DocumentBuilder getDocumentBuilder() {
333            // Note: No synchronization needed, as id will be different for every
334            // thread!
335            final long id = Thread.currentThread().getId();
336            final Reference<DocumentBuilder> builderRef = this.builders.get(id);
337            if (builderRef != null) {
338                final DocumentBuilder builder = builderRef.get();
339                if (builder != null) {
340                    return builder;
341                }
342            }
343            final DocumentBuilder builder = this.createDocumentBuilder();
344            this.builders.put(id, new SoftReference<DocumentBuilder>(builder));
345            return builder;
346        }
347    
348        /**
349         * Extract the top Node from a given Source.
350         * 
351         * @param source
352         *            the Source to use. Currently supported are {@link DOMSource} ,
353         *            {@link StreamSource}
354         * @return the top NODE.
355         * @throws SAXException
356         *             if a parse error occurred.
357         * @throws IOException
358         *             if an I/O error occurred.
359         */
360        public Node parse(final Source source) throws SAXException, IOException {
361            final Node retVal;
362            if (source instanceof StreamSource) {
363                final StreamSource streamSource = (StreamSource) source;
364                retVal = this.parseStreamSource(streamSource);
365            } else if (source instanceof ImageSource) {
366                final ImageSource imageSource = (ImageSource) source;
367                final StreamSource streamSource = new StreamSource(imageSource
368                        .getInputStream());
369                retVal = this.parseStreamSource(streamSource);
370            } else if (source instanceof DOMSource) {
371                final DOMSource domSource = (DOMSource) source;
372                retVal = domSource.getNode();
373            } else {
374                try {
375                    final Transformer t = TransformerFactory.newInstance()
376                            .newTransformer();
377                    final DOMResult r = new DOMResult();
378                    t.transform(source, r);
379                    retVal = r.getNode();
380                } catch (final TransformerException e) {
381                    Parser.LOGGER.warn(e.getMessage());
382                    throw new IllegalArgumentException(Parser.CANNOT_HANDLE_SOURCE
383                            + source, e);
384                }
385            }
386            return retVal;
387        }
388    }