View Javadoc

1   /*
2    * Copyright 2007 - 2007 JEuclid, http://jeuclid.sf.net
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  /* $Id: Parser.java,v 2bab6eb875e8 2010/08/11 16:45:50 max $ */
18  
19  package net.sourceforge.jeuclid.parser;
20  
21  import java.io.BufferedInputStream;
22  import java.io.FilterInputStream;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.Reader;
26  import java.lang.ref.Reference;
27  import java.lang.ref.SoftReference;
28  import java.util.Map;
29  import java.util.concurrent.ConcurrentHashMap;
30  import java.util.zip.ZipEntry;
31  import java.util.zip.ZipInputStream;
32  
33  import javax.annotation.concurrent.ThreadSafe;
34  import javax.xml.parsers.DocumentBuilder;
35  import javax.xml.parsers.DocumentBuilderFactory;
36  import javax.xml.parsers.ParserConfigurationException;
37  import javax.xml.transform.Source;
38  import javax.xml.transform.Transformer;
39  import javax.xml.transform.TransformerException;
40  import javax.xml.transform.TransformerFactory;
41  import javax.xml.transform.dom.DOMResult;
42  import javax.xml.transform.dom.DOMSource;
43  import javax.xml.transform.stream.StreamSource;
44  
45  import net.sourceforge.jeuclid.ResourceEntityResolver;
46  
47  import org.apache.commons.logging.Log;
48  import org.apache.commons.logging.LogFactory;
49  import org.apache.xmlgraphics.image.loader.ImageSource;
50  import org.w3c.dom.Document;
51  import org.w3c.dom.Node;
52  import org.xml.sax.ErrorHandler;
53  import org.xml.sax.InputSource;
54  import org.xml.sax.SAXException;
55  import org.xml.sax.SAXParseException;
56  
57  /**
58   * A JAXP compatible approach to MathML Parsing.
59   * 
60   * @version $Revision: 2bab6eb875e8 $
61   */
62  // CHECKSTYLE:OFF
63  // This class is too complex.
64  @ThreadSafe
65  public final class Parser {
66      // CHECKSTYLE:ON
67  
68      private static final class LoggerErrorHandler implements ErrorHandler {
69          public LoggerErrorHandler() {
70              // Empty on purpose
71          }
72  
73          public void error(final SAXParseException exception)
74                  throws SAXException {
75              Parser.LOGGER.warn(exception);
76          }
77  
78          public void fatalError(final SAXParseException exception)
79                  throws SAXException {
80              throw exception;
81          }
82  
83          public void warning(final SAXParseException exception)
84                  throws SAXException {
85              Parser.LOGGER.debug(exception);
86          }
87      }
88  
89      private static final class UnclosableInputStream extends FilterInputStream {
90          protected UnclosableInputStream(final InputStream in) {
91              super(in);
92          }
93  
94          @Override
95          public void close() throws IOException {
96              // Do Nothing.
97          }
98      }
99  
100     /**
101      * Detection buffer size. Rationale: After the first 128 bytes a XML file
102      * and a ZIP file should be distinguishable.
103      */
104     private static final int DETECTION_BUFFER_SIZE = 128;
105 
106     private static final String BAD_STREAM_SOURCE = "Bad StreamSource: ";
107 
108     private static final String CONTENT_XML = "content.xml";
109 
110     private static final String CANNOT_HANDLE_SOURCE = "Cannot handle Source: ";
111 
112     private static final class SingletonHolder {
113         private static final Parser INSTANCE = new Parser();
114 
115         private SingletonHolder() {
116         }
117     }
118 
119     /**
120      * Logger for this class.
121      */
122     private static final Log LOGGER = LogFactory.getLog(Parser.class);
123 
124     private final Map<Long, Reference<DocumentBuilder>> builders;
125 
126     /**
127      * Default constructor.
128      */
129     protected Parser() {
130         this.builders = new ConcurrentHashMap<Long, Reference<DocumentBuilder>>();
131     }
132 
133     private DocumentBuilder createDocumentBuilder() {
134         DocumentBuilder documentBuilder;
135         try {
136             try {
137                 documentBuilder = this.tryCreateDocumentBuilder(true);
138             } catch (final UnsupportedOperationException uoe) {
139                 Parser.LOGGER.debug("Unsupported Operation: "
140                         + uoe.getMessage());
141                 documentBuilder = this.tryCreateDocumentBuilder(false);
142             } catch (final ParserConfigurationException pce) {
143                 Parser.LOGGER.debug("ParserConfigurationException: "
144                         + pce.getMessage());
145                 documentBuilder = this.tryCreateDocumentBuilder(false);
146             }
147             documentBuilder.setEntityResolver(new ResourceEntityResolver());
148             documentBuilder.setErrorHandler(new LoggerErrorHandler());
149         } catch (final ParserConfigurationException pce2) {
150             Parser.LOGGER.warn("Could not create Parser: " + pce2.getMessage());
151             assert false : "Could not create Parser";
152             documentBuilder = null;
153         }
154         return documentBuilder;
155     }
156 
157     private DocumentBuilder tryCreateDocumentBuilder(final boolean xinclude)
158             throws ParserConfigurationException {
159         final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory
160                 .newInstance();
161         documentBuilderFactory.setNamespaceAware(true);
162         if (xinclude) {
163             documentBuilderFactory.setXIncludeAware(true);
164         }
165         final DocumentBuilder documentBuilder = documentBuilderFactory
166                 .newDocumentBuilder();
167         return documentBuilder;
168     }
169 
170     /**
171      * Retrieve the singleton Parser instance.
172      * 
173      * @return a Parser object.
174      */
175     public static Parser getInstance() {
176         return Parser.SingletonHolder.INSTANCE;
177     }
178 
179     /**
180      * use {@link #getInstance()} instead.
181      * 
182      * @return see {@link #getInstance()}
183      * @throws ParserConfigurationException
184      *             see {@link #getInstance()}
185      * @deprecated use {@link #getInstance()} instead.
186      */
187     @Deprecated
188     public static Parser getParser() throws ParserConfigurationException {
189         return Parser.getInstance();
190     }
191 
192     /**
193      * Parse a StreamSource and return its Document.
194      * <p>
195      * This method will auto-detect ODF or XML format and load an appropriate
196      * parser.
197      * 
198      * @param streamSource
199      *            A StreamSource.
200      * @return A DOM Document representation for this source.
201      * @throws SAXException
202      *             if a parse error occurred.
203      * @throws IOException
204      *             if an I/O error occurred.
205      */
206     public Document parseStreamSource(final StreamSource streamSource)
207             throws SAXException, IOException {
208         Document retVal = null;
209         InputStream inputStream = streamSource.getInputStream();
210         if (inputStream != null) {
211 
212             // Alternative 1: Parse as XML, and fall back to ODF
213             if (!inputStream.markSupported()) {
214                 inputStream = new BufferedInputStream(inputStream);
215             }
216             final InputStream filterInput = new UnclosableInputStream(
217                     inputStream);
218             filterInput.mark(Parser.DETECTION_BUFFER_SIZE);
219             try {
220                 retVal = this.parseStreamSourceAsXml(new StreamSource(
221                         filterInput));
222                 inputStream.close();
223             } catch (final SAXParseException se) {
224                 filterInput.reset();
225                 try {
226                     retVal = this.parseStreamSourceAsOdf(new StreamSource(
227                             filterInput));
228                 } catch (final IOException io) {
229                     throw se;
230                 }
231                 inputStream.close();
232             }
233 
234             // Alternative 2: peek for ZIP magic and call matching parser.
235 
236             // final PushbackInputStream pi = new PushbackInputStream(
237             // inputStream, 4);
238             // final byte[] magic = new byte[4];
239             // pi.read(magic);
240             // pi.unread(magic);
241             // if ((magic[0] == 'P') && (magic[1] == 'K') && (magic[2] == 3)
242             // && (magic[3] == 4)) {
243             // retVal = this.parseStreamSourceAsOdf(streamSource);
244             // }
245         }
246         if (retVal == null) {
247             retVal = this.parseStreamSourceAsXml(streamSource);
248         }
249         return retVal;
250     }
251 
252     /**
253      * Parse a given StreamSource which represents an ODF document.
254      * 
255      * @param streamSource
256      *            StreamSource to parse.
257      * @return the Document contained within.
258      * @throws SAXException
259      *             if a parse error occurred.
260      * @throws IOException
261      *             if an I/O error occurred.
262      */
263     public Document parseStreamSourceAsOdf(final StreamSource streamSource)
264             throws IOException, SAXException {
265         final InputStream is = streamSource.getInputStream();
266         if (is == null) {
267             throw new IllegalArgumentException(Parser.BAD_STREAM_SOURCE
268                     + streamSource);
269         }
270         final ZipInputStream zipStream = new ZipInputStream(is);
271         Document document = null;
272         ZipEntry entry = zipStream.getNextEntry();
273         while (entry != null) {
274             if (Parser.CONTENT_XML.equals(entry.getName())) {
275                 document = this.getDocumentBuilder().parse(zipStream);
276                 entry = null;
277             } else {
278                 entry = zipStream.getNextEntry();
279             }
280         }
281         return document;
282     }
283 
284     /**
285      * Parse a given StreamSource which represents an XML document.
286      * 
287      * @param streamSource
288      *            StreamSource to parse.
289      * @return the Document contained within.
290      * @throws SAXException
291      *             if a parse error occurred.
292      * @throws IOException
293      *             if an I/O error occurred.
294      */
295     public Document parseStreamSourceAsXml(final StreamSource streamSource)
296             throws SAXException, IOException {
297         InputSource inp = null;
298         final String systemId = streamSource.getSystemId();
299         if (systemId != null) {
300             inp = new InputSource(systemId);
301         }
302         final InputStream is = streamSource.getInputStream();
303         if ((inp == null) && (is != null)) {
304             inp = new InputSource(is);
305         }
306         final Reader ir = streamSource.getReader();
307         if ((inp == null) && (ir != null)) {
308             inp = new InputSource(ir);
309         }
310 
311         if (inp == null) {
312             throw new IllegalArgumentException(Parser.BAD_STREAM_SOURCE
313                     + streamSource);
314         }
315 
316         return this.getDocumentBuilder().parse(inp);
317     }
318 
319     /**
320      * Retrieve a DocumentBuilder suitable for MathML parsing.
321      * <p>
322      * Please note:
323      * <ul>
324      * <li>There is one instance of the builder per thread.</li>
325      * <li>The builder instance is not thread safe, so it may not be passed
326      * among threads.</li>
327      * <li>Multiple Threads may call getDocumentBuilder concurrently</li>
328      * </ul>
329      * 
330      * @return a DocumentBuilder
331      */
332     public DocumentBuilder getDocumentBuilder() {
333         // Note: No synchronization needed, as id will be different for every
334         // thread!
335         final long id = Thread.currentThread().getId();
336         final Reference<DocumentBuilder> builderRef = this.builders.get(id);
337         if (builderRef != null) {
338             final DocumentBuilder builder = builderRef.get();
339             if (builder != null) {
340                 return builder;
341             }
342         }
343         final DocumentBuilder builder = this.createDocumentBuilder();
344         this.builders.put(id, new SoftReference<DocumentBuilder>(builder));
345         return builder;
346     }
347 
348     /**
349      * Extract the top Node from a given Source.
350      * 
351      * @param source
352      *            the Source to use. Currently supported are {@link DOMSource} ,
353      *            {@link StreamSource}
354      * @return the top NODE.
355      * @throws SAXException
356      *             if a parse error occurred.
357      * @throws IOException
358      *             if an I/O error occurred.
359      */
360     public Node parse(final Source source) throws SAXException, IOException {
361         final Node retVal;
362         if (source instanceof StreamSource) {
363             final StreamSource streamSource = (StreamSource) source;
364             retVal = this.parseStreamSource(streamSource);
365         } else if (source instanceof ImageSource) {
366             final ImageSource imageSource = (ImageSource) source;
367             final StreamSource streamSource = new StreamSource(imageSource
368                     .getInputStream());
369             retVal = this.parseStreamSource(streamSource);
370         } else if (source instanceof DOMSource) {
371             final DOMSource domSource = (DOMSource) source;
372             retVal = domSource.getNode();
373         } else {
374             try {
375                 final Transformer t = TransformerFactory.newInstance()
376                         .newTransformer();
377                 final DOMResult r = new DOMResult();
378                 t.transform(source, r);
379                 retVal = r.getNode();
380             } catch (final TransformerException e) {
381                 Parser.LOGGER.warn(e.getMessage());
382                 throw new IllegalArgumentException(Parser.CANNOT_HANDLE_SOURCE
383                         + source, e);
384             }
385         }
386         return retVal;
387     }
388 }