Coverage Report - net.sourceforge.jeuclid.parser.Parser
 
Classes in this File Line Coverage Branch Coverage Complexity
Parser
46%
48/104
39%
15/38
2,765
Parser$LoggerErrorHandler
28%
2/7
N/A
2,765
Parser$SingletonHolder
50%
2/4
N/A
2,765
Parser$UnclosableInputStream
0%
0/3
N/A
2,765
 
 1  
 /*
 2  
  * Copyright 2007 - 2007 JEuclid, http://jeuclid.sf.net
 3  
  * 
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 /* $Id: Parser.java,v 2bab6eb875e8 2010/08/11 16:45:50 max $ */
 18  
 
 19  
 package net.sourceforge.jeuclid.parser;
 20  
 
 21  
 import java.io.BufferedInputStream;
 22  
 import java.io.FilterInputStream;
 23  
 import java.io.IOException;
 24  
 import java.io.InputStream;
 25  
 import java.io.Reader;
 26  
 import java.lang.ref.Reference;
 27  
 import java.lang.ref.SoftReference;
 28  
 import java.util.Map;
 29  
 import java.util.concurrent.ConcurrentHashMap;
 30  
 import java.util.zip.ZipEntry;
 31  
 import java.util.zip.ZipInputStream;
 32  
 
 33  
 import javax.annotation.concurrent.ThreadSafe;
 34  
 import javax.xml.parsers.DocumentBuilder;
 35  
 import javax.xml.parsers.DocumentBuilderFactory;
 36  
 import javax.xml.parsers.ParserConfigurationException;
 37  
 import javax.xml.transform.Source;
 38  
 import javax.xml.transform.Transformer;
 39  
 import javax.xml.transform.TransformerException;
 40  
 import javax.xml.transform.TransformerFactory;
 41  
 import javax.xml.transform.dom.DOMResult;
 42  
 import javax.xml.transform.dom.DOMSource;
 43  
 import javax.xml.transform.stream.StreamSource;
 44  
 
 45  
 import net.sourceforge.jeuclid.ResourceEntityResolver;
 46  
 
 47  
 import org.apache.commons.logging.Log;
 48  
 import org.apache.commons.logging.LogFactory;
 49  
 import org.apache.xmlgraphics.image.loader.ImageSource;
 50  
 import org.w3c.dom.Document;
 51  
 import org.w3c.dom.Node;
 52  
 import org.xml.sax.ErrorHandler;
 53  
 import org.xml.sax.InputSource;
 54  
 import org.xml.sax.SAXException;
 55  
 import org.xml.sax.SAXParseException;
 56  
 
 57  
 /**
 58  
  * A JAXP compatible approach to MathML Parsing.
 59  
  * 
 60  
  * @version $Revision: 2bab6eb875e8 $
 61  
  */
 62  
 // CHECKSTYLE:OFF
 63  
 // This class is too complex.
 64  209
 @ThreadSafe
 65  0
 public final class Parser {
 66  
     // CHECKSTYLE:ON
 67  
 
 68  
     private static final class LoggerErrorHandler implements ErrorHandler {
 69  1317
         public LoggerErrorHandler() {
 70  
             // Empty on purpose
 71  1317
         }
 72  
 
 73  
         public void error(final SAXParseException exception)
 74  
                 throws SAXException {
 75  0
             Parser.LOGGER.warn(exception);
 76  0
         }
 77  
 
 78  
         public void fatalError(final SAXParseException exception)
 79  
                 throws SAXException {
 80  0
             throw exception;
 81  
         }
 82  
 
 83  
         public void warning(final SAXParseException exception)
 84  
                 throws SAXException {
 85  0
             Parser.LOGGER.debug(exception);
 86  0
         }
 87  
     }
 88  
 
 89  
     private static final class UnclosableInputStream extends FilterInputStream {
 90  
         protected UnclosableInputStream(final InputStream in) {
 91  0
             super(in);
 92  0
         }
 93  
 
 94  
         @Override
 95  
         public void close() throws IOException {
 96  
             // Do Nothing.
 97  0
         }
 98  
     }
 99  
 
 100  
     /**
 101  
      * Detection buffer size. Rationale: After the first 128 bytes a XML file
 102  
      * and a ZIP file should be distinguishable.
 103  
      */
 104  
     private static final int DETECTION_BUFFER_SIZE = 128;
 105  
 
 106  
     private static final String BAD_STREAM_SOURCE = "Bad StreamSource: ";
 107  
 
 108  
     private static final String CONTENT_XML = "content.xml";
 109  
 
 110  
     private static final String CANNOT_HANDLE_SOURCE = "Cannot handle Source: ";
 111  
 
 112  11558
     private static final class SingletonHolder {
 113  209
         private static final Parser INSTANCE = new Parser();
 114  
 
 115  0
         private SingletonHolder() {
 116  0
         }
 117  
     }
 118  
 
 119  
     /**
 120  
      * Logger for this class.
 121  
      */
 122  209
     private static final Log LOGGER = LogFactory.getLog(Parser.class);
 123  
 
 124  
     private final Map<Long, Reference<DocumentBuilder>> builders;
 125  
 
 126  
     /**
 127  
      * Default constructor.
 128  
      */
 129  209
     protected Parser() {
 130  209
         this.builders = new ConcurrentHashMap<Long, Reference<DocumentBuilder>>();
 131  209
     }
 132  
 
 133  
     private DocumentBuilder createDocumentBuilder() {
 134  
         DocumentBuilder documentBuilder;
 135  
         try {
 136  
             try {
 137  1317
                 documentBuilder = this.tryCreateDocumentBuilder(true);
 138  0
             } catch (final UnsupportedOperationException uoe) {
 139  0
                 Parser.LOGGER.debug("Unsupported Operation: "
 140  
                         + uoe.getMessage());
 141  0
                 documentBuilder = this.tryCreateDocumentBuilder(false);
 142  0
             } catch (final ParserConfigurationException pce) {
 143  0
                 Parser.LOGGER.debug("ParserConfigurationException: "
 144  
                         + pce.getMessage());
 145  0
                 documentBuilder = this.tryCreateDocumentBuilder(false);
 146  1317
             }
 147  1317
             documentBuilder.setEntityResolver(new ResourceEntityResolver());
 148  1317
             documentBuilder.setErrorHandler(new LoggerErrorHandler());
 149  0
         } catch (final ParserConfigurationException pce2) {
 150  0
             Parser.LOGGER.warn("Could not create Parser: " + pce2.getMessage());
 151  0
             assert false : "Could not create Parser";
 152  0
             documentBuilder = null;
 153  1317
         }
 154  1317
         return documentBuilder;
 155  
     }
 156  
 
 157  
     private DocumentBuilder tryCreateDocumentBuilder(final boolean xinclude)
 158  
             throws ParserConfigurationException {
 159  1317
         final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory
 160  
                 .newInstance();
 161  1317
         documentBuilderFactory.setNamespaceAware(true);
 162  1317
         if (xinclude) {
 163  1317
             documentBuilderFactory.setXIncludeAware(true);
 164  
         }
 165  1317
         final DocumentBuilder documentBuilder = documentBuilderFactory
 166  
                 .newDocumentBuilder();
 167  1317
         return documentBuilder;
 168  
     }
 169  
 
 170  
     /**
 171  
      * Retrieve the singleton Parser instance.
 172  
      * 
 173  
      * @return a Parser object.
 174  
      */
 175  
     public static Parser getInstance() {
 176  11558
         return Parser.SingletonHolder.INSTANCE;
 177  
     }
 178  
 
 179  
     /**
 180  
      * use {@link #getInstance()} instead.
 181  
      * 
 182  
      * @return see {@link #getInstance()}
 183  
      * @throws ParserConfigurationException
 184  
      *             see {@link #getInstance()}
 185  
      * @deprecated use {@link #getInstance()} instead.
 186  
      */
 187  
     @Deprecated
 188  
     public static Parser getParser() throws ParserConfigurationException {
 189  0
         return Parser.getInstance();
 190  
     }
 191  
 
 192  
     /**
 193  
      * Parse a StreamSource and return its Document.
 194  
      * <p>
 195  
      * This method will auto-detect ODF or XML format and load an appropriate
 196  
      * parser.
 197  
      * 
 198  
      * @param streamSource
 199  
      *            A StreamSource.
 200  
      * @return A DOM Document representation for this source.
 201  
      * @throws SAXException
 202  
      *             if a parse error occurred.
 203  
      * @throws IOException
 204  
      *             if an I/O error occurred.
 205  
      */
 206  
     public Document parseStreamSource(final StreamSource streamSource)
 207  
             throws SAXException, IOException {
 208  0
         Document retVal = null;
 209  0
         InputStream inputStream = streamSource.getInputStream();
 210  0
         if (inputStream != null) {
 211  
 
 212  
             // Alternative 1: Parse as XML, and fall back to ODF
 213  0
             if (!inputStream.markSupported()) {
 214  0
                 inputStream = new BufferedInputStream(inputStream);
 215  
             }
 216  0
             final InputStream filterInput = new UnclosableInputStream(
 217  
                     inputStream);
 218  0
             filterInput.mark(Parser.DETECTION_BUFFER_SIZE);
 219  
             try {
 220  0
                 retVal = this.parseStreamSourceAsXml(new StreamSource(
 221  
                         filterInput));
 222  0
                 inputStream.close();
 223  0
             } catch (final SAXParseException se) {
 224  0
                 filterInput.reset();
 225  
                 try {
 226  0
                     retVal = this.parseStreamSourceAsOdf(new StreamSource(
 227  
                             filterInput));
 228  0
                 } catch (final IOException io) {
 229  0
                     throw se;
 230  0
                 }
 231  0
                 inputStream.close();
 232  0
             }
 233  
 
 234  
             // Alternative 2: peek for ZIP magic and call matching parser.
 235  
 
 236  
             // final PushbackInputStream pi = new PushbackInputStream(
 237  
             // inputStream, 4);
 238  
             // final byte[] magic = new byte[4];
 239  
             // pi.read(magic);
 240  
             // pi.unread(magic);
 241  
             // if ((magic[0] == 'P') && (magic[1] == 'K') && (magic[2] == 3)
 242  
             // && (magic[3] == 4)) {
 243  
             // retVal = this.parseStreamSourceAsOdf(streamSource);
 244  
             // }
 245  
         }
 246  0
         if (retVal == null) {
 247  0
             retVal = this.parseStreamSourceAsXml(streamSource);
 248  
         }
 249  0
         return retVal;
 250  
     }
 251  
 
 252  
     /**
 253  
      * Parse a given StreamSource which represents an ODF document.
 254  
      * 
 255  
      * @param streamSource
 256  
      *            StreamSource to parse.
 257  
      * @return the Document contained within.
 258  
      * @throws SAXException
 259  
      *             if a parse error occurred.
 260  
      * @throws IOException
 261  
      *             if an I/O error occurred.
 262  
      */
 263  
     public Document parseStreamSourceAsOdf(final StreamSource streamSource)
 264  
             throws IOException, SAXException {
 265  209
         final InputStream is = streamSource.getInputStream();
 266  209
         if (is == null) {
 267  0
             throw new IllegalArgumentException(Parser.BAD_STREAM_SOURCE
 268  
                     + streamSource);
 269  
         }
 270  209
         final ZipInputStream zipStream = new ZipInputStream(is);
 271  209
         Document document = null;
 272  209
         ZipEntry entry = zipStream.getNextEntry();
 273  2299
         while (entry != null) {
 274  2090
             if (Parser.CONTENT_XML.equals(entry.getName())) {
 275  209
                 document = this.getDocumentBuilder().parse(zipStream);
 276  209
                 entry = null;
 277  
             } else {
 278  1881
                 entry = zipStream.getNextEntry();
 279  
             }
 280  
         }
 281  209
         return document;
 282  
     }
 283  
 
 284  
     /**
 285  
      * Parse a given StreamSource which represents an XML document.
 286  
      * 
 287  
      * @param streamSource
 288  
      *            StreamSource to parse.
 289  
      * @return the Document contained within.
 290  
      * @throws SAXException
 291  
      *             if a parse error occurred.
 292  
      * @throws IOException
 293  
      *             if an I/O error occurred.
 294  
      */
 295  
     public Document parseStreamSourceAsXml(final StreamSource streamSource)
 296  
             throws SAXException, IOException {
 297  7796
         InputSource inp = null;
 298  7796
         final String systemId = streamSource.getSystemId();
 299  7796
         if (systemId != null) {
 300  0
             inp = new InputSource(systemId);
 301  
         }
 302  7796
         final InputStream is = streamSource.getInputStream();
 303  7796
         if ((inp == null) && (is != null)) {
 304  0
             inp = new InputSource(is);
 305  
         }
 306  7796
         final Reader ir = streamSource.getReader();
 307  7796
         if ((inp == null) && (ir != null)) {
 308  7796
             inp = new InputSource(ir);
 309  
         }
 310  
 
 311  7796
         if (inp == null) {
 312  0
             throw new IllegalArgumentException(Parser.BAD_STREAM_SOURCE
 313  
                     + streamSource);
 314  
         }
 315  
 
 316  7796
         return this.getDocumentBuilder().parse(inp);
 317  
     }
 318  
 
 319  
     /**
 320  
      * Retrieve a DocumentBuilder suitable for MathML parsing.
 321  
      * <p>
 322  
      * Please note:
 323  
      * <ul>
 324  
      * <li>There is one instance of the builder per thread.</li>
 325  
      * <li>The builder instance is not thread safe, so it may not be passed
 326  
      * among threads.</li>
 327  
      * <li>Multiple Threads may call getDocumentBuilder concurrently</li>
 328  
      * </ul>
 329  
      * 
 330  
      * @return a DocumentBuilder
 331  
      */
 332  
     public DocumentBuilder getDocumentBuilder() {
 333  
         // Note: No synchronization needed, as id will be different for every
 334  
         // thread!
 335  11558
         final long id = Thread.currentThread().getId();
 336  11558
         final Reference<DocumentBuilder> builderRef = this.builders.get(id);
 337  11558
         if (builderRef != null) {
 338  10241
             final DocumentBuilder builder = builderRef.get();
 339  10241
             if (builder != null) {
 340  10241
                 return builder;
 341  
             }
 342  
         }
 343  1317
         final DocumentBuilder builder = this.createDocumentBuilder();
 344  1317
         this.builders.put(id, new SoftReference<DocumentBuilder>(builder));
 345  1317
         return builder;
 346  
     }
 347  
 
 348  
     /**
 349  
      * Extract the top Node from a given Source.
 350  
      * 
 351  
      * @param source
 352  
      *            the Source to use. Currently supported are {@link DOMSource} ,
 353  
      *            {@link StreamSource}
 354  
      * @return the top NODE.
 355  
      * @throws SAXException
 356  
      *             if a parse error occurred.
 357  
      * @throws IOException
 358  
      *             if an I/O error occurred.
 359  
      */
 360  
     public Node parse(final Source source) throws SAXException, IOException {
 361  
         final Node retVal;
 362  0
         if (source instanceof StreamSource) {
 363  0
             final StreamSource streamSource = (StreamSource) source;
 364  0
             retVal = this.parseStreamSource(streamSource);
 365  0
         } else if (source instanceof ImageSource) {
 366  0
             final ImageSource imageSource = (ImageSource) source;
 367  0
             final StreamSource streamSource = new StreamSource(imageSource
 368  
                     .getInputStream());
 369  0
             retVal = this.parseStreamSource(streamSource);
 370  0
         } else if (source instanceof DOMSource) {
 371  0
             final DOMSource domSource = (DOMSource) source;
 372  0
             retVal = domSource.getNode();
 373  0
         } else {
 374  
             try {
 375  0
                 final Transformer t = TransformerFactory.newInstance()
 376  
                         .newTransformer();
 377  0
                 final DOMResult r = new DOMResult();
 378  0
                 t.transform(source, r);
 379  0
                 retVal = r.getNode();
 380  0
             } catch (final TransformerException e) {
 381  0
                 Parser.LOGGER.warn(e.getMessage());
 382  0
                 throw new IllegalArgumentException(Parser.CANNOT_HANDLE_SOURCE
 383  
                         + source, e);
 384  0
             }
 385  
         }
 386  0
         return retVal;
 387  
     }
 388  
 }