Coverage Report - net.sourceforge.jeuclid.elements.support.text.CharacterMapping
 
Classes in this File Line Coverage Branch Coverage Complexity
CharacterMapping
50%
125/246
46%
74/160
4,625
 
 1  
 /*
 2  
  * Copyright 2002 - 2007 JEuclid, http://jeuclid.sf.net
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 /* $Id: CharacterMapping.java,v 74b8e95997bf 2010/08/11 17:45:46 max $ */
 18  
 
 19  
 package net.sourceforge.jeuclid.elements.support.text;
 20  
 
 21  
 import java.awt.Font;
 22  
 import java.io.BufferedReader;
 23  
 import java.io.IOException;
 24  
 import java.io.InputStream;
 25  
 import java.io.InputStreamReader;
 26  
 import java.io.ObjectInput;
 27  
 import java.io.ObjectInputStream;
 28  
 import java.io.Serializable;
 29  
 import java.lang.ref.Reference;
 30  
 import java.lang.ref.SoftReference;
 31  
 import java.util.ArrayList;
 32  
 import java.util.EnumMap;
 33  
 import java.util.HashMap;
 34  
 import java.util.HashSet;
 35  
 import java.util.List;
 36  
 import java.util.Map;
 37  
 import java.util.Set;
 38  
 
 39  
 import net.sourceforge.jeuclid.elements.support.attributes.FontFamily;
 40  
 import net.sourceforge.jeuclid.elements.support.attributes.MathVariant;
 41  
 
 42  
 import org.apache.commons.logging.Log;
 43  
 import org.apache.commons.logging.LogFactory;
 44  
 import org.apache.xmlgraphics.fonts.Glyphs;
 45  
 
 46  
 /**
 47  
  * @version $Revision: 74b8e95997bf $
 48  
  */
 49  
 public final class CharacterMapping implements Serializable {
 50  
 
 51  
     /** Message logged whenever loading the character mapping data fails. */
     private static final String LOAD_ERROR = "Error loading character mappings";
 52  
 
 53  
     /** Index of the hexadecimal code point field in a semicolon-split data line. */
     private static final int POS_CODESTR = 0;
 54  
 
 55  
     /** Index of the description field; font style and family are parsed from it. */
     private static final int POS_DESCRIPTION = 1;
 56  
     
 57  
     /** Index of the category field; values starting with "M" denote marks. */
     private static final int POS_CATEGORY = 2;
 58  
     
 59  
     /** Index of the mapping field; only entries starting with "&lt;font&gt; " are used. */
     private static final int POS_MAPS = 5;
 60  
 
 61  
 
 62  
 
 63  
     /**
      * Code points at or above this value are added to the force set when
      * their font family is SERIF or SANSSERIF.
      */
     private static final int HIGHPLANE_MATH_CHARS_START = 0x1D400;
 64  
 
 65  
     /**
      * Code points at or above this value are not produced by
      * composeUnicodeChar when its forbidHighplane flag is set.
      */
     private static final int HIGHPLANE_START = 0x10000;
 66  
 
 67  
     /**
      * Serialization version of this class.
      */
     private static final long serialVersionUID = 1L;
 71  
 
 72  
     /** Singleton instance; stays null until getInstance() creates or loads it. */
     private static CharacterMapping instance;
 73  
 
 74  198
     /**
      * Logger for this class.
      */
 77  11
     private static final Log LOGGER = LogFactory
 78  
             .getLog(CharacterMapping.class);
 79  
 
 80  
     /**
      * Maps a code point that has a font mapping to the code point it maps to,
      * together with the variant parsed from its description.
      */
     private final Map<Integer, CodePointAndVariant> extractAttrs;
 81  
 
 82  
     /**
      * Code points at or above HIGHPLANE_MATH_CHARS_START whose parsed font
      * family is SERIF or SANSSERIF.
      */
     private final Set<Integer> forceSet;
 83  
 
 84  
     /** Code points whose category starts with "M"; queried by isMark. */
     private final Set<Integer> markSet;
 85  
     
 86  
     /**
      * Per font family: maps a target code point to an array indexed by AWT
      * font style that holds the dedicated code point for that style.
      */
     private final Map<FontFamily, Map<Integer, Integer[]>> composeAttrs;
 87  
 
 88  0
     /**
      * Cache of alternative lists, held through soft references. Transient:
      * it is re-created by readResolve() rather than serialized.
      */
     private transient Map<CodePointAndVariant, Reference<List<CodePointAndVariant>>> alternatives;
 89  0
 
 90  0
     /**
 91  0
      * Default Constructor.
 92  
      */
 93  0
     private CharacterMapping() {
 94  0
         this.extractAttrs = new HashMap<Integer, CodePointAndVariant>();
 95  0
         this.forceSet = new HashSet<Integer>();
 96  0
         this.markSet = new HashSet<Integer>();
 97  0
         this.composeAttrs = new EnumMap<FontFamily, Map<Integer, Integer[]>>(
 98  198
                 FontFamily.class);
 99  198
         this.readResolve();
 100  0
         this.loadUnicodeData();
 101  0
     }
 102  
 
 103  0
     private Object readResolve() {
 104  11
         this.alternatives = new HashMap<CodePointAndVariant, Reference<List<CodePointAndVariant>>>();
 105  11
         return this;
 106  0
     }
 107  
 
 108  
     private void loadUnicodeData() {
 109  0
         final InputStream is = CharacterMapping.class
 110  0
                 .getResourceAsStream("/net/sourceforge/jeuclid/UnicodeData.txt");
 111  0
         try {
 112  0
             final BufferedReader r = new BufferedReader(
 113  0
                     new InputStreamReader(is));
 114  
             try {
 115  
                 String s;
 116  0
                 while ((s = r.readLine()) != null) {
 117  0
                     final String[] c = s.split(";");
 118  0
                     if (c.length > CharacterMapping.POS_MAPS) {
 119  0
                         this.process(c[CharacterMapping.POS_CODESTR],
 120  
                                 c[CharacterMapping.POS_DESCRIPTION],
 121  0
                                 c[CharacterMapping.POS_CATEGORY],
 122  0
                                 c[CharacterMapping.POS_MAPS]);
 123  0
                     }
 124  0
                 }
 125  0
             } catch (final IOException e) {
 126  0
                 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
 127  0
             } finally {
 128  0
                 try {
 129  0
                     r.close();
 130  0
                 } catch (final IOException e) {
 131  0
                     CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR,
 132  
                             e);
 133  0
                 }
 134  0
             }
 135  0
         } catch (final NullPointerException e) {
 136  0
             CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
 137  0
         }
 138  0
     }
 139  
 
 140  0
     private void process(final String codestr, final String descr,
 141  
             final String category, final String mapsStr) {
 142  0
         try {
 143  0
             final int codepoint = Integer.parseInt(codestr, 16);
 144  0
             
 145  0
             if (category.startsWith("M")) {
 146  0
                 this.markSet.add(codepoint);
 147  0
             }
 148  
             
 149  0
             if (!mapsStr.startsWith("<font> ")) {
 150  0
                 return;
 151  
             }
 152  0
             final int mapsTo = Integer.parseInt(mapsStr.substring(7), 16);
 153  0
 
 154  0
             final int awtStyle = this.parseAwtStyle(descr);
 155  0
             final FontFamily fam = this.parseFontFamily(descr);
 156  0
             if (fam == null) {
 157  0
                 return;
 158  
             }
 159  0
 
 160  0
             final boolean force = (codepoint >= CharacterMapping.HIGHPLANE_MATH_CHARS_START)
 161  0
                     && ((FontFamily.SANSSERIF.equals(fam)) || (FontFamily.SERIF
 162  
                             .equals(fam)));
 163  0
             if (force) {
 164  0
                 this.forceSet.add(codepoint);
 165  0
             }
 166  0
             
 167  0
             
 168  0
             final CodePointAndVariant cpav = new CodePointAndVariant(mapsTo,
 169  0
                     new MathVariant(awtStyle, fam));
 170  0
             this.extractAttrs.put(codepoint, cpav);
 171  0
             final Map<Integer, Integer[]> ffmap = this.getFFMap(fam);
 172  0
             final Integer[] ia = this.getMapsTo(mapsTo, ffmap);
 173  0
             ia[awtStyle] = codepoint;
 174  0
         } catch (final NumberFormatException nfe) {
 175  0
             CharacterMapping.LOGGER.debug("Parse Error", nfe);
 176  0
         }
 177  0
     }
 178  0
 
 179  
     private Integer[] getMapsTo(final int mapsTo,
 180  
             final Map<Integer, Integer[]> ffmap) {
 181  0
         Integer[] ia = ffmap.get(mapsTo);
 182  0
         if (ia == null) {
 183  0
             ia = new Integer[Font.BOLD + Font.ITALIC + 1];
 184  0
             ffmap.put(mapsTo, ia);
 185  0
         }
 186  0
         return ia;
 187  0
     }
 188  
 
 189  
     private Map<Integer, Integer[]> getFFMap(final FontFamily fam) {
 190  0
         Map<Integer, Integer[]> ffmap = this.composeAttrs.get(fam);
 191  0
         if (ffmap == null) {
 192  0
             ffmap = new HashMap<Integer, Integer[]>();
 193  0
             this.composeAttrs.put(fam, ffmap);
 194  
         }
 195  0
         return ffmap;
 196  0
     }
 197  
 
 198  0
     private int parseAwtStyle(final String descr) {
 199  0
         int awtStyle = Font.PLAIN;
 200  0
         if (descr.contains("BOLD")) {
 201  0
             awtStyle += Font.BOLD;
 202  
         }
 203  0
         if (descr.contains("ITALIC")) {
 204  0
             awtStyle += Font.ITALIC;
 205  0
         }
 206  0
         return awtStyle;
 207  0
     }
 208  
 
 209  0
     private FontFamily parseFontFamily(final String descr) {
 210  0
         final FontFamily fam;
 211  0
         if (descr.contains("DOUBLE-STRUCK")) {
 212  0
             fam = FontFamily.DOUBLE_STRUCK;
 213  0
         } else if (descr.contains("SCRIPT")) {
 214  0
             fam = FontFamily.SCRIPT;
 215  0
         } else if (descr.contains("BLACK-LETTER")
 216  
                 || descr.contains("FRAKTUR")) {
 217  0
             fam = FontFamily.FRAKTUR;
 218  0
         } else if (descr.contains("SANS-SERIF")) {
 219  0
             fam = FontFamily.SANSSERIF;
 220  0
         } else if (descr.contains("MONOSPACE")) {
 221  0
             fam = FontFamily.MONOSPACED;
 222  0
         } else if (descr.contains("MATHEMATICAL")) {
 223  0
             fam = FontFamily.SERIF;
 224  
         } else {
 225  0
             fam = null;
 226  
         }
 227  0
         return fam;
 228  594
     }
 229  
 
 230  
     /**
 231  198
      * Get the singleton instance of this class.
 232  
      * 
 233  198
      * @return an instance of CharacterMapping.
 234  198
      */
 235  198
     public static synchronized CharacterMapping getInstance() {
 236  33
         if (CharacterMapping.instance == null) {
 237  0
             CharacterMapping m;
 238  0
             try {
 239  11
                 final InputStream is = CharacterMapping.class
 240  0
                         .getResourceAsStream("/net/sourceforge/jeuclid/charmap.ser");
 241  11
                 final ObjectInput oi = new ObjectInputStream(is);
 242  11
                 m = (CharacterMapping) oi.readObject();
 243  11
                 oi.close();
 244  198
             } catch (final ClassNotFoundException cnfe) {
 245  198
                 m = null;
 246  0
             } catch (final IllegalArgumentException e) {
 247  0
                 m = null;
 248  198
             } catch (final IOException e) {
 249  0
                 m = null;
 250  0
             } catch (final NullPointerException e) {
 251  594
                 m = null;
 252  11
             }
 253  11
             if (m == null) {
 254  0
                 CharacterMapping.instance = new CharacterMapping();
 255  
             } else {
 256  11
                 CharacterMapping.instance = m;
 257  
             }
 258  
         }
 259  33
         return CharacterMapping.instance;
 260  
     }
 261  
 
 262  
     /**
 263  
      * Compose a new SERIF Unicode char. This function tries to compose the
 264  
      * given char into a SERIF char which shows the same characteristics at a
 265  
      * particular Unicode codepoint.
 266  
      * 
 267  17739
      * @param split
 268  17739
      *            the char which contains a coidepoint and variant.
 269  
      * @param forbidHighplane
 270  17739
      *            if the high plane is broken (e.g. on OS X).
 271  0
      * @return a CodePointAndVariant representing the same char.
 272  
      */
 273  17739
     public CodePointAndVariant composeUnicodeChar(
 274  17739
             final CodePointAndVariant split, final boolean forbidHighplane) {
 275  7836
         final MathVariant splitVariant = split.getVariant();
 276  987
         final Map<Integer, Integer[]> famList = this.composeAttrs
 277  
                 .get(splitVariant.getFontFamily());
 278  11877
         if (famList == null) {
 279  10890
             return split;
 280  10890
         }
 281  5405
         final Integer[] aList = famList.get(split.getCodePoint());
 282  4811
         if (aList == null) {
 283  381
             return split;
 284  594
         }
 285  
 
 286  7078
         final int splitStyle = splitVariant.getAwtStyle();
 287  1398
         Integer to = aList[splitStyle];
 288  606
         if (to != null) {
 289  6716
             if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
 290  805
                 return split;
 291  396
             }
 292  33
             return new CodePointAndVariant(to, MathVariant.NORMAL);
 293  198
         }
 294  362
         if (splitStyle != 0) {
 295  44
             to = aList[0];
 296  5878
         }
 297  362
         if (to != null) {
 298  33
             if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
 299  22
                 return split;
 300  
             }
 301  11
             return new CodePointAndVariant(to, new MathVariant(splitStyle,
 302  
                     FontFamily.SERIF));
 303  
         }
 304  329
         return split;
 305  
 
 306  
     }
 307  
 
 308  
     /**
 309  
      * Extract the given char into variant and codepoint.
 310  17343
      * 
 311  
      * @param test
 312  17343
      *            the Unicode char to split up.
 313  16551
      * @return A {@link CodePointAndVariant} representing the same character
 314  
      *         with explicit variant.
 315  792
      */
 316  792
     public CodePointAndVariant extractUnicodeAttr(
 317  792
             final CodePointAndVariant test) {
 318  965
         final CodePointAndVariant mapsTo = this.extractAttrs.get(test
 319  792
                 .getCodePoint());
 320  965
         if (mapsTo == null) {
 321  921
             return test;
 322  
         }
 323  836
         final MathVariant testVariant = test.getVariant();
 324  836
         final int testStyle = testVariant.getAwtStyle();
 325  44
         final int mapsToCodepoint = mapsTo.getCodePoint();
 326  
         final CodePointAndVariant retVal;
 327  44
         if ((testStyle == Font.PLAIN)
 328  792
                 || (this.forceSet.contains(mapsToCodepoint))) {
 329  0
             retVal = mapsTo;
 330  
         } else {
 331  44
             final MathVariant mapsToVariant = mapsTo.getVariant();
 332  44
             retVal = new CodePointAndVariant(mapsToCodepoint,
 333  
                     new MathVariant(testStyle | mapsToVariant.getAwtStyle(),
 334  
                             mapsToVariant.getFontFamily()));
 335  
         }
 336  44
         return retVal;
 337  
     }
 338  
 
 339  
     /**
 340  
      * Get all alternatives codePoints for this codePoint. They can be used if
 341  66730
      * the original code point and variant is not available.
 342  
      * 
 343  66730
      * @param cpav
 344  66730
      *            original CodePointAndVariant
 345  52462
      * @return A List of alternative code points to check.
 346  
      */
 347  66730
     public List<CodePointAndVariant> getAllAlternatives(
 348  14268
             final CodePointAndVariant cpav) {
 349  17987
         final Reference<List<CodePointAndVariant>> ref = this.alternatives
 350  
                 .get(cpav);
 351  3719
         List<CodePointAndVariant> result = null;
 352  70449
         if (ref != null) {
 353  2925
             result = ref.get();
 354  
         }
 355  3719
         if (result == null) {
 356  794
             result = this.reallyGetAllAternatives(cpav, true);
 357  17741
             this.alternatives.put(cpav,
 358  
                     new SoftReference<List<CodePointAndVariant>>(result));
 359  
         }
 360  20666
         return result;
 361  
     }
 362  16947
 
 363  
     private List<CodePointAndVariant> reallyGetAllAternatives(
 364  
             final CodePointAndVariant cpav, final boolean useGlyphMapping) {
 365  17890
         final List<CodePointAndVariant> list = new ArrayList<CodePointAndVariant>(
 366  16947
                 3);
 367  16947
 
 368  943
         final CodePointAndVariant cpav2 = this.extractUnicodeAttr(cpav);
 369  16947
         // High Plane is broken on OS X!
 370  943
         final CodePointAndVariant cpav3 = this.composeUnicodeChar(cpav2,
 371  
                 StringUtil.OSX);
 372  
 
 373  943
         this.addGlyphsAndTheirAlternatives(list, cpav2, useGlyphMapping);
 374  943
         this.addGlyphsAndTheirAlternatives(list, cpav3, useGlyphMapping);
 375  51784
         this.addGlyphsAndTheirAlternatives(list, cpav, useGlyphMapping);
 376  17541
 
 377  18484
         return list;
 378  14862
     }
 379  
 
 380  
     private void addGlyphsAndTheirAlternatives(
 381  50841
             final List<CodePointAndVariant> list,
 382  
             final CodePointAndVariant cpav, final boolean useGlyphMapping) {
 383  2829
         if (!list.contains(cpav)) {
 384  976
             list.add(cpav);
 385  15838
             if (useGlyphMapping) {
 386  15689
                 this.addAlternateGlyph(list, cpav);
 387  14862
             }
 388  14862
         }
 389  2829
     }
 390  14862
 
 391  4764
     private void addAlternateGlyph(final List<CodePointAndVariant> list,
 392  2679
             final CodePointAndVariant cpav) {
 393  827
         final int codePoint = cpav.getCodePoint();
 394  3506
         final String charAsString = new String(new int[] { codePoint }, 0, 1);
 395  827
         final String glyphName = Glyphs.stringToGlyph(charAsString);
 396  827
         final String[] alternateGlyphNames = Glyphs
 397  2679
                 .getCharNameAlternativesFor(glyphName);
 398  3506
         if (alternateGlyphNames != null) {
 399  2350
             for (final String altGlyph : alternateGlyphNames) {
 400  149
                 final int altcp = Glyphs.getUnicodeSequenceForGlyphName(
 401  
                         altGlyph).codePointAt(0);
 402  149
                 final List<CodePointAndVariant> alternateList = this
 403  
                         .reallyGetAllAternatives(new CodePointAndVariant(
 404  14862
                                 altcp, cpav.getVariant()), false);
 405  149
                 for (final CodePointAndVariant alternateCpav : alternateList) {
 406  149
                     if (!list.contains(alternateCpav)) {
 407  116
                         list.add(alternateCpav);
 408  
                     }
 409  
                 }
 410  
             }
 411  
         }
 412  827
     }
 413  
 
 414  
     /**
 415  
      * Checks if the given codepoint is a "marking" codepoint. Marking
 416  
      * codepoints do not display by themself, but are usually combined with the
 417  
      * previous character.
 418  
      * 
 419  
      * @param codepoint
 420  
      *            codepoint to check.
 421  
      * @return true if this codepoint reprensents a mark.
 422  
      */
 423  
     public boolean isMark(int codepoint) {
 424  4358
         return this.markSet.contains(codepoint);
 425  
     }
 426  
     
 427  
 }