View Javadoc

1   /*
2    * Copyright 2002 - 2007 JEuclid, http://jeuclid.sf.net
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  /* $Id: CharacterMapping.java,v 74b8e95997bf 2010/08/11 17:45:46 max $ */
18  
19  package net.sourceforge.jeuclid.elements.support.text;
20  
21  import java.awt.Font;
22  import java.io.BufferedReader;
23  import java.io.IOException;
24  import java.io.InputStream;
25  import java.io.InputStreamReader;
26  import java.io.ObjectInput;
27  import java.io.ObjectInputStream;
28  import java.io.Serializable;
29  import java.lang.ref.Reference;
30  import java.lang.ref.SoftReference;
31  import java.util.ArrayList;
32  import java.util.EnumMap;
33  import java.util.HashMap;
34  import java.util.HashSet;
35  import java.util.List;
36  import java.util.Map;
37  import java.util.Set;
38  
39  import net.sourceforge.jeuclid.elements.support.attributes.FontFamily;
40  import net.sourceforge.jeuclid.elements.support.attributes.MathVariant;
41  
42  import org.apache.commons.logging.Log;
43  import org.apache.commons.logging.LogFactory;
44  import org.apache.xmlgraphics.fonts.Glyphs;
45  
46  /**
47   * @version $Revision: 74b8e95997bf $
48   */
49  public final class CharacterMapping implements Serializable {
50  
51      private static final String LOAD_ERROR = "Error loading character mappings";
52  
53      private static final int POS_CODESTR = 0;
54  
55      private static final int POS_DESCRIPTION = 1;
56      
57      private static final int POS_CATEGORY = 2;
58      
59      private static final int POS_MAPS = 5;
60  
61  
62  
63      private static final int HIGHPLANE_MATH_CHARS_START = 0x1D400;
64  
65      private static final int HIGHPLANE_START = 0x10000;
66  
67      /**
68       * 
69       */
70      private static final long serialVersionUID = 1L;
71  
72      private static CharacterMapping instance;
73  
74      /**
75       * Logger for this class.
76       */
77      private static final Log LOGGER = LogFactory
78              .getLog(CharacterMapping.class);
79  
80      private final Map<Integer, CodePointAndVariant> extractAttrs;
81  
82      private final Set<Integer> forceSet;
83  
84      private final Set<Integer> markSet;
85      
86      private final Map<FontFamily, Map<Integer, Integer[]>> composeAttrs;
87  
88      private transient Map<CodePointAndVariant, Reference<List<CodePointAndVariant>>> alternatives;
89  
90      /**
91       * Default Constructor.
92       */
93      private CharacterMapping() {
94          this.extractAttrs = new HashMap<Integer, CodePointAndVariant>();
95          this.forceSet = new HashSet<Integer>();
96          this.markSet = new HashSet<Integer>();
97          this.composeAttrs = new EnumMap<FontFamily, Map<Integer, Integer[]>>(
98                  FontFamily.class);
99          this.readResolve();
100         this.loadUnicodeData();
101     }
102 
103     private Object readResolve() {
104         this.alternatives = new HashMap<CodePointAndVariant, Reference<List<CodePointAndVariant>>>();
105         return this;
106     }
107 
108     private void loadUnicodeData() {
109         final InputStream is = CharacterMapping.class
110                 .getResourceAsStream("/net/sourceforge/jeuclid/UnicodeData.txt");
111         try {
112             final BufferedReader r = new BufferedReader(
113                     new InputStreamReader(is));
114             try {
115                 String s;
116                 while ((s = r.readLine()) != null) {
117                     final String[] c = s.split(";");
118                     if (c.length > CharacterMapping.POS_MAPS) {
119                         this.process(c[CharacterMapping.POS_CODESTR],
120                                 c[CharacterMapping.POS_DESCRIPTION],
121                                 c[CharacterMapping.POS_CATEGORY],
122                                 c[CharacterMapping.POS_MAPS]);
123                     }
124                 }
125             } catch (final IOException e) {
126                 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
127             } finally {
128                 try {
129                     r.close();
130                 } catch (final IOException e) {
131                     CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR,
132                             e);
133                 }
134             }
135         } catch (final NullPointerException e) {
136             CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
137         }
138     }
139 
140     private void process(final String codestr, final String descr,
141             final String category, final String mapsStr) {
142         try {
143             final int codepoint = Integer.parseInt(codestr, 16);
144             
145             if (category.startsWith("M")) {
146                 this.markSet.add(codepoint);
147             }
148             
149             if (!mapsStr.startsWith("<font> ")) {
150                 return;
151             }
152             final int mapsTo = Integer.parseInt(mapsStr.substring(7), 16);
153 
154             final int awtStyle = this.parseAwtStyle(descr);
155             final FontFamily fam = this.parseFontFamily(descr);
156             if (fam == null) {
157                 return;
158             }
159 
160             final boolean force = (codepoint >= CharacterMapping.HIGHPLANE_MATH_CHARS_START)
161                     && ((FontFamily.SANSSERIF.equals(fam)) || (FontFamily.SERIF
162                             .equals(fam)));
163             if (force) {
164                 this.forceSet.add(codepoint);
165             }
166             
167             
168             final CodePointAndVariant cpav = new CodePointAndVariant(mapsTo,
169                     new MathVariant(awtStyle, fam));
170             this.extractAttrs.put(codepoint, cpav);
171             final Map<Integer, Integer[]> ffmap = this.getFFMap(fam);
172             final Integer[] ia = this.getMapsTo(mapsTo, ffmap);
173             ia[awtStyle] = codepoint;
174         } catch (final NumberFormatException nfe) {
175             CharacterMapping.LOGGER.debug("Parse Error", nfe);
176         }
177     }
178 
179     private Integer[] getMapsTo(final int mapsTo,
180             final Map<Integer, Integer[]> ffmap) {
181         Integer[] ia = ffmap.get(mapsTo);
182         if (ia == null) {
183             ia = new Integer[Font.BOLD + Font.ITALIC + 1];
184             ffmap.put(mapsTo, ia);
185         }
186         return ia;
187     }
188 
189     private Map<Integer, Integer[]> getFFMap(final FontFamily fam) {
190         Map<Integer, Integer[]> ffmap = this.composeAttrs.get(fam);
191         if (ffmap == null) {
192             ffmap = new HashMap<Integer, Integer[]>();
193             this.composeAttrs.put(fam, ffmap);
194         }
195         return ffmap;
196     }
197 
198     private int parseAwtStyle(final String descr) {
199         int awtStyle = Font.PLAIN;
200         if (descr.contains("BOLD")) {
201             awtStyle += Font.BOLD;
202         }
203         if (descr.contains("ITALIC")) {
204             awtStyle += Font.ITALIC;
205         }
206         return awtStyle;
207     }
208 
209     private FontFamily parseFontFamily(final String descr) {
210         final FontFamily fam;
211         if (descr.contains("DOUBLE-STRUCK")) {
212             fam = FontFamily.DOUBLE_STRUCK;
213         } else if (descr.contains("SCRIPT")) {
214             fam = FontFamily.SCRIPT;
215         } else if (descr.contains("BLACK-LETTER")
216                 || descr.contains("FRAKTUR")) {
217             fam = FontFamily.FRAKTUR;
218         } else if (descr.contains("SANS-SERIF")) {
219             fam = FontFamily.SANSSERIF;
220         } else if (descr.contains("MONOSPACE")) {
221             fam = FontFamily.MONOSPACED;
222         } else if (descr.contains("MATHEMATICAL")) {
223             fam = FontFamily.SERIF;
224         } else {
225             fam = null;
226         }
227         return fam;
228     }
229 
230     /**
231      * Get the singleton instance of this class.
232      * 
233      * @return an instance of CharacterMapping.
234      */
235     public static synchronized CharacterMapping getInstance() {
236         if (CharacterMapping.instance == null) {
237             CharacterMapping m;
238             try {
239                 final InputStream is = CharacterMapping.class
240                         .getResourceAsStream("/net/sourceforge/jeuclid/charmap.ser");
241                 final ObjectInput oi = new ObjectInputStream(is);
242                 m = (CharacterMapping) oi.readObject();
243                 oi.close();
244             } catch (final ClassNotFoundException cnfe) {
245                 m = null;
246             } catch (final IllegalArgumentException e) {
247                 m = null;
248             } catch (final IOException e) {
249                 m = null;
250             } catch (final NullPointerException e) {
251                 m = null;
252             }
253             if (m == null) {
254                 CharacterMapping.instance = new CharacterMapping();
255             } else {
256                 CharacterMapping.instance = m;
257             }
258         }
259         return CharacterMapping.instance;
260     }
261 
262     /**
263      * Compose a new SERIF Unicode char. This function tries to compose the
264      * given char into a SERIF char which shows the same characteristics at a
265      * particular Unicode codepoint.
266      * 
267      * @param split
268      *            the char which contains a coidepoint and variant.
269      * @param forbidHighplane
270      *            if the high plane is broken (e.g. on OS X).
271      * @return a CodePointAndVariant representing the same char.
272      */
273     public CodePointAndVariant composeUnicodeChar(
274             final CodePointAndVariant split, final boolean forbidHighplane) {
275         final MathVariant splitVariant = split.getVariant();
276         final Map<Integer, Integer[]> famList = this.composeAttrs
277                 .get(splitVariant.getFontFamily());
278         if (famList == null) {
279             return split;
280         }
281         final Integer[] aList = famList.get(split.getCodePoint());
282         if (aList == null) {
283             return split;
284         }
285 
286         final int splitStyle = splitVariant.getAwtStyle();
287         Integer to = aList[splitStyle];
288         if (to != null) {
289             if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
290                 return split;
291             }
292             return new CodePointAndVariant(to, MathVariant.NORMAL);
293         }
294         if (splitStyle != 0) {
295             to = aList[0];
296         }
297         if (to != null) {
298             if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
299                 return split;
300             }
301             return new CodePointAndVariant(to, new MathVariant(splitStyle,
302                     FontFamily.SERIF));
303         }
304         return split;
305 
306     }
307 
308     /**
309      * Extract the given char into variant and codepoint.
310      * 
311      * @param test
312      *            the Unicode char to split up.
313      * @return A {@link CodePointAndVariant} representing the same character
314      *         with explicit variant.
315      */
316     public CodePointAndVariant extractUnicodeAttr(
317             final CodePointAndVariant test) {
318         final CodePointAndVariant mapsTo = this.extractAttrs.get(test
319                 .getCodePoint());
320         if (mapsTo == null) {
321             return test;
322         }
323         final MathVariant testVariant = test.getVariant();
324         final int testStyle = testVariant.getAwtStyle();
325         final int mapsToCodepoint = mapsTo.getCodePoint();
326         final CodePointAndVariant retVal;
327         if ((testStyle == Font.PLAIN)
328                 || (this.forceSet.contains(mapsToCodepoint))) {
329             retVal = mapsTo;
330         } else {
331             final MathVariant mapsToVariant = mapsTo.getVariant();
332             retVal = new CodePointAndVariant(mapsToCodepoint,
333                     new MathVariant(testStyle | mapsToVariant.getAwtStyle(),
334                             mapsToVariant.getFontFamily()));
335         }
336         return retVal;
337     }
338 
339     /**
340      * Get all alternatives codePoints for this codePoint. They can be used if
341      * the original code point and variant is not available.
342      * 
343      * @param cpav
344      *            original CodePointAndVariant
345      * @return A List of alternative code points to check.
346      */
347     public List<CodePointAndVariant> getAllAlternatives(
348             final CodePointAndVariant cpav) {
349         final Reference<List<CodePointAndVariant>> ref = this.alternatives
350                 .get(cpav);
351         List<CodePointAndVariant> result = null;
352         if (ref != null) {
353             result = ref.get();
354         }
355         if (result == null) {
356             result = this.reallyGetAllAternatives(cpav, true);
357             this.alternatives.put(cpav,
358                     new SoftReference<List<CodePointAndVariant>>(result));
359         }
360         return result;
361     }
362 
363     private List<CodePointAndVariant> reallyGetAllAternatives(
364             final CodePointAndVariant cpav, final boolean useGlyphMapping) {
365         final List<CodePointAndVariant> list = new ArrayList<CodePointAndVariant>(
366                 3);
367 
368         final CodePointAndVariant cpav2 = this.extractUnicodeAttr(cpav);
369         // High Plane is broken on OS X!
370         final CodePointAndVariant cpav3 = this.composeUnicodeChar(cpav2,
371                 StringUtil.OSX);
372 
373         this.addGlyphsAndTheirAlternatives(list, cpav2, useGlyphMapping);
374         this.addGlyphsAndTheirAlternatives(list, cpav3, useGlyphMapping);
375         this.addGlyphsAndTheirAlternatives(list, cpav, useGlyphMapping);
376 
377         return list;
378     }
379 
380     private void addGlyphsAndTheirAlternatives(
381             final List<CodePointAndVariant> list,
382             final CodePointAndVariant cpav, final boolean useGlyphMapping) {
383         if (!list.contains(cpav)) {
384             list.add(cpav);
385             if (useGlyphMapping) {
386                 this.addAlternateGlyph(list, cpav);
387             }
388         }
389     }
390 
391     private void addAlternateGlyph(final List<CodePointAndVariant> list,
392             final CodePointAndVariant cpav) {
393         final int codePoint = cpav.getCodePoint();
394         final String charAsString = new String(new int[] { codePoint }, 0, 1);
395         final String glyphName = Glyphs.stringToGlyph(charAsString);
396         final String[] alternateGlyphNames = Glyphs
397                 .getCharNameAlternativesFor(glyphName);
398         if (alternateGlyphNames != null) {
399             for (final String altGlyph : alternateGlyphNames) {
400                 final int altcp = Glyphs.getUnicodeSequenceForGlyphName(
401                         altGlyph).codePointAt(0);
402                 final List<CodePointAndVariant> alternateList = this
403                         .reallyGetAllAternatives(new CodePointAndVariant(
404                                 altcp, cpav.getVariant()), false);
405                 for (final CodePointAndVariant alternateCpav : alternateList) {
406                     if (!list.contains(alternateCpav)) {
407                         list.add(alternateCpav);
408                     }
409                 }
410             }
411         }
412     }
413 
414     /**
415      * Checks if the given codepoint is a "marking" codepoint. Marking
416      * codepoints do not display by themself, but are usually combined with the
417      * previous character.
418      * 
419      * @param codepoint
420      *            codepoint to check.
421      * @return true if this codepoint reprensents a mark.
422      */
423     public boolean isMark(int codepoint) {
424         return this.markSet.contains(codepoint);
425     }
426     
427 }