001    /*
002     * Copyright 2002 - 2007 JEuclid, http://jeuclid.sf.net
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    /* $Id: CharacterMapping.java,v 74b8e95997bf 2010/08/11 17:45:46 max $ */
018    
019    package net.sourceforge.jeuclid.elements.support.text;
020    
021    import java.awt.Font;
022    import java.io.BufferedReader;
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.io.InputStreamReader;
026    import java.io.ObjectInput;
027    import java.io.ObjectInputStream;
028    import java.io.Serializable;
029    import java.lang.ref.Reference;
030    import java.lang.ref.SoftReference;
031    import java.util.ArrayList;
032    import java.util.EnumMap;
033    import java.util.HashMap;
034    import java.util.HashSet;
035    import java.util.List;
036    import java.util.Map;
037    import java.util.Set;
038    
039    import net.sourceforge.jeuclid.elements.support.attributes.FontFamily;
040    import net.sourceforge.jeuclid.elements.support.attributes.MathVariant;
041    
042    import org.apache.commons.logging.Log;
043    import org.apache.commons.logging.LogFactory;
044    import org.apache.xmlgraphics.fonts.Glyphs;
045    
046    /**
047     * @version $Revision: 74b8e95997bf $
048     */
049    public final class CharacterMapping implements Serializable {
050    
051        private static final String LOAD_ERROR = "Error loading character mappings";
052    
053        private static final int POS_CODESTR = 0;
054    
055        private static final int POS_DESCRIPTION = 1;
056        
057        private static final int POS_CATEGORY = 2;
058        
059        private static final int POS_MAPS = 5;
060    
061    
062    
063        private static final int HIGHPLANE_MATH_CHARS_START = 0x1D400;
064    
065        private static final int HIGHPLANE_START = 0x10000;
066    
067        /**
068         * 
069         */
070        private static final long serialVersionUID = 1L;
071    
072        private static CharacterMapping instance;
073    
074        /**
075         * Logger for this class.
076         */
077        private static final Log LOGGER = LogFactory
078                .getLog(CharacterMapping.class);
079    
080        private final Map<Integer, CodePointAndVariant> extractAttrs;
081    
082        private final Set<Integer> forceSet;
083    
084        private final Set<Integer> markSet;
085        
086        private final Map<FontFamily, Map<Integer, Integer[]>> composeAttrs;
087    
088        private transient Map<CodePointAndVariant, Reference<List<CodePointAndVariant>>> alternatives;
089    
090        /**
091         * Default Constructor.
092         */
093        private CharacterMapping() {
094            this.extractAttrs = new HashMap<Integer, CodePointAndVariant>();
095            this.forceSet = new HashSet<Integer>();
096            this.markSet = new HashSet<Integer>();
097            this.composeAttrs = new EnumMap<FontFamily, Map<Integer, Integer[]>>(
098                    FontFamily.class);
099            this.readResolve();
100            this.loadUnicodeData();
101        }
102    
103        private Object readResolve() {
104            this.alternatives = new HashMap<CodePointAndVariant, Reference<List<CodePointAndVariant>>>();
105            return this;
106        }
107    
108        private void loadUnicodeData() {
109            final InputStream is = CharacterMapping.class
110                    .getResourceAsStream("/net/sourceforge/jeuclid/UnicodeData.txt");
111            try {
112                final BufferedReader r = new BufferedReader(
113                        new InputStreamReader(is));
114                try {
115                    String s;
116                    while ((s = r.readLine()) != null) {
117                        final String[] c = s.split(";");
118                        if (c.length > CharacterMapping.POS_MAPS) {
119                            this.process(c[CharacterMapping.POS_CODESTR],
120                                    c[CharacterMapping.POS_DESCRIPTION],
121                                    c[CharacterMapping.POS_CATEGORY],
122                                    c[CharacterMapping.POS_MAPS]);
123                        }
124                    }
125                } catch (final IOException e) {
126                    CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
127                } finally {
128                    try {
129                        r.close();
130                    } catch (final IOException e) {
131                        CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR,
132                                e);
133                    }
134                }
135            } catch (final NullPointerException e) {
136                CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
137            }
138        }
139    
140        private void process(final String codestr, final String descr,
141                final String category, final String mapsStr) {
142            try {
143                final int codepoint = Integer.parseInt(codestr, 16);
144                
145                if (category.startsWith("M")) {
146                    this.markSet.add(codepoint);
147                }
148                
149                if (!mapsStr.startsWith("<font> ")) {
150                    return;
151                }
152                final int mapsTo = Integer.parseInt(mapsStr.substring(7), 16);
153    
154                final int awtStyle = this.parseAwtStyle(descr);
155                final FontFamily fam = this.parseFontFamily(descr);
156                if (fam == null) {
157                    return;
158                }
159    
160                final boolean force = (codepoint >= CharacterMapping.HIGHPLANE_MATH_CHARS_START)
161                        && ((FontFamily.SANSSERIF.equals(fam)) || (FontFamily.SERIF
162                                .equals(fam)));
163                if (force) {
164                    this.forceSet.add(codepoint);
165                }
166                
167                
168                final CodePointAndVariant cpav = new CodePointAndVariant(mapsTo,
169                        new MathVariant(awtStyle, fam));
170                this.extractAttrs.put(codepoint, cpav);
171                final Map<Integer, Integer[]> ffmap = this.getFFMap(fam);
172                final Integer[] ia = this.getMapsTo(mapsTo, ffmap);
173                ia[awtStyle] = codepoint;
174            } catch (final NumberFormatException nfe) {
175                CharacterMapping.LOGGER.debug("Parse Error", nfe);
176            }
177        }
178    
179        private Integer[] getMapsTo(final int mapsTo,
180                final Map<Integer, Integer[]> ffmap) {
181            Integer[] ia = ffmap.get(mapsTo);
182            if (ia == null) {
183                ia = new Integer[Font.BOLD + Font.ITALIC + 1];
184                ffmap.put(mapsTo, ia);
185            }
186            return ia;
187        }
188    
189        private Map<Integer, Integer[]> getFFMap(final FontFamily fam) {
190            Map<Integer, Integer[]> ffmap = this.composeAttrs.get(fam);
191            if (ffmap == null) {
192                ffmap = new HashMap<Integer, Integer[]>();
193                this.composeAttrs.put(fam, ffmap);
194            }
195            return ffmap;
196        }
197    
198        private int parseAwtStyle(final String descr) {
199            int awtStyle = Font.PLAIN;
200            if (descr.contains("BOLD")) {
201                awtStyle += Font.BOLD;
202            }
203            if (descr.contains("ITALIC")) {
204                awtStyle += Font.ITALIC;
205            }
206            return awtStyle;
207        }
208    
209        private FontFamily parseFontFamily(final String descr) {
210            final FontFamily fam;
211            if (descr.contains("DOUBLE-STRUCK")) {
212                fam = FontFamily.DOUBLE_STRUCK;
213            } else if (descr.contains("SCRIPT")) {
214                fam = FontFamily.SCRIPT;
215            } else if (descr.contains("BLACK-LETTER")
216                    || descr.contains("FRAKTUR")) {
217                fam = FontFamily.FRAKTUR;
218            } else if (descr.contains("SANS-SERIF")) {
219                fam = FontFamily.SANSSERIF;
220            } else if (descr.contains("MONOSPACE")) {
221                fam = FontFamily.MONOSPACED;
222            } else if (descr.contains("MATHEMATICAL")) {
223                fam = FontFamily.SERIF;
224            } else {
225                fam = null;
226            }
227            return fam;
228        }
229    
230        /**
231         * Get the singleton instance of this class.
232         * 
233         * @return an instance of CharacterMapping.
234         */
235        public static synchronized CharacterMapping getInstance() {
236            if (CharacterMapping.instance == null) {
237                CharacterMapping m;
238                try {
239                    final InputStream is = CharacterMapping.class
240                            .getResourceAsStream("/net/sourceforge/jeuclid/charmap.ser");
241                    final ObjectInput oi = new ObjectInputStream(is);
242                    m = (CharacterMapping) oi.readObject();
243                    oi.close();
244                } catch (final ClassNotFoundException cnfe) {
245                    m = null;
246                } catch (final IllegalArgumentException e) {
247                    m = null;
248                } catch (final IOException e) {
249                    m = null;
250                } catch (final NullPointerException e) {
251                    m = null;
252                }
253                if (m == null) {
254                    CharacterMapping.instance = new CharacterMapping();
255                } else {
256                    CharacterMapping.instance = m;
257                }
258            }
259            return CharacterMapping.instance;
260        }
261    
262        /**
263         * Compose a new SERIF Unicode char. This function tries to compose the
264         * given char into a SERIF char which shows the same characteristics at a
265         * particular Unicode codepoint.
266         * 
267         * @param split
268         *            the char which contains a coidepoint and variant.
269         * @param forbidHighplane
270         *            if the high plane is broken (e.g. on OS X).
271         * @return a CodePointAndVariant representing the same char.
272         */
273        public CodePointAndVariant composeUnicodeChar(
274                final CodePointAndVariant split, final boolean forbidHighplane) {
275            final MathVariant splitVariant = split.getVariant();
276            final Map<Integer, Integer[]> famList = this.composeAttrs
277                    .get(splitVariant.getFontFamily());
278            if (famList == null) {
279                return split;
280            }
281            final Integer[] aList = famList.get(split.getCodePoint());
282            if (aList == null) {
283                return split;
284            }
285    
286            final int splitStyle = splitVariant.getAwtStyle();
287            Integer to = aList[splitStyle];
288            if (to != null) {
289                if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
290                    return split;
291                }
292                return new CodePointAndVariant(to, MathVariant.NORMAL);
293            }
294            if (splitStyle != 0) {
295                to = aList[0];
296            }
297            if (to != null) {
298                if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
299                    return split;
300                }
301                return new CodePointAndVariant(to, new MathVariant(splitStyle,
302                        FontFamily.SERIF));
303            }
304            return split;
305    
306        }
307    
308        /**
309         * Extract the given char into variant and codepoint.
310         * 
311         * @param test
312         *            the Unicode char to split up.
313         * @return A {@link CodePointAndVariant} representing the same character
314         *         with explicit variant.
315         */
316        public CodePointAndVariant extractUnicodeAttr(
317                final CodePointAndVariant test) {
318            final CodePointAndVariant mapsTo = this.extractAttrs.get(test
319                    .getCodePoint());
320            if (mapsTo == null) {
321                return test;
322            }
323            final MathVariant testVariant = test.getVariant();
324            final int testStyle = testVariant.getAwtStyle();
325            final int mapsToCodepoint = mapsTo.getCodePoint();
326            final CodePointAndVariant retVal;
327            if ((testStyle == Font.PLAIN)
328                    || (this.forceSet.contains(mapsToCodepoint))) {
329                retVal = mapsTo;
330            } else {
331                final MathVariant mapsToVariant = mapsTo.getVariant();
332                retVal = new CodePointAndVariant(mapsToCodepoint,
333                        new MathVariant(testStyle | mapsToVariant.getAwtStyle(),
334                                mapsToVariant.getFontFamily()));
335            }
336            return retVal;
337        }
338    
339        /**
340         * Get all alternatives codePoints for this codePoint. They can be used if
341         * the original code point and variant is not available.
342         * 
343         * @param cpav
344         *            original CodePointAndVariant
345         * @return A List of alternative code points to check.
346         */
347        public List<CodePointAndVariant> getAllAlternatives(
348                final CodePointAndVariant cpav) {
349            final Reference<List<CodePointAndVariant>> ref = this.alternatives
350                    .get(cpav);
351            List<CodePointAndVariant> result = null;
352            if (ref != null) {
353                result = ref.get();
354            }
355            if (result == null) {
356                result = this.reallyGetAllAternatives(cpav, true);
357                this.alternatives.put(cpav,
358                        new SoftReference<List<CodePointAndVariant>>(result));
359            }
360            return result;
361        }
362    
363        private List<CodePointAndVariant> reallyGetAllAternatives(
364                final CodePointAndVariant cpav, final boolean useGlyphMapping) {
365            final List<CodePointAndVariant> list = new ArrayList<CodePointAndVariant>(
366                    3);
367    
368            final CodePointAndVariant cpav2 = this.extractUnicodeAttr(cpav);
369            // High Plane is broken on OS X!
370            final CodePointAndVariant cpav3 = this.composeUnicodeChar(cpav2,
371                    StringUtil.OSX);
372    
373            this.addGlyphsAndTheirAlternatives(list, cpav2, useGlyphMapping);
374            this.addGlyphsAndTheirAlternatives(list, cpav3, useGlyphMapping);
375            this.addGlyphsAndTheirAlternatives(list, cpav, useGlyphMapping);
376    
377            return list;
378        }
379    
380        private void addGlyphsAndTheirAlternatives(
381                final List<CodePointAndVariant> list,
382                final CodePointAndVariant cpav, final boolean useGlyphMapping) {
383            if (!list.contains(cpav)) {
384                list.add(cpav);
385                if (useGlyphMapping) {
386                    this.addAlternateGlyph(list, cpav);
387                }
388            }
389        }
390    
391        private void addAlternateGlyph(final List<CodePointAndVariant> list,
392                final CodePointAndVariant cpav) {
393            final int codePoint = cpav.getCodePoint();
394            final String charAsString = new String(new int[] { codePoint }, 0, 1);
395            final String glyphName = Glyphs.stringToGlyph(charAsString);
396            final String[] alternateGlyphNames = Glyphs
397                    .getCharNameAlternativesFor(glyphName);
398            if (alternateGlyphNames != null) {
399                for (final String altGlyph : alternateGlyphNames) {
400                    final int altcp = Glyphs.getUnicodeSequenceForGlyphName(
401                            altGlyph).codePointAt(0);
402                    final List<CodePointAndVariant> alternateList = this
403                            .reallyGetAllAternatives(new CodePointAndVariant(
404                                    altcp, cpav.getVariant()), false);
405                    for (final CodePointAndVariant alternateCpav : alternateList) {
406                        if (!list.contains(alternateCpav)) {
407                            list.add(alternateCpav);
408                        }
409                    }
410                }
411            }
412        }
413    
414        /**
415         * Checks if the given codepoint is a "marking" codepoint. Marking
416         * codepoints do not display by themself, but are usually combined with the
417         * previous character.
418         * 
419         * @param codepoint
420         *            codepoint to check.
421         * @return true if this codepoint reprensents a mark.
422         */
423        public boolean isMark(int codepoint) {
424            return this.markSet.contains(codepoint);
425        }
426        
427    }