001 /* 002 * Copyright 2002 - 2007 JEuclid, http://jeuclid.sf.net 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 /* $Id: CharacterMapping.java,v 74b8e95997bf 2010/08/11 17:45:46 max $ */ 018 019 package net.sourceforge.jeuclid.elements.support.text; 020 021 import java.awt.Font; 022 import java.io.BufferedReader; 023 import java.io.IOException; 024 import java.io.InputStream; 025 import java.io.InputStreamReader; 026 import java.io.ObjectInput; 027 import java.io.ObjectInputStream; 028 import java.io.Serializable; 029 import java.lang.ref.Reference; 030 import java.lang.ref.SoftReference; 031 import java.util.ArrayList; 032 import java.util.EnumMap; 033 import java.util.HashMap; 034 import java.util.HashSet; 035 import java.util.List; 036 import java.util.Map; 037 import java.util.Set; 038 039 import net.sourceforge.jeuclid.elements.support.attributes.FontFamily; 040 import net.sourceforge.jeuclid.elements.support.attributes.MathVariant; 041 042 import org.apache.commons.logging.Log; 043 import org.apache.commons.logging.LogFactory; 044 import org.apache.xmlgraphics.fonts.Glyphs; 045 046 /** 047 * @version $Revision: 74b8e95997bf $ 048 */ 049 public final class CharacterMapping implements Serializable { 050 051 private static final String LOAD_ERROR = "Error loading character mappings"; 052 053 private static final int POS_CODESTR = 0; 054 055 private static final int POS_DESCRIPTION = 1; 056 057 private static final int POS_CATEGORY = 2; 058 059 private static final int POS_MAPS = 5; 060 061 062 063 private static final int HIGHPLANE_MATH_CHARS_START = 0x1D400; 064 065 private static final int HIGHPLANE_START = 0x10000; 066 067 /** 068 * 069 */ 070 private static final long serialVersionUID = 1L; 071 072 private static CharacterMapping instance; 073 074 /** 075 * Logger for this class. 076 */ 077 private static final Log LOGGER = LogFactory 078 .getLog(CharacterMapping.class); 079 080 private final Map<Integer, CodePointAndVariant> extractAttrs; 081 082 private final Set<Integer> forceSet; 083 084 private final Set<Integer> markSet; 085 086 private final Map<FontFamily, Map<Integer, Integer[]>> composeAttrs; 087 088 private transient Map<CodePointAndVariant, Reference<List<CodePointAndVariant>>> alternatives; 089 090 /** 091 * Default Constructor. 092 */ 093 private CharacterMapping() { 094 this.extractAttrs = new HashMap<Integer, CodePointAndVariant>(); 095 this.forceSet = new HashSet<Integer>(); 096 this.markSet = new HashSet<Integer>(); 097 this.composeAttrs = new EnumMap<FontFamily, Map<Integer, Integer[]>>( 098 FontFamily.class); 099 this.readResolve(); 100 this.loadUnicodeData(); 101 } 102 103 private Object readResolve() { 104 this.alternatives = new HashMap<CodePointAndVariant, Reference<List<CodePointAndVariant>>>(); 105 return this; 106 } 107 108 private void loadUnicodeData() { 109 final InputStream is = CharacterMapping.class 110 .getResourceAsStream("/net/sourceforge/jeuclid/UnicodeData.txt"); 111 try { 112 final BufferedReader r = new BufferedReader( 113 new InputStreamReader(is)); 114 try { 115 String s; 116 while ((s = r.readLine()) != null) { 117 final String[] c = s.split(";"); 118 if (c.length > CharacterMapping.POS_MAPS) { 119 this.process(c[CharacterMapping.POS_CODESTR], 120 c[CharacterMapping.POS_DESCRIPTION], 121 c[CharacterMapping.POS_CATEGORY], 122 c[CharacterMapping.POS_MAPS]); 123 } 124 } 125 } catch (final IOException e) { 126 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e); 127 } finally { 128 try { 129 r.close(); 130 } catch (final IOException e) { 131 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, 132 e); 133 } 134 } 135 } catch (final NullPointerException e) { 136 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e); 137 } 138 } 139 140 private void process(final String codestr, final String descr, 141 final String category, final String mapsStr) { 142 try { 143 final int codepoint = Integer.parseInt(codestr, 16); 144 145 if (category.startsWith("M")) { 146 this.markSet.add(codepoint); 147 } 148 149 if (!mapsStr.startsWith("<font> ")) { 150 return; 151 } 152 final int mapsTo = Integer.parseInt(mapsStr.substring(7), 16); 153 154 final int awtStyle = this.parseAwtStyle(descr); 155 final FontFamily fam = this.parseFontFamily(descr); 156 if (fam == null) { 157 return; 158 } 159 160 final boolean force = (codepoint >= CharacterMapping.HIGHPLANE_MATH_CHARS_START) 161 && ((FontFamily.SANSSERIF.equals(fam)) || (FontFamily.SERIF 162 .equals(fam))); 163 if (force) { 164 this.forceSet.add(codepoint); 165 } 166 167 168 final CodePointAndVariant cpav = new CodePointAndVariant(mapsTo, 169 new MathVariant(awtStyle, fam)); 170 this.extractAttrs.put(codepoint, cpav); 171 final Map<Integer, Integer[]> ffmap = this.getFFMap(fam); 172 final Integer[] ia = this.getMapsTo(mapsTo, ffmap); 173 ia[awtStyle] = codepoint; 174 } catch (final NumberFormatException nfe) { 175 CharacterMapping.LOGGER.debug("Parse Error", nfe); 176 } 177 } 178 179 private Integer[] getMapsTo(final int mapsTo, 180 final Map<Integer, Integer[]> ffmap) { 181 Integer[] ia = ffmap.get(mapsTo); 182 if (ia == null) { 183 ia = new Integer[Font.BOLD + Font.ITALIC + 1]; 184 ffmap.put(mapsTo, ia); 185 } 186 return ia; 187 } 188 189 private Map<Integer, Integer[]> getFFMap(final FontFamily fam) { 190 Map<Integer, Integer[]> ffmap = this.composeAttrs.get(fam); 191 if (ffmap == null) { 192 ffmap = new HashMap<Integer, Integer[]>(); 193 this.composeAttrs.put(fam, ffmap); 194 } 195 return ffmap; 196 } 197 198 private int parseAwtStyle(final String descr) { 199 int awtStyle = Font.PLAIN; 200 if (descr.contains("BOLD")) { 201 awtStyle += Font.BOLD; 202 } 203 if (descr.contains("ITALIC")) { 204 awtStyle += Font.ITALIC; 205 } 206 return awtStyle; 207 } 208 209 private FontFamily parseFontFamily(final String descr) { 210 final FontFamily fam; 211 if (descr.contains("DOUBLE-STRUCK")) { 212 fam = FontFamily.DOUBLE_STRUCK; 213 } else if (descr.contains("SCRIPT")) { 214 fam = FontFamily.SCRIPT; 215 } else if (descr.contains("BLACK-LETTER") 216 || descr.contains("FRAKTUR")) { 217 fam = FontFamily.FRAKTUR; 218 } else if (descr.contains("SANS-SERIF")) { 219 fam = FontFamily.SANSSERIF; 220 } else if (descr.contains("MONOSPACE")) { 221 fam = FontFamily.MONOSPACED; 222 } else if (descr.contains("MATHEMATICAL")) { 223 fam = FontFamily.SERIF; 224 } else { 225 fam = null; 226 } 227 return fam; 228 } 229 230 /** 231 * Get the singleton instance of this class. 232 * 233 * @return an instance of CharacterMapping. 234 */ 235 public static synchronized CharacterMapping getInstance() { 236 if (CharacterMapping.instance == null) { 237 CharacterMapping m; 238 try { 239 final InputStream is = CharacterMapping.class 240 .getResourceAsStream("/net/sourceforge/jeuclid/charmap.ser"); 241 final ObjectInput oi = new ObjectInputStream(is); 242 m = (CharacterMapping) oi.readObject(); 243 oi.close(); 244 } catch (final ClassNotFoundException cnfe) { 245 m = null; 246 } catch (final IllegalArgumentException e) { 247 m = null; 248 } catch (final IOException e) { 249 m = null; 250 } catch (final NullPointerException e) { 251 m = null; 252 } 253 if (m == null) { 254 CharacterMapping.instance = new CharacterMapping(); 255 } else { 256 CharacterMapping.instance = m; 257 } 258 } 259 return CharacterMapping.instance; 260 } 261 262 /** 263 * Compose a new SERIF Unicode char. This function tries to compose the 264 * given char into a SERIF char which shows the same characteristics at a 265 * particular Unicode codepoint. 266 * 267 * @param split 268 * the char which contains a coidepoint and variant. 269 * @param forbidHighplane 270 * if the high plane is broken (e.g. on OS X). 271 * @return a CodePointAndVariant representing the same char. 272 */ 273 public CodePointAndVariant composeUnicodeChar( 274 final CodePointAndVariant split, final boolean forbidHighplane) { 275 final MathVariant splitVariant = split.getVariant(); 276 final Map<Integer, Integer[]> famList = this.composeAttrs 277 .get(splitVariant.getFontFamily()); 278 if (famList == null) { 279 return split; 280 } 281 final Integer[] aList = famList.get(split.getCodePoint()); 282 if (aList == null) { 283 return split; 284 } 285 286 final int splitStyle = splitVariant.getAwtStyle(); 287 Integer to = aList[splitStyle]; 288 if (to != null) { 289 if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) { 290 return split; 291 } 292 return new CodePointAndVariant(to, MathVariant.NORMAL); 293 } 294 if (splitStyle != 0) { 295 to = aList[0]; 296 } 297 if (to != null) { 298 if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) { 299 return split; 300 } 301 return new CodePointAndVariant(to, new MathVariant(splitStyle, 302 FontFamily.SERIF)); 303 } 304 return split; 305 306 } 307 308 /** 309 * Extract the given char into variant and codepoint. 310 * 311 * @param test 312 * the Unicode char to split up. 313 * @return A {@link CodePointAndVariant} representing the same character 314 * with explicit variant. 315 */ 316 public CodePointAndVariant extractUnicodeAttr( 317 final CodePointAndVariant test) { 318 final CodePointAndVariant mapsTo = this.extractAttrs.get(test 319 .getCodePoint()); 320 if (mapsTo == null) { 321 return test; 322 } 323 final MathVariant testVariant = test.getVariant(); 324 final int testStyle = testVariant.getAwtStyle(); 325 final int mapsToCodepoint = mapsTo.getCodePoint(); 326 final CodePointAndVariant retVal; 327 if ((testStyle == Font.PLAIN) 328 || (this.forceSet.contains(mapsToCodepoint))) { 329 retVal = mapsTo; 330 } else { 331 final MathVariant mapsToVariant = mapsTo.getVariant(); 332 retVal = new CodePointAndVariant(mapsToCodepoint, 333 new MathVariant(testStyle | mapsToVariant.getAwtStyle(), 334 mapsToVariant.getFontFamily())); 335 } 336 return retVal; 337 } 338 339 /** 340 * Get all alternatives codePoints for this codePoint. They can be used if 341 * the original code point and variant is not available. 342 * 343 * @param cpav 344 * original CodePointAndVariant 345 * @return A List of alternative code points to check. 346 */ 347 public List<CodePointAndVariant> getAllAlternatives( 348 final CodePointAndVariant cpav) { 349 final Reference<List<CodePointAndVariant>> ref = this.alternatives 350 .get(cpav); 351 List<CodePointAndVariant> result = null; 352 if (ref != null) { 353 result = ref.get(); 354 } 355 if (result == null) { 356 result = this.reallyGetAllAternatives(cpav, true); 357 this.alternatives.put(cpav, 358 new SoftReference<List<CodePointAndVariant>>(result)); 359 } 360 return result; 361 } 362 363 private List<CodePointAndVariant> reallyGetAllAternatives( 364 final CodePointAndVariant cpav, final boolean useGlyphMapping) { 365 final List<CodePointAndVariant> list = new ArrayList<CodePointAndVariant>( 366 3); 367 368 final CodePointAndVariant cpav2 = this.extractUnicodeAttr(cpav); 369 // High Plane is broken on OS X! 370 final CodePointAndVariant cpav3 = this.composeUnicodeChar(cpav2, 371 StringUtil.OSX); 372 373 this.addGlyphsAndTheirAlternatives(list, cpav2, useGlyphMapping); 374 this.addGlyphsAndTheirAlternatives(list, cpav3, useGlyphMapping); 375 this.addGlyphsAndTheirAlternatives(list, cpav, useGlyphMapping); 376 377 return list; 378 } 379 380 private void addGlyphsAndTheirAlternatives( 381 final List<CodePointAndVariant> list, 382 final CodePointAndVariant cpav, final boolean useGlyphMapping) { 383 if (!list.contains(cpav)) { 384 list.add(cpav); 385 if (useGlyphMapping) { 386 this.addAlternateGlyph(list, cpav); 387 } 388 } 389 } 390 391 private void addAlternateGlyph(final List<CodePointAndVariant> list, 392 final CodePointAndVariant cpav) { 393 final int codePoint = cpav.getCodePoint(); 394 final String charAsString = new String(new int[] { codePoint }, 0, 1); 395 final String glyphName = Glyphs.stringToGlyph(charAsString); 396 final String[] alternateGlyphNames = Glyphs 397 .getCharNameAlternativesFor(glyphName); 398 if (alternateGlyphNames != null) { 399 for (final String altGlyph : alternateGlyphNames) { 400 final int altcp = Glyphs.getUnicodeSequenceForGlyphName( 401 altGlyph).codePointAt(0); 402 final List<CodePointAndVariant> alternateList = this 403 .reallyGetAllAternatives(new CodePointAndVariant( 404 altcp, cpav.getVariant()), false); 405 for (final CodePointAndVariant alternateCpav : alternateList) { 406 if (!list.contains(alternateCpav)) { 407 list.add(alternateCpav); 408 } 409 } 410 } 411 } 412 } 413 414 /** 415 * Checks if the given codepoint is a "marking" codepoint. Marking 416 * codepoints do not display by themself, but are usually combined with the 417 * previous character. 418 * 419 * @param codepoint 420 * codepoint to check. 421 * @return true if this codepoint reprensents a mark. 422 */ 423 public boolean isMark(int codepoint) { 424 return this.markSet.contains(codepoint); 425 } 426 427 }