1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package net.sourceforge.jeuclid.elements.support.text;
20
21 import java.awt.Font;
22 import java.io.BufferedReader;
23 import java.io.IOException;
24 import java.io.InputStream;
25 import java.io.InputStreamReader;
26 import java.io.ObjectInput;
27 import java.io.ObjectInputStream;
28 import java.io.Serializable;
29 import java.lang.ref.Reference;
30 import java.lang.ref.SoftReference;
31 import java.util.ArrayList;
32 import java.util.EnumMap;
33 import java.util.HashMap;
34 import java.util.HashSet;
35 import java.util.List;
36 import java.util.Map;
37 import java.util.Set;
38
39 import net.sourceforge.jeuclid.elements.support.attributes.FontFamily;
40 import net.sourceforge.jeuclid.elements.support.attributes.MathVariant;
41
42 import org.apache.commons.logging.Log;
43 import org.apache.commons.logging.LogFactory;
44 import org.apache.xmlgraphics.fonts.Glyphs;
45
46
47
48
49 public final class CharacterMapping implements Serializable {
50
51 private static final String LOAD_ERROR = "Error loading character mappings";
52
53 private static final int POS_CODESTR = 0;
54
55 private static final int POS_DESCRIPTION = 1;
56
57 private static final int POS_CATEGORY = 2;
58
59 private static final int POS_MAPS = 5;
60
61
62
63 private static final int HIGHPLANE_MATH_CHARS_START = 0x1D400;
64
65 private static final int HIGHPLANE_START = 0x10000;
66
67
68
69
70 private static final long serialVersionUID = 1L;
71
72 private static CharacterMapping instance;
73
74
75
76
77 private static final Log LOGGER = LogFactory
78 .getLog(CharacterMapping.class);
79
80 private final Map<Integer, CodePointAndVariant> extractAttrs;
81
82 private final Set<Integer> forceSet;
83
84 private final Set<Integer> markSet;
85
86 private final Map<FontFamily, Map<Integer, Integer[]>> composeAttrs;
87
88 private transient Map<CodePointAndVariant, Reference<List<CodePointAndVariant>>> alternatives;
89
90
91
92
93 private CharacterMapping() {
94 this.extractAttrs = new HashMap<Integer, CodePointAndVariant>();
95 this.forceSet = new HashSet<Integer>();
96 this.markSet = new HashSet<Integer>();
97 this.composeAttrs = new EnumMap<FontFamily, Map<Integer, Integer[]>>(
98 FontFamily.class);
99 this.readResolve();
100 this.loadUnicodeData();
101 }
102
103 private Object readResolve() {
104 this.alternatives = new HashMap<CodePointAndVariant, Reference<List<CodePointAndVariant>>>();
105 return this;
106 }
107
108 private void loadUnicodeData() {
109 final InputStream is = CharacterMapping.class
110 .getResourceAsStream("/net/sourceforge/jeuclid/UnicodeData.txt");
111 try {
112 final BufferedReader r = new BufferedReader(
113 new InputStreamReader(is));
114 try {
115 String s;
116 while ((s = r.readLine()) != null) {
117 final String[] c = s.split(";");
118 if (c.length > CharacterMapping.POS_MAPS) {
119 this.process(c[CharacterMapping.POS_CODESTR],
120 c[CharacterMapping.POS_DESCRIPTION],
121 c[CharacterMapping.POS_CATEGORY],
122 c[CharacterMapping.POS_MAPS]);
123 }
124 }
125 } catch (final IOException e) {
126 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
127 } finally {
128 try {
129 r.close();
130 } catch (final IOException e) {
131 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR,
132 e);
133 }
134 }
135 } catch (final NullPointerException e) {
136 CharacterMapping.LOGGER.warn(CharacterMapping.LOAD_ERROR, e);
137 }
138 }
139
140 private void process(final String codestr, final String descr,
141 final String category, final String mapsStr) {
142 try {
143 final int codepoint = Integer.parseInt(codestr, 16);
144
145 if (category.startsWith("M")) {
146 this.markSet.add(codepoint);
147 }
148
149 if (!mapsStr.startsWith("<font> ")) {
150 return;
151 }
152 final int mapsTo = Integer.parseInt(mapsStr.substring(7), 16);
153
154 final int awtStyle = this.parseAwtStyle(descr);
155 final FontFamily fam = this.parseFontFamily(descr);
156 if (fam == null) {
157 return;
158 }
159
160 final boolean force = (codepoint >= CharacterMapping.HIGHPLANE_MATH_CHARS_START)
161 && ((FontFamily.SANSSERIF.equals(fam)) || (FontFamily.SERIF
162 .equals(fam)));
163 if (force) {
164 this.forceSet.add(codepoint);
165 }
166
167
168 final CodePointAndVariant cpav = new CodePointAndVariant(mapsTo,
169 new MathVariant(awtStyle, fam));
170 this.extractAttrs.put(codepoint, cpav);
171 final Map<Integer, Integer[]> ffmap = this.getFFMap(fam);
172 final Integer[] ia = this.getMapsTo(mapsTo, ffmap);
173 ia[awtStyle] = codepoint;
174 } catch (final NumberFormatException nfe) {
175 CharacterMapping.LOGGER.debug("Parse Error", nfe);
176 }
177 }
178
179 private Integer[] getMapsTo(final int mapsTo,
180 final Map<Integer, Integer[]> ffmap) {
181 Integer[] ia = ffmap.get(mapsTo);
182 if (ia == null) {
183 ia = new Integer[Font.BOLD + Font.ITALIC + 1];
184 ffmap.put(mapsTo, ia);
185 }
186 return ia;
187 }
188
189 private Map<Integer, Integer[]> getFFMap(final FontFamily fam) {
190 Map<Integer, Integer[]> ffmap = this.composeAttrs.get(fam);
191 if (ffmap == null) {
192 ffmap = new HashMap<Integer, Integer[]>();
193 this.composeAttrs.put(fam, ffmap);
194 }
195 return ffmap;
196 }
197
198 private int parseAwtStyle(final String descr) {
199 int awtStyle = Font.PLAIN;
200 if (descr.contains("BOLD")) {
201 awtStyle += Font.BOLD;
202 }
203 if (descr.contains("ITALIC")) {
204 awtStyle += Font.ITALIC;
205 }
206 return awtStyle;
207 }
208
209 private FontFamily parseFontFamily(final String descr) {
210 final FontFamily fam;
211 if (descr.contains("DOUBLE-STRUCK")) {
212 fam = FontFamily.DOUBLE_STRUCK;
213 } else if (descr.contains("SCRIPT")) {
214 fam = FontFamily.SCRIPT;
215 } else if (descr.contains("BLACK-LETTER")
216 || descr.contains("FRAKTUR")) {
217 fam = FontFamily.FRAKTUR;
218 } else if (descr.contains("SANS-SERIF")) {
219 fam = FontFamily.SANSSERIF;
220 } else if (descr.contains("MONOSPACE")) {
221 fam = FontFamily.MONOSPACED;
222 } else if (descr.contains("MATHEMATICAL")) {
223 fam = FontFamily.SERIF;
224 } else {
225 fam = null;
226 }
227 return fam;
228 }
229
230
231
232
233
234
235 public static synchronized CharacterMapping getInstance() {
236 if (CharacterMapping.instance == null) {
237 CharacterMapping m;
238 try {
239 final InputStream is = CharacterMapping.class
240 .getResourceAsStream("/net/sourceforge/jeuclid/charmap.ser");
241 final ObjectInput oi = new ObjectInputStream(is);
242 m = (CharacterMapping) oi.readObject();
243 oi.close();
244 } catch (final ClassNotFoundException cnfe) {
245 m = null;
246 } catch (final IllegalArgumentException e) {
247 m = null;
248 } catch (final IOException e) {
249 m = null;
250 } catch (final NullPointerException e) {
251 m = null;
252 }
253 if (m == null) {
254 CharacterMapping.instance = new CharacterMapping();
255 } else {
256 CharacterMapping.instance = m;
257 }
258 }
259 return CharacterMapping.instance;
260 }
261
262
263
264
265
266
267
268
269
270
271
272
273 public CodePointAndVariant composeUnicodeChar(
274 final CodePointAndVariant split, final boolean forbidHighplane) {
275 final MathVariant splitVariant = split.getVariant();
276 final Map<Integer, Integer[]> famList = this.composeAttrs
277 .get(splitVariant.getFontFamily());
278 if (famList == null) {
279 return split;
280 }
281 final Integer[] aList = famList.get(split.getCodePoint());
282 if (aList == null) {
283 return split;
284 }
285
286 final int splitStyle = splitVariant.getAwtStyle();
287 Integer to = aList[splitStyle];
288 if (to != null) {
289 if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
290 return split;
291 }
292 return new CodePointAndVariant(to, MathVariant.NORMAL);
293 }
294 if (splitStyle != 0) {
295 to = aList[0];
296 }
297 if (to != null) {
298 if (forbidHighplane && to >= CharacterMapping.HIGHPLANE_START) {
299 return split;
300 }
301 return new CodePointAndVariant(to, new MathVariant(splitStyle,
302 FontFamily.SERIF));
303 }
304 return split;
305
306 }
307
308
309
310
311
312
313
314
315
316 public CodePointAndVariant extractUnicodeAttr(
317 final CodePointAndVariant test) {
318 final CodePointAndVariant mapsTo = this.extractAttrs.get(test
319 .getCodePoint());
320 if (mapsTo == null) {
321 return test;
322 }
323 final MathVariant testVariant = test.getVariant();
324 final int testStyle = testVariant.getAwtStyle();
325 final int mapsToCodepoint = mapsTo.getCodePoint();
326 final CodePointAndVariant retVal;
327 if ((testStyle == Font.PLAIN)
328 || (this.forceSet.contains(mapsToCodepoint))) {
329 retVal = mapsTo;
330 } else {
331 final MathVariant mapsToVariant = mapsTo.getVariant();
332 retVal = new CodePointAndVariant(mapsToCodepoint,
333 new MathVariant(testStyle | mapsToVariant.getAwtStyle(),
334 mapsToVariant.getFontFamily()));
335 }
336 return retVal;
337 }
338
339
340
341
342
343
344
345
346
347 public List<CodePointAndVariant> getAllAlternatives(
348 final CodePointAndVariant cpav) {
349 final Reference<List<CodePointAndVariant>> ref = this.alternatives
350 .get(cpav);
351 List<CodePointAndVariant> result = null;
352 if (ref != null) {
353 result = ref.get();
354 }
355 if (result == null) {
356 result = this.reallyGetAllAternatives(cpav, true);
357 this.alternatives.put(cpav,
358 new SoftReference<List<CodePointAndVariant>>(result));
359 }
360 return result;
361 }
362
363 private List<CodePointAndVariant> reallyGetAllAternatives(
364 final CodePointAndVariant cpav, final boolean useGlyphMapping) {
365 final List<CodePointAndVariant> list = new ArrayList<CodePointAndVariant>(
366 3);
367
368 final CodePointAndVariant cpav2 = this.extractUnicodeAttr(cpav);
369
370 final CodePointAndVariant cpav3 = this.composeUnicodeChar(cpav2,
371 StringUtil.OSX);
372
373 this.addGlyphsAndTheirAlternatives(list, cpav2, useGlyphMapping);
374 this.addGlyphsAndTheirAlternatives(list, cpav3, useGlyphMapping);
375 this.addGlyphsAndTheirAlternatives(list, cpav, useGlyphMapping);
376
377 return list;
378 }
379
380 private void addGlyphsAndTheirAlternatives(
381 final List<CodePointAndVariant> list,
382 final CodePointAndVariant cpav, final boolean useGlyphMapping) {
383 if (!list.contains(cpav)) {
384 list.add(cpav);
385 if (useGlyphMapping) {
386 this.addAlternateGlyph(list, cpav);
387 }
388 }
389 }
390
391 private void addAlternateGlyph(final List<CodePointAndVariant> list,
392 final CodePointAndVariant cpav) {
393 final int codePoint = cpav.getCodePoint();
394 final String charAsString = new String(new int[] { codePoint }, 0, 1);
395 final String glyphName = Glyphs.stringToGlyph(charAsString);
396 final String[] alternateGlyphNames = Glyphs
397 .getCharNameAlternativesFor(glyphName);
398 if (alternateGlyphNames != null) {
399 for (final String altGlyph : alternateGlyphNames) {
400 final int altcp = Glyphs.getUnicodeSequenceForGlyphName(
401 altGlyph).codePointAt(0);
402 final List<CodePointAndVariant> alternateList = this
403 .reallyGetAllAternatives(new CodePointAndVariant(
404 altcp, cpav.getVariant()), false);
405 for (final CodePointAndVariant alternateCpav : alternateList) {
406 if (!list.contains(alternateCpav)) {
407 list.add(alternateCpav);
408 }
409 }
410 }
411 }
412 }
413
414
415
416
417
418
419
420
421
422
423 public boolean isMark(int codepoint) {
424 return this.markSet.contains(codepoint);
425 }
426
427 }