View Javadoc

1   /*
2    * Copyright 2002 - 2006 JEuclid, http://jeuclid.sf.net
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  /* $Id: CharConverter.java,v c507eb905870 2008/05/15 16:59:47 maxberger $ */
18  
19  package net.sourceforge.jeuclid.elements.support.text;
20  
21  import java.util.HashMap;
22  import java.util.Map;
23  
/**
 * Static utility that rewrites strings character by character using two
 * fixed lookup tables: an "early" table applied when parsing and a "late"
 * table applied immediately before display.
 * <p>
 * Each table maps a single UTF-16 code unit to a replacement string. The
 * replacement may be empty (the character is dropped), a single character,
 * or a surrogate pair encoding a supplementary code point. Characters
 * without an entry are copied through unchanged.
 * 
 * @version $Revision: c507eb905870 $
 */
public final class CharConverter {

    /**
     * Char equivalents to be mapped immediately before display.
     */
    private static final Map<Character, String> LATE_MAP_MAP = new HashMap<Character, String>();

    /**
     * Char equivalents to be mapped when parsing.
     */
    private static final Map<Character, String> EARLY_MAP_MAP = new HashMap<Character, String>(
            200);

    private CharConverter() {
        // Empty on purpose.
    }

    /**
     * Replaces every character of the input that has an entry in the given
     * table by its replacement string; all other characters are kept as
     * they are.
     * 
     * @param string
     *            String for char replacing
     * @param map
     *            lookup table from original character to replacement string
     * @return result string
     */
    private static String actualConvert(final String string,
            final Map<Character, String> map) {
        // A local, unshared accumulator needs no synchronization, so
        // StringBuilder is used. The input length is a reasonable initial
        // capacity because most replacements are at most one character.
        final StringBuilder buffer = new StringBuilder(string.length());
        for (int i = 0; i < string.length(); i++) {
            final char orig = string.charAt(i);
            final String mapsTo = map.get(orig);
            if (mapsTo == null) {
                buffer.append(orig);
            } else {
                buffer.append(mapsTo);
            }
        }
        return buffer.toString();
    }

    /**
     * Applies the parse-time ("early") character replacements.
     * 
     * @param string
     *            String for char replacing
     * @return result string
     */
    public static String convertEarly(final String string) {
        return CharConverter.actualConvert(string,
                CharConverter.EARLY_MAP_MAP);
    }

    /**
     * Applies the display-time ("late") character replacements.
     * 
     * @param string
     *            String for char replacing
     * @return result string
     */
    public static String convertLate(final String string) {
        return CharConverter
                .actualConvert(string, CharConverter.LATE_MAP_MAP);
    }

    /**
     * Registers an early mapping whose target is given as a Unicode code
     * point. This allows targets outside the Basic Multilingual Plane, which
     * cannot be written as a single char escape.
     * 
     * @param from
     *            character to be replaced
     * @param codePoint
     *            Unicode code point of the replacement
     */
    private static void mapEarlyToCodePoint(final char from,
            final int codePoint) {
        CharConverter.EARLY_MAP_MAP.put(from, new String(
                new int[] { codePoint }, 0, 1));
    }

    // CHECKSTYLE:OFF
    // Too many statements, but this is initialization!
    static {
        // These characters are removed entirely before display.
        CharConverter.LATE_MAP_MAP.put('\u2061', "");
        CharConverter.LATE_MAP_MAP.put('\u200b', "");
        CharConverter.LATE_MAP_MAP.put('\u2062', "");
        CharConverter.LATE_MAP_MAP.put('\u2148', "");
        /*
         * This maps UnderBar -> Overbar. The regular mapping of underbars
         * (0332) is a combining character, which produces incorrect text
         * metrics.
         * 
         * Underscore (_) should be used, but then the information about
         * stretching is lost.
         * 
         * OverBars are higher in the layout. However, UnderBars are usually
         * only used in underscripts, where this produces no problem.
         * 
         * TODO: Check if there are other combining characters among the
         * default entities and map them accordingly.
         */
        CharConverter.LATE_MAP_MAP.put('\u0332', "\u00AF");

        /*
         * These are created by OpenOffice formula < 2.2. See
         * http://www.openoffice.org/servlets/ReadMsg?list=dev&msgNo=543
         * 
         * These are mapping from the private area of the "starSymbol" (now
         * 'openSymbol') font.
         */
        CharConverter.EARLY_MAP_MAP.put('\uE080', "\u2031");
        CharConverter.EARLY_MAP_MAP.put('\uE081', "\uF613");
        CharConverter.EARLY_MAP_MAP.put('\uE083', "\u002B");
        CharConverter.EARLY_MAP_MAP.put('\uE084', "\u003C");
        CharConverter.EARLY_MAP_MAP.put('\uE085', "\u003E");
        CharConverter.EARLY_MAP_MAP.put('\uE086', "\ue425");
        CharConverter.EARLY_MAP_MAP.put('\uE087', "\ue421");
        CharConverter.EARLY_MAP_MAP.put('\uE089', "\u2208");
        CharConverter.EARLY_MAP_MAP.put('\uE08A', "\u0192");
        CharConverter.EARLY_MAP_MAP.put('\uE08B', "\u2026");
        CharConverter.EARLY_MAP_MAP.put('\uE08C', "\u2192");
        CharConverter.EARLY_MAP_MAP.put('\uE091', "\u0302");
        CharConverter.EARLY_MAP_MAP.put('\uE092', "\u030C");
        CharConverter.EARLY_MAP_MAP.put('\uE093', "\u0306");
        CharConverter.EARLY_MAP_MAP.put('\uE094', "\u0301");
        CharConverter.EARLY_MAP_MAP.put('\uE095', "\u0300");
        CharConverter.EARLY_MAP_MAP.put('\uE096', "\u0303");
        CharConverter.EARLY_MAP_MAP.put('\uE097', "\u0304");
        // Was: 20D7, but 2192 is more widely supported
        CharConverter.EARLY_MAP_MAP.put('\uE098', "\u2192");
        CharConverter.EARLY_MAP_MAP.put('\uE099', "\u02d9");
        CharConverter.EARLY_MAP_MAP.put('\uE09A', "\u0308");
        CharConverter.EARLY_MAP_MAP.put('\uE09B', "\u20DB");
        CharConverter.EARLY_MAP_MAP.put('\uE09C', "\u030A");
        CharConverter.EARLY_MAP_MAP.put('\uE09E', "\u0028");
        CharConverter.EARLY_MAP_MAP.put('\uE09F', "\u0029");
        CharConverter.EARLY_MAP_MAP.put('\uE0A2', "\u301A");
        CharConverter.EARLY_MAP_MAP.put('\uE0A3', "\u301B");
        CharConverter.EARLY_MAP_MAP.put('\uE0A4', "\u2373");
        CharConverter.EARLY_MAP_MAP.put('\uE0A8', "\u002F");
        CharConverter.EARLY_MAP_MAP.put('\uE0A9', "\\");
        CharConverter.EARLY_MAP_MAP.put('\uE0AA', "\u274F");
        CharConverter.EARLY_MAP_MAP.put('\uE0AC', "\u0393");
        CharConverter.EARLY_MAP_MAP.put('\uE0AD', "\u0394");
        CharConverter.EARLY_MAP_MAP.put('\uE0AE', "\u0398");
        CharConverter.EARLY_MAP_MAP.put('\uE0AF', "\u039b");
        CharConverter.EARLY_MAP_MAP.put('\uE0B0', "\u039e");
        CharConverter.EARLY_MAP_MAP.put('\uE0B1', "\u03A0");
        CharConverter.EARLY_MAP_MAP.put('\uE0B2', "\u03a3");
        CharConverter.EARLY_MAP_MAP.put('\uE0B3', "\u03a5");
        CharConverter.EARLY_MAP_MAP.put('\uE0B4', "\u03a6");
        CharConverter.EARLY_MAP_MAP.put('\uE0B5', "\u03a8");
        CharConverter.EARLY_MAP_MAP.put('\uE0B6', "\u03a9");
        CharConverter.EARLY_MAP_MAP.put('\uE0B7', "\u03b1");
        CharConverter.EARLY_MAP_MAP.put('\uE0B8', "\u03b2");
        CharConverter.EARLY_MAP_MAP.put('\uE0B9', "\u03b3");
        CharConverter.EARLY_MAP_MAP.put('\uE0BA', "\u03b4");
        CharConverter.EARLY_MAP_MAP.put('\uE0BB', "\u03b5");
        CharConverter.EARLY_MAP_MAP.put('\uE0BC', "\u03b6");
        CharConverter.EARLY_MAP_MAP.put('\uE0BD', "\u03b7");
        CharConverter.EARLY_MAP_MAP.put('\uE0BE', "\u03b8");
        CharConverter.EARLY_MAP_MAP.put('\uE0BF', "\u03b9");
        CharConverter.EARLY_MAP_MAP.put('\uE0C0', "\u03ba");
        CharConverter.EARLY_MAP_MAP.put('\uE0C1', "\u03bb");
        CharConverter.EARLY_MAP_MAP.put('\uE0C2', "\u03bc");
        CharConverter.EARLY_MAP_MAP.put('\uE0C3', "\u03bd");
        CharConverter.EARLY_MAP_MAP.put('\uE0C4', "\u03be");
        CharConverter.EARLY_MAP_MAP.put('\uE0C5', "\u03bf");
        CharConverter.EARLY_MAP_MAP.put('\uE0C6', "\u03c0");
        CharConverter.EARLY_MAP_MAP.put('\uE0C7', "\u03c1");
        CharConverter.EARLY_MAP_MAP.put('\uE0C8', "\u03c3");
        CharConverter.EARLY_MAP_MAP.put('\uE0C9', "\u03c4");
        CharConverter.EARLY_MAP_MAP.put('\uE0CA', "\u03c5");
        CharConverter.EARLY_MAP_MAP.put('\uE0CB', "\u03c6");
        CharConverter.EARLY_MAP_MAP.put('\uE0CC', "\u03c7");
        CharConverter.EARLY_MAP_MAP.put('\uE0CD', "\u03c8");
        CharConverter.EARLY_MAP_MAP.put('\uE0CE', "\u03c9");
        CharConverter.EARLY_MAP_MAP.put('\uE0CF', "\u03b5");
        CharConverter.EARLY_MAP_MAP.put('\uE0D0', "\u03d1");
        CharConverter.EARLY_MAP_MAP.put('\uE0D1', "\u03d6");
        CharConverter.EARLY_MAP_MAP.put('\uE0D2', "\u03f1");
        CharConverter.EARLY_MAP_MAP.put('\uE0D3', "\u03db");
        CharConverter.EARLY_MAP_MAP.put('\uE0D4', "\u2118");
        CharConverter.EARLY_MAP_MAP.put('\uE0D5', "\u2202");
        CharConverter.EARLY_MAP_MAP.put('\uE0D6', "\u2129");
        CharConverter.EARLY_MAP_MAP.put('\uE0D7', "\u2107");
        CharConverter.EARLY_MAP_MAP.put('\uE0D8', "\u2127");
        CharConverter.EARLY_MAP_MAP.put('\uE0D9', "\u22A4");
        CharConverter.EARLY_MAP_MAP.put('\uE0DA', "\u019B");
        CharConverter.EARLY_MAP_MAP.put('\uE0DB', "\u2190");
        CharConverter.EARLY_MAP_MAP.put('\uE0DC', "\u2191");
        CharConverter.EARLY_MAP_MAP.put('\uE0DD', "\u2193");

        // This set is generated by LaTeXMathML, and seems to be the set
        // supported via Mathematica 4.1 fonts in Firefox 2.0

        // SCRIPT CAPITALS
        CharConverter.mapEarlyToCodePoint('\uEF35', 0x1D49C);
        CharConverter.mapEarlyToCodePoint('\uEF36', 0x1D49E);
        CharConverter.mapEarlyToCodePoint('\uEF37', 0x1D49F);
        CharConverter.mapEarlyToCodePoint('\uEF38', 0x1D4A2);
        CharConverter.mapEarlyToCodePoint('\uEF39', 0x1D4A5);
        CharConverter.mapEarlyToCodePoint('\uEF3A', 0x1D4A6);
        CharConverter.mapEarlyToCodePoint('\uEF3B', 0x1D4A9);
        CharConverter.mapEarlyToCodePoint('\uEF3C', 0x1D4AA);
        CharConverter.mapEarlyToCodePoint('\uEF3D', 0x1D4AB);
        CharConverter.mapEarlyToCodePoint('\uEF3E', 0x1D4AC);
        CharConverter.mapEarlyToCodePoint('\uEF3F', 0x1D4AE);
        CharConverter.mapEarlyToCodePoint('\uEF40', 0x1D4AF);
        CharConverter.mapEarlyToCodePoint('\uEF41', 0x1D4B0);
        CharConverter.mapEarlyToCodePoint('\uEF42', 0x1D4B1);
        CharConverter.mapEarlyToCodePoint('\uEF43', 0x1D4B2);
        CharConverter.mapEarlyToCodePoint('\uEF44', 0x1D4B3);
        CharConverter.mapEarlyToCodePoint('\uEF45', 0x1D4B4);
        CharConverter.mapEarlyToCodePoint('\uEF46', 0x1D4B5);

        // FRAKTUR
        CharConverter.mapEarlyToCodePoint('\uEF5D', 0x1D504);
        CharConverter.mapEarlyToCodePoint('\uEF5E', 0x1D505);
        CharConverter.mapEarlyToCodePoint('\uEF5F', 0x1D507);
        CharConverter.mapEarlyToCodePoint('\uEF60', 0x1D508);
        CharConverter.mapEarlyToCodePoint('\uEF61', 0x1D509);
        CharConverter.mapEarlyToCodePoint('\uEF62', 0x1D50A);
        CharConverter.mapEarlyToCodePoint('\uEF63', 0x1D50D);
        CharConverter.mapEarlyToCodePoint('\uEF64', 0x1D50E);
        CharConverter.mapEarlyToCodePoint('\uEF65', 0x1D50F);
        CharConverter.mapEarlyToCodePoint('\uEF66', 0x1D510);
        CharConverter.mapEarlyToCodePoint('\uEF67', 0x1D511);
        CharConverter.mapEarlyToCodePoint('\uEF68', 0x1D512);
        CharConverter.mapEarlyToCodePoint('\uEF69', 0x1D513);
        CharConverter.mapEarlyToCodePoint('\uEF6A', 0x1D514);
        CharConverter.mapEarlyToCodePoint('\uEF6B', 0x1D516);
        CharConverter.mapEarlyToCodePoint('\uEF6C', 0x1D517);
        CharConverter.mapEarlyToCodePoint('\uEF6D', 0x1D518);
        CharConverter.mapEarlyToCodePoint('\uEF6E', 0x1D519);
        CharConverter.mapEarlyToCodePoint('\uEF6F', 0x1D51A);
        CharConverter.mapEarlyToCodePoint('\uEF70', 0x1D51B);
        CharConverter.mapEarlyToCodePoint('\uEF71', 0x1D51C);

        // DOUBLE_STRUCK CAPITALS
        CharConverter.mapEarlyToCodePoint('\uEF8C', 0x1D538);
        CharConverter.mapEarlyToCodePoint('\uEF8D', 0x1D539);
        CharConverter.mapEarlyToCodePoint('\uEF8E', 0x1D53B);
        CharConverter.mapEarlyToCodePoint('\uEF8F', 0x1D53C);
        CharConverter.mapEarlyToCodePoint('\uEF90', 0x1D53D);
        CharConverter.mapEarlyToCodePoint('\uEF91', 0x1D53E);
        CharConverter.mapEarlyToCodePoint('\uEF92', 0x1D540);
        CharConverter.mapEarlyToCodePoint('\uEF93', 0x1D541);
        CharConverter.mapEarlyToCodePoint('\uEF94', 0x1D542);
        CharConverter.mapEarlyToCodePoint('\uEF95', 0x1D543);
        CharConverter.mapEarlyToCodePoint('\uEF96', 0x1D544);
        CharConverter.mapEarlyToCodePoint('\uEF97', 0x1D546);
        CharConverter.mapEarlyToCodePoint('\uEF98', 0x1D54A);
        CharConverter.mapEarlyToCodePoint('\uEF99', 0x1D54B);
        CharConverter.mapEarlyToCodePoint('\uEF9A', 0x1D54C);
        CharConverter.mapEarlyToCodePoint('\uEF9B', 0x1D54D);
        CharConverter.mapEarlyToCodePoint('\uEF9C', 0x1D54E);
        CharConverter.mapEarlyToCodePoint('\uEF9D', 0x1D54F);
        CharConverter.mapEarlyToCodePoint('\uEF9E', 0x1D550);
        // CHECKSTYLE:ON
    }
}