001 /*
002 * Copyright 2002 - 2006 JEuclid, http://jeuclid.sf.net
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017 /* $Id: CharConverter.java,v c507eb905870 2008/05/15 16:59:47 maxberger $ */
018
019 package net.sourceforge.jeuclid.elements.support.text;
020
021 import java.util.HashMap;
022 import java.util.Map;
023
/**
 * Static helper that replaces individual characters of a string according
 * to two fixed lookup tables: an "early" table meant to be applied when
 * parsing, and a "late" table meant to be applied immediately before
 * display.
 * <p>
 * Both tables are filled once in the static initializer and are only read
 * afterwards. The class cannot be instantiated.
 *
 * @version $Revision: c507eb905870 $
 */
public final class CharConverter {

    /**
     * Char equivalents to be mapped immediately before display.
     */
    private static final Map<Character, String> LATE_MAP_MAP = new HashMap<Character, String>();

    /**
     * Char equivalents to be mapped when parsing.
     */
    private static final Map<Character, String> EARLY_MAP_MAP = new HashMap<Character, String>(
            200);

    private CharConverter() {
        // Empty on purpose.
    }

    /**
     * Replaces every char of the input that has an entry in the given table
     * by the mapped string; chars without an entry are copied unchanged.
     * The lookup is done per UTF-16 char, so only single-char keys can
     * match. A replacement may be empty (the char is dropped) or longer
     * than one char.
     *
     * @param string
     *            String for char replacing. Must not be null.
     * @param map
     *            lookup table from a source char to its replacement string.
     * @return result string
     */
    private static String actualConvert(final String string,
            final Map<Character, String> map) {
        final int length = string.length();
        // The buffer never leaves this method, so the unsynchronized
        // StringBuilder is sufficient.
        final StringBuilder builder = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char orig = string.charAt(i);
            final String mapsTo = map.get(orig);
            if (mapsTo == null) {
                builder.append(orig);
            } else {
                builder.append(mapsTo);
            }
        }
        return builder.toString();
    }

    /**
     * Applies the early (parse time) replacement table to a string.
     *
     * @param string
     *            String for char replacing. Must not be null.
     * @return result string
     */
    public static String convertEarly(final String string) {
        return CharConverter.actualConvert(string,
                CharConverter.EARLY_MAP_MAP);
    }

    /**
     * Applies the late (display time) replacement table to a string.
     *
     * @param string
     *            String for char replacing. Must not be null.
     * @return result string
     */
    public static String convertLate(final String string) {
        return CharConverter
                .actualConvert(string, CharConverter.LATE_MAP_MAP);
    }

    /**
     * Registers early mappings for a run of consecutive source chars. The
     * first code point is mapped from firstKey, the second from
     * firstKey + 1, and so on. Each code point is stored as its UTF-16
     * string form, which is a surrogate pair for supplementary code points.
     *
     * @param firstKey
     *            source char of the first mapping in the run.
     * @param codePoints
     *            target Unicode code points, in key order.
     */
    private static void putSupplementaryRun(final char firstKey,
            final int... codePoints) {
        char key = firstKey;
        for (final int codePoint : codePoints) {
            CharConverter.EARLY_MAP_MAP.put(key, new String(Character
                    .toChars(codePoint)));
            key++;
        }
    }

    // CHECKSTYLE:OFF
    // Too many statements, but this is initialization!
    static {
        // These characters are removed (replaced by the empty string)
        // before display.
        CharConverter.LATE_MAP_MAP.put('\u2061', "");
        CharConverter.LATE_MAP_MAP.put('\u200b', "");
        CharConverter.LATE_MAP_MAP.put('\u2062', "");
        CharConverter.LATE_MAP_MAP.put('\u2148', "");
        /*
         * This maps UnderBar -> Overbar. The regular mapping of underbars
         * (0332) is a combining character, which produces incorrect text
         * metrics.
         *
         * Underscore (_) should be used, but then the information about
         * stretching is lost.
         *
         * OverBars are higher in the layout. However, UnderBars are usually
         * only used in underscripts, where this produces no problem.
         *
         * TODO: Check if there are other combining characters among the
         * default entities and map them accordingly.
         */
        CharConverter.LATE_MAP_MAP.put('\u0332', "\u00AF");

        /*
         * These are created by OpenOffice formula < 2.2. See
         * http://www.openoffice.org/servlets/ReadMsg?list=dev&msgNo=543
         *
         * These are mapping from the private area of the "starSymbol" (now
         * 'openSymbol') font.
         */
        CharConverter.EARLY_MAP_MAP.put('\uE080', "\u2031");
        CharConverter.EARLY_MAP_MAP.put('\uE081', "\uF613");
        CharConverter.EARLY_MAP_MAP.put('\uE083', "\u002B");
        CharConverter.EARLY_MAP_MAP.put('\uE084', "\u003C");
        CharConverter.EARLY_MAP_MAP.put('\uE085', "\u003E");
        CharConverter.EARLY_MAP_MAP.put('\uE086', "\ue425");
        CharConverter.EARLY_MAP_MAP.put('\uE087', "\ue421");
        CharConverter.EARLY_MAP_MAP.put('\uE089', "\u2208");
        CharConverter.EARLY_MAP_MAP.put('\uE08A', "\u0192");
        CharConverter.EARLY_MAP_MAP.put('\uE08B', "\u2026");
        CharConverter.EARLY_MAP_MAP.put('\uE08C', "\u2192");
        CharConverter.EARLY_MAP_MAP.put('\uE091', "\u0302");
        CharConverter.EARLY_MAP_MAP.put('\uE092', "\u030C");
        CharConverter.EARLY_MAP_MAP.put('\uE093', "\u0306");
        CharConverter.EARLY_MAP_MAP.put('\uE094', "\u0301");
        CharConverter.EARLY_MAP_MAP.put('\uE095', "\u0300");
        CharConverter.EARLY_MAP_MAP.put('\uE096', "\u0303");
        CharConverter.EARLY_MAP_MAP.put('\uE097', "\u0304");
        // Was: 20D7, but 2192 is more widely supported
        CharConverter.EARLY_MAP_MAP.put('\uE098', "\u2192");
        CharConverter.EARLY_MAP_MAP.put('\uE099', "\u02d9");
        CharConverter.EARLY_MAP_MAP.put('\uE09A', "\u0308");
        CharConverter.EARLY_MAP_MAP.put('\uE09B', "\u20DB");
        CharConverter.EARLY_MAP_MAP.put('\uE09C', "\u030A");
        CharConverter.EARLY_MAP_MAP.put('\uE09E', "\u0028");
        CharConverter.EARLY_MAP_MAP.put('\uE09F', "\u0029");
        CharConverter.EARLY_MAP_MAP.put('\uE0A2', "\u301A");
        CharConverter.EARLY_MAP_MAP.put('\uE0A3', "\u301B");
        CharConverter.EARLY_MAP_MAP.put('\uE0A4', "\u2373");
        CharConverter.EARLY_MAP_MAP.put('\uE0A8', "\u002F");
        CharConverter.EARLY_MAP_MAP.put('\uE0A9', "\\");
        CharConverter.EARLY_MAP_MAP.put('\uE0AA', "\u274F");
        CharConverter.EARLY_MAP_MAP.put('\uE0AC', "\u0393");
        CharConverter.EARLY_MAP_MAP.put('\uE0AD', "\u0394");
        CharConverter.EARLY_MAP_MAP.put('\uE0AE', "\u0398");
        CharConverter.EARLY_MAP_MAP.put('\uE0AF', "\u039b");
        CharConverter.EARLY_MAP_MAP.put('\uE0B0', "\u039e");
        CharConverter.EARLY_MAP_MAP.put('\uE0B1', "\u03A0");
        CharConverter.EARLY_MAP_MAP.put('\uE0B2', "\u03a3");
        CharConverter.EARLY_MAP_MAP.put('\uE0B3', "\u03a5");
        CharConverter.EARLY_MAP_MAP.put('\uE0B4', "\u03a6");
        CharConverter.EARLY_MAP_MAP.put('\uE0B5', "\u03a8");
        CharConverter.EARLY_MAP_MAP.put('\uE0B6', "\u03a9");
        CharConverter.EARLY_MAP_MAP.put('\uE0B7', "\u03b1");
        CharConverter.EARLY_MAP_MAP.put('\uE0B8', "\u03b2");
        CharConverter.EARLY_MAP_MAP.put('\uE0B9', "\u03b3");
        CharConverter.EARLY_MAP_MAP.put('\uE0BA', "\u03b4");
        CharConverter.EARLY_MAP_MAP.put('\uE0BB', "\u03b5");
        CharConverter.EARLY_MAP_MAP.put('\uE0BC', "\u03b6");
        CharConverter.EARLY_MAP_MAP.put('\uE0BD', "\u03b7");
        CharConverter.EARLY_MAP_MAP.put('\uE0BE', "\u03b8");
        CharConverter.EARLY_MAP_MAP.put('\uE0BF', "\u03b9");
        CharConverter.EARLY_MAP_MAP.put('\uE0C0', "\u03ba");
        CharConverter.EARLY_MAP_MAP.put('\uE0C1', "\u03bb");
        CharConverter.EARLY_MAP_MAP.put('\uE0C2', "\u03bc");
        CharConverter.EARLY_MAP_MAP.put('\uE0C3', "\u03bd");
        CharConverter.EARLY_MAP_MAP.put('\uE0C4', "\u03be");
        CharConverter.EARLY_MAP_MAP.put('\uE0C5', "\u03bf");
        CharConverter.EARLY_MAP_MAP.put('\uE0C6', "\u03c0");
        CharConverter.EARLY_MAP_MAP.put('\uE0C7', "\u03c1");
        CharConverter.EARLY_MAP_MAP.put('\uE0C8', "\u03c3");
        CharConverter.EARLY_MAP_MAP.put('\uE0C9', "\u03c4");
        CharConverter.EARLY_MAP_MAP.put('\uE0CA', "\u03c5");
        CharConverter.EARLY_MAP_MAP.put('\uE0CB', "\u03c6");
        CharConverter.EARLY_MAP_MAP.put('\uE0CC', "\u03c7");
        CharConverter.EARLY_MAP_MAP.put('\uE0CD', "\u03c8");
        CharConverter.EARLY_MAP_MAP.put('\uE0CE', "\u03c9");
        CharConverter.EARLY_MAP_MAP.put('\uE0CF', "\u03b5");
        CharConverter.EARLY_MAP_MAP.put('\uE0D0', "\u03d1");
        CharConverter.EARLY_MAP_MAP.put('\uE0D1', "\u03d6");
        CharConverter.EARLY_MAP_MAP.put('\uE0D2', "\u03f1");
        CharConverter.EARLY_MAP_MAP.put('\uE0D3', "\u03db");
        CharConverter.EARLY_MAP_MAP.put('\uE0D4', "\u2118");
        CharConverter.EARLY_MAP_MAP.put('\uE0D5', "\u2202");
        CharConverter.EARLY_MAP_MAP.put('\uE0D6', "\u2129");
        CharConverter.EARLY_MAP_MAP.put('\uE0D7', "\u2107");
        CharConverter.EARLY_MAP_MAP.put('\uE0D8', "\u2127");
        CharConverter.EARLY_MAP_MAP.put('\uE0D9', "\u22A4");
        CharConverter.EARLY_MAP_MAP.put('\uE0DA', "\u019B");
        CharConverter.EARLY_MAP_MAP.put('\uE0DB', "\u2190");
        CharConverter.EARLY_MAP_MAP.put('\uE0DC', "\u2191");
        CharConverter.EARLY_MAP_MAP.put('\uE0DD', "\u2193");

        // This set is generated by LaTeXMathML, and seems to be the set
        // supported via Mathematica 4.1 fonts in Firefox 2.0

        // SCRIPT CAPITALS (source chars EF35 to EF46)
        CharConverter.putSupplementaryRun('\uEF35',
                0x1D49C, 0x1D49E, 0x1D49F, 0x1D4A2, 0x1D4A5, 0x1D4A6,
                0x1D4A9, 0x1D4AA, 0x1D4AB, 0x1D4AC, 0x1D4AE, 0x1D4AF,
                0x1D4B0, 0x1D4B1, 0x1D4B2, 0x1D4B3, 0x1D4B4, 0x1D4B5);

        // FRAKTUR (source chars EF5D to EF71)
        CharConverter.putSupplementaryRun('\uEF5D',
                0x1D504, 0x1D505, 0x1D507, 0x1D508, 0x1D509, 0x1D50A,
                0x1D50D, 0x1D50E, 0x1D50F, 0x1D510, 0x1D511, 0x1D512,
                0x1D513, 0x1D514, 0x1D516, 0x1D517, 0x1D518, 0x1D519,
                0x1D51A, 0x1D51B, 0x1D51C);

        // DOUBLE_STRUCK CAPITALS (source chars EF8C to EF9E)
        CharConverter.putSupplementaryRun('\uEF8C',
                0x1D538, 0x1D539, 0x1D53B, 0x1D53C, 0x1D53D, 0x1D53E,
                0x1D540, 0x1D541, 0x1D542, 0x1D543, 0x1D544, 0x1D546,
                0x1D54A, 0x1D54B, 0x1D54C, 0x1D54D, 0x1D54E, 0x1D54F,
                0x1D550);
        // CHECKSTYLE:ON
    }
}