001    /*
002     * Copyright 2002 - 2006 JEuclid, http://jeuclid.sf.net
003     * 
004     * Licensed under the Apache License, Version 2.0 (the "License");
005     * you may not use this file except in compliance with the License.
006     * You may obtain a copy of the License at
007     *
008     *      http://www.apache.org/licenses/LICENSE-2.0
009     *
010     * Unless required by applicable law or agreed to in writing, software
011     * distributed under the License is distributed on an "AS IS" BASIS,
012     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013     * See the License for the specific language governing permissions and
014     * limitations under the License.
015     */
016    
017    /* $Id: CharConverter.java,v c507eb905870 2008/05/15 16:59:47 maxberger $ */
018    
019    package net.sourceforge.jeuclid.elements.support.text;
020    
021    import java.util.HashMap;
022    import java.util.Map;
023    
/**
 * Static helper that rewrites strings by replacing single characters with
 * replacement strings taken from one of two fixed lookup tables:
 * <ul>
 * <li>the <em>early</em> table, intended to be applied when parsing (see
 * {@link #convertEarly(String)}), and</li>
 * <li>the <em>late</em> table, intended to be applied immediately before
 * display (see {@link #convertLate(String)}).</li>
 * </ul>
 * Both tables are filled once in the static initializer and are only read
 * afterwards. The class cannot be instantiated.
 * 
 * @version $Revision: c507eb905870 $
 */
public final class CharConverter {

    /**
     * Char equivalents to be mapped immediately before display.
     */
    private static final Map<Character, String> LATE_MAP_MAP = new HashMap<Character, String>();

    /**
     * Char equivalents to be mapped when parsing.
     */
    private static final Map<Character, String> EARLY_MAP_MAP = new HashMap<Character, String>(
            200);

    private CharConverter() {
        // Empty on purpose: utility class, never instantiated.
    }

    /**
     * Replaces every char of the input that has an entry in the given map by
     * the mapped string; all other chars are copied unchanged.
     * <p>
     * The input is processed one UTF-16 code unit at a time. Chars without a
     * map entry (including the two halves of a surrogate pair, as long as
     * they are not keys of the map) are therefore passed through as they
     * are.
     * 
     * @param string
     *            String for char replacing. Must not be null.
     * @param map
     *            lookup table from a single char to its replacement string.
     *            A replacement may be empty (the char is dropped) or longer
     *            than one char.
     * @return result string
     */
    private static String actualConvert(final String string,
            final Map<Character, String> map) {
        final int length = string.length();
        // Local, single-threaded accumulator: StringBuilder avoids the
        // needless synchronization of StringBuffer. The input length is a
        // good first guess for the capacity, as most chars map 1:1.
        final StringBuilder builder = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char orig = string.charAt(i);
            final String mapsTo = map.get(orig);
            if (mapsTo == null) {
                builder.append(orig);
            } else {
                builder.append(mapsTo);
            }
        }
        return builder.toString();
    }

    /**
     * Applies the early (parse-time) character mapping to a string.
     * 
     * @param string
     *            String for char replacing. Must not be null.
     * @return result string
     */
    public static String convertEarly(final String string) {
        return CharConverter.actualConvert(string,
                CharConverter.EARLY_MAP_MAP);
    }

    /**
     * Applies the late (immediately-before-display) character mapping to a
     * string.
     * 
     * @param string
     *            String for char replacing. Must not be null.
     * @return result string
     */
    public static String convertLate(final String string) {
        return CharConverter
                .actualConvert(string, CharConverter.LATE_MAP_MAP);
    }

    /**
     * Registers an early mapping from a single char to the string that
     * consists of exactly one Unicode code point. This is needed for targets
     * outside the Basic Multilingual Plane, which cannot be written as a
     * single char escape.
     * 
     * @param from
     *            char to be replaced.
     * @param codePoint
     *            Unicode code point of the replacement.
     */
    private static void putEarlyCodePoint(final char from,
            final int codePoint) {
        CharConverter.EARLY_MAP_MAP.put(from, new String(Character
                .toChars(codePoint)));
    }

    // CHECKSTYLE:OFF
    // Too many statements, but this is initialization!
    static {
        // These chars are replaced by the empty string, i.e. removed.
        CharConverter.LATE_MAP_MAP.put('\u2061', "");
        CharConverter.LATE_MAP_MAP.put('\u200b', "");
        CharConverter.LATE_MAP_MAP.put('\u2062', "");
        CharConverter.LATE_MAP_MAP.put('\u2148', "");
        /*
         * This maps UnderBar -> Overbar. The regular mapping of underbars
         * (0332) is a combining character, which produces incorrect text
         * metrics.
         * 
         * Underscore (_) should be used, but then the information about
         * stretching is lost.
         * 
         * OverBars are higher in the layout. However, UnderBars are usually
         * only used in underscripts, where this produces no problem.
         * 
         * TODO: Check if there are other combining characters among the
         * default entities and map them accordingly.
         */
        CharConverter.LATE_MAP_MAP.put('\u0332', "\u00AF");

        /*
         * These are created by OpenOffice formula < 2.2. See
         * http://www.openoffice.org/servlets/ReadMsg?list=dev&msgNo=543
         * 
         * These are mapping from the private area of the "starSymbol" (now
         * 'openSymbol') font.
         */
        CharConverter.EARLY_MAP_MAP.put('\uE080', "\u2031");
        CharConverter.EARLY_MAP_MAP.put('\uE081', "\uF613");
        CharConverter.EARLY_MAP_MAP.put('\uE083', "\u002B");
        CharConverter.EARLY_MAP_MAP.put('\uE084', "\u003C");
        CharConverter.EARLY_MAP_MAP.put('\uE085', "\u003E");
        CharConverter.EARLY_MAP_MAP.put('\uE086', "\ue425");
        CharConverter.EARLY_MAP_MAP.put('\uE087', "\ue421");
        CharConverter.EARLY_MAP_MAP.put('\uE089', "\u2208");
        CharConverter.EARLY_MAP_MAP.put('\uE08A', "\u0192");
        CharConverter.EARLY_MAP_MAP.put('\uE08B', "\u2026");
        CharConverter.EARLY_MAP_MAP.put('\uE08C', "\u2192");
        CharConverter.EARLY_MAP_MAP.put('\uE091', "\u0302");
        CharConverter.EARLY_MAP_MAP.put('\uE092', "\u030C");
        CharConverter.EARLY_MAP_MAP.put('\uE093', "\u0306");
        CharConverter.EARLY_MAP_MAP.put('\uE094', "\u0301");
        CharConverter.EARLY_MAP_MAP.put('\uE095', "\u0300");
        CharConverter.EARLY_MAP_MAP.put('\uE096', "\u0303");
        CharConverter.EARLY_MAP_MAP.put('\uE097', "\u0304");
        // Was: 20D7, but 2192 is more widely supported
        CharConverter.EARLY_MAP_MAP.put('\uE098', "\u2192");
        CharConverter.EARLY_MAP_MAP.put('\uE099', "\u02d9");
        CharConverter.EARLY_MAP_MAP.put('\uE09A', "\u0308");
        CharConverter.EARLY_MAP_MAP.put('\uE09B', "\u20DB");
        CharConverter.EARLY_MAP_MAP.put('\uE09C', "\u030A");
        CharConverter.EARLY_MAP_MAP.put('\uE09E', "\u0028");
        CharConverter.EARLY_MAP_MAP.put('\uE09F', "\u0029");
        CharConverter.EARLY_MAP_MAP.put('\uE0A2', "\u301A");
        CharConverter.EARLY_MAP_MAP.put('\uE0A3', "\u301B");
        CharConverter.EARLY_MAP_MAP.put('\uE0A4', "\u2373");
        CharConverter.EARLY_MAP_MAP.put('\uE0A8', "\u002F");
        CharConverter.EARLY_MAP_MAP.put('\uE0A9', "\\");
        CharConverter.EARLY_MAP_MAP.put('\uE0AA', "\u274F");
        CharConverter.EARLY_MAP_MAP.put('\uE0AC', "\u0393");
        CharConverter.EARLY_MAP_MAP.put('\uE0AD', "\u0394");
        CharConverter.EARLY_MAP_MAP.put('\uE0AE', "\u0398");
        CharConverter.EARLY_MAP_MAP.put('\uE0AF', "\u039b");
        CharConverter.EARLY_MAP_MAP.put('\uE0B0', "\u039e");
        CharConverter.EARLY_MAP_MAP.put('\uE0B1', "\u03A0");
        CharConverter.EARLY_MAP_MAP.put('\uE0B2', "\u03a3");
        CharConverter.EARLY_MAP_MAP.put('\uE0B3', "\u03a5");
        CharConverter.EARLY_MAP_MAP.put('\uE0B4', "\u03a6");
        CharConverter.EARLY_MAP_MAP.put('\uE0B5', "\u03a8");
        CharConverter.EARLY_MAP_MAP.put('\uE0B6', "\u03a9");
        CharConverter.EARLY_MAP_MAP.put('\uE0B7', "\u03b1");
        CharConverter.EARLY_MAP_MAP.put('\uE0B8', "\u03b2");
        CharConverter.EARLY_MAP_MAP.put('\uE0B9', "\u03b3");
        CharConverter.EARLY_MAP_MAP.put('\uE0BA', "\u03b4");
        CharConverter.EARLY_MAP_MAP.put('\uE0BB', "\u03b5");
        CharConverter.EARLY_MAP_MAP.put('\uE0BC', "\u03b6");
        CharConverter.EARLY_MAP_MAP.put('\uE0BD', "\u03b7");
        CharConverter.EARLY_MAP_MAP.put('\uE0BE', "\u03b8");
        CharConverter.EARLY_MAP_MAP.put('\uE0BF', "\u03b9");
        CharConverter.EARLY_MAP_MAP.put('\uE0C0', "\u03ba");
        CharConverter.EARLY_MAP_MAP.put('\uE0C1', "\u03bb");
        CharConverter.EARLY_MAP_MAP.put('\uE0C2', "\u03bc");
        CharConverter.EARLY_MAP_MAP.put('\uE0C3', "\u03bd");
        CharConverter.EARLY_MAP_MAP.put('\uE0C4', "\u03be");
        CharConverter.EARLY_MAP_MAP.put('\uE0C5', "\u03bf");
        CharConverter.EARLY_MAP_MAP.put('\uE0C6', "\u03c0");
        CharConverter.EARLY_MAP_MAP.put('\uE0C7', "\u03c1");
        CharConverter.EARLY_MAP_MAP.put('\uE0C8', "\u03c3");
        CharConverter.EARLY_MAP_MAP.put('\uE0C9', "\u03c4");
        CharConverter.EARLY_MAP_MAP.put('\uE0CA', "\u03c5");
        CharConverter.EARLY_MAP_MAP.put('\uE0CB', "\u03c6");
        CharConverter.EARLY_MAP_MAP.put('\uE0CC', "\u03c7");
        CharConverter.EARLY_MAP_MAP.put('\uE0CD', "\u03c8");
        CharConverter.EARLY_MAP_MAP.put('\uE0CE', "\u03c9");
        CharConverter.EARLY_MAP_MAP.put('\uE0CF', "\u03b5");
        CharConverter.EARLY_MAP_MAP.put('\uE0D0', "\u03d1");
        CharConverter.EARLY_MAP_MAP.put('\uE0D1', "\u03d6");
        CharConverter.EARLY_MAP_MAP.put('\uE0D2', "\u03f1");
        CharConverter.EARLY_MAP_MAP.put('\uE0D3', "\u03db");
        CharConverter.EARLY_MAP_MAP.put('\uE0D4', "\u2118");
        CharConverter.EARLY_MAP_MAP.put('\uE0D5', "\u2202");
        CharConverter.EARLY_MAP_MAP.put('\uE0D6', "\u2129");
        CharConverter.EARLY_MAP_MAP.put('\uE0D7', "\u2107");
        CharConverter.EARLY_MAP_MAP.put('\uE0D8', "\u2127");
        CharConverter.EARLY_MAP_MAP.put('\uE0D9', "\u22A4");
        CharConverter.EARLY_MAP_MAP.put('\uE0DA', "\u019B");
        CharConverter.EARLY_MAP_MAP.put('\uE0DB', "\u2190");
        CharConverter.EARLY_MAP_MAP.put('\uE0DC', "\u2191");
        CharConverter.EARLY_MAP_MAP.put('\uE0DD', "\u2193");

        // This set is generated by LaTeXMathML, and seems to be the set
        // supported via Mathematica 4.1 fonts in Firefox 2.0

        // SCRIPT CAPITALS
        CharConverter.putEarlyCodePoint('\uEF35', 0x1D49C);
        CharConverter.putEarlyCodePoint('\uEF36', 0x1D49E);
        CharConverter.putEarlyCodePoint('\uEF37', 0x1D49F);
        CharConverter.putEarlyCodePoint('\uEF38', 0x1D4A2);
        CharConverter.putEarlyCodePoint('\uEF39', 0x1D4A5);
        CharConverter.putEarlyCodePoint('\uEF3A', 0x1D4A6);
        CharConverter.putEarlyCodePoint('\uEF3B', 0x1D4A9);
        CharConverter.putEarlyCodePoint('\uEF3C', 0x1D4AA);
        CharConverter.putEarlyCodePoint('\uEF3D', 0x1D4AB);
        CharConverter.putEarlyCodePoint('\uEF3E', 0x1D4AC);
        CharConverter.putEarlyCodePoint('\uEF3F', 0x1D4AE);
        CharConverter.putEarlyCodePoint('\uEF40', 0x1D4AF);
        CharConverter.putEarlyCodePoint('\uEF41', 0x1D4B0);
        CharConverter.putEarlyCodePoint('\uEF42', 0x1D4B1);
        CharConverter.putEarlyCodePoint('\uEF43', 0x1D4B2);
        CharConverter.putEarlyCodePoint('\uEF44', 0x1D4B3);
        CharConverter.putEarlyCodePoint('\uEF45', 0x1D4B4);
        CharConverter.putEarlyCodePoint('\uEF46', 0x1D4B5);

        // FRAKTUR
        CharConverter.putEarlyCodePoint('\uEF5D', 0x1D504);
        CharConverter.putEarlyCodePoint('\uEF5E', 0x1D505);
        CharConverter.putEarlyCodePoint('\uEF5F', 0x1D507);
        CharConverter.putEarlyCodePoint('\uEF60', 0x1D508);
        CharConverter.putEarlyCodePoint('\uEF61', 0x1D509);
        CharConverter.putEarlyCodePoint('\uEF62', 0x1D50A);
        CharConverter.putEarlyCodePoint('\uEF63', 0x1D50D);
        CharConverter.putEarlyCodePoint('\uEF64', 0x1D50E);
        CharConverter.putEarlyCodePoint('\uEF65', 0x1D50F);
        CharConverter.putEarlyCodePoint('\uEF66', 0x1D510);
        CharConverter.putEarlyCodePoint('\uEF67', 0x1D511);
        CharConverter.putEarlyCodePoint('\uEF68', 0x1D512);
        CharConverter.putEarlyCodePoint('\uEF69', 0x1D513);
        CharConverter.putEarlyCodePoint('\uEF6A', 0x1D514);
        CharConverter.putEarlyCodePoint('\uEF6B', 0x1D516);
        CharConverter.putEarlyCodePoint('\uEF6C', 0x1D517);
        CharConverter.putEarlyCodePoint('\uEF6D', 0x1D518);
        CharConverter.putEarlyCodePoint('\uEF6E', 0x1D519);
        CharConverter.putEarlyCodePoint('\uEF6F', 0x1D51A);
        CharConverter.putEarlyCodePoint('\uEF70', 0x1D51B);
        CharConverter.putEarlyCodePoint('\uEF71', 0x1D51C);

        // DOUBLE_STRUCK CAPITALS
        CharConverter.putEarlyCodePoint('\uEF8C', 0x1D538);
        CharConverter.putEarlyCodePoint('\uEF8D', 0x1D539);
        CharConverter.putEarlyCodePoint('\uEF8E', 0x1D53B);
        CharConverter.putEarlyCodePoint('\uEF8F', 0x1D53C);
        CharConverter.putEarlyCodePoint('\uEF90', 0x1D53D);
        CharConverter.putEarlyCodePoint('\uEF91', 0x1D53E);
        CharConverter.putEarlyCodePoint('\uEF92', 0x1D540);
        CharConverter.putEarlyCodePoint('\uEF93', 0x1D541);
        CharConverter.putEarlyCodePoint('\uEF94', 0x1D542);
        CharConverter.putEarlyCodePoint('\uEF95', 0x1D543);
        CharConverter.putEarlyCodePoint('\uEF96', 0x1D544);
        CharConverter.putEarlyCodePoint('\uEF97', 0x1D546);
        CharConverter.putEarlyCodePoint('\uEF98', 0x1D54A);
        CharConverter.putEarlyCodePoint('\uEF99', 0x1D54B);
        CharConverter.putEarlyCodePoint('\uEF9A', 0x1D54C);
        CharConverter.putEarlyCodePoint('\uEF9B', 0x1D54D);
        CharConverter.putEarlyCodePoint('\uEF9C', 0x1D54E);
        CharConverter.putEarlyCodePoint('\uEF9D', 0x1D54F);
        CharConverter.putEarlyCodePoint('\uEF9E', 0x1D550);
        // CHECKSTYLE:ON
    }
}