001 /* 002 * Copyright 2002 - 2006 JEuclid, http://jeuclid.sf.net 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017 /* $Id: CharConverter.java,v c507eb905870 2008/05/15 16:59:47 maxberger $ */ 018 019 package net.sourceforge.jeuclid.elements.support.text; 020 021 import java.util.HashMap; 022 import java.util.Map; 023 024 /** 025 * class for char converting. 026 * 027 * @version $Revision: c507eb905870 $ 028 */ 029 public final class CharConverter { 030 031 /** 032 * Char equivalents to be mapped immediately before display. 033 */ 034 private static final Map<Character, String> LATE_MAP_MAP = new HashMap<Character, String>(); 035 036 /** 037 * Char equivalents to be mapped when parsing. 038 */ 039 private static final Map<Character, String> EARLY_MAP_MAP = new HashMap<Character, String>( 040 200); 041 042 private CharConverter() { 043 // Empty on purpose. 044 } 045 046 /** 047 * @param string 048 * String for char replacing 049 * @return result string 050 */ 051 private static String actualConvert(final String string, 052 final Map<Character, String> map) { 053 final StringBuffer buffer = new StringBuffer(); 054 for (int i = 0; i < string.length(); i++) { 055 final char orig = string.charAt(i); 056 final String mapsTo = map.get(orig); 057 if (mapsTo == null) { 058 buffer.append(orig); 059 } else { 060 buffer.append(mapsTo); 061 } 062 } 063 return buffer.toString(); 064 } 065 066 /** 067 * @param string 068 * String for char replacing 069 * @return result string 070 */ 071 public static String convertEarly(final String string) { 072 return CharConverter.actualConvert(string, 073 CharConverter.EARLY_MAP_MAP); 074 } 075 076 /** 077 * @param string 078 * String for char replacing 079 * @return result string 080 */ 081 public static String convertLate(final String string) { 082 return CharConverter 083 .actualConvert(string, CharConverter.LATE_MAP_MAP); 084 } 085 086 // CHECKSTYLE:OFF 087 // Too many statements, but this is initialization! 088 static { 089 CharConverter.LATE_MAP_MAP.put('\u2061', ""); 090 CharConverter.LATE_MAP_MAP.put('\u200b', ""); 091 CharConverter.LATE_MAP_MAP.put('\u2062', ""); 092 CharConverter.LATE_MAP_MAP.put('\u2148', ""); 093 /* 094 * This maps UnderBar -> Overbar. The regular mapping of underbars 095 * (0332) is a combining character, which produces incorrect text 096 * metrics. 097 * 098 * Underscore (_) should be used, but then the information about 099 * strechting is lost. 100 * 101 * OverBars are higher in the layout. However, UnderBars are usually 102 * only used in underscripts, where this produces no problem. 103 * 104 * TODO: Check if there are other combining characters among the 105 * default entities and map them accordingly. 106 */ 107 CharConverter.LATE_MAP_MAP.put('\u0332', "\u00AF"); 108 109 /* 110 * These are created by OpenOffice formula < 2.2. See 111 * http://www.openoffice.org/servlets/ReadMsg?list=dev&msgNo=543 112 * 113 * These are mapping from the private area of the "starSymbol" (now 114 * 'openSymbol') font. 115 */ 116 CharConverter.EARLY_MAP_MAP.put('\uE080', "\u2031"); 117 CharConverter.EARLY_MAP_MAP.put('\uE081', "\uF613"); 118 CharConverter.EARLY_MAP_MAP.put('\uE083', "\u002B"); 119 CharConverter.EARLY_MAP_MAP.put('\uE084', "\u003C"); 120 CharConverter.EARLY_MAP_MAP.put('\uE085', "\u003E"); 121 CharConverter.EARLY_MAP_MAP.put('\uE086', "\ue425"); 122 CharConverter.EARLY_MAP_MAP.put('\uE087', "\ue421"); 123 CharConverter.EARLY_MAP_MAP.put('\uE089', "\u2208"); 124 CharConverter.EARLY_MAP_MAP.put('\uE08A', "\u0192"); 125 CharConverter.EARLY_MAP_MAP.put('\uE08B', "\u2026"); 126 CharConverter.EARLY_MAP_MAP.put('\uE08C', "\u2192"); 127 CharConverter.EARLY_MAP_MAP.put('\uE091', "\u0302"); 128 CharConverter.EARLY_MAP_MAP.put('\uE092', "\u030C"); 129 CharConverter.EARLY_MAP_MAP.put('\uE093', "\u0306"); 130 CharConverter.EARLY_MAP_MAP.put('\uE094', "\u0301"); 131 CharConverter.EARLY_MAP_MAP.put('\uE095', "\u0300"); 132 CharConverter.EARLY_MAP_MAP.put('\uE096', "\u0303"); 133 CharConverter.EARLY_MAP_MAP.put('\uE097', "\u0304"); 134 // Was: 20D7, but 2192 is more widely supported 135 CharConverter.EARLY_MAP_MAP.put('\uE098', "\u2192"); 136 CharConverter.EARLY_MAP_MAP.put('\uE099', "\u02d9"); 137 CharConverter.EARLY_MAP_MAP.put('\uE09A', "\u0308"); 138 CharConverter.EARLY_MAP_MAP.put('\uE09B', "\u20DB"); 139 CharConverter.EARLY_MAP_MAP.put('\uE09C', "\u030A"); 140 CharConverter.EARLY_MAP_MAP.put('\uE09E', "\u0028"); 141 CharConverter.EARLY_MAP_MAP.put('\uE09F', "\u0029"); 142 CharConverter.EARLY_MAP_MAP.put('\uE0A2', "\u301A"); 143 CharConverter.EARLY_MAP_MAP.put('\uE0A3', "\u301B"); 144 CharConverter.EARLY_MAP_MAP.put('\uE0A4', "\u2373"); 145 CharConverter.EARLY_MAP_MAP.put('\uE0A8', "\u002F"); 146 CharConverter.EARLY_MAP_MAP.put('\uE0A9', "\\"); 147 CharConverter.EARLY_MAP_MAP.put('\uE0AA', "\u274F"); 148 CharConverter.EARLY_MAP_MAP.put('\uE0AC', "\u0393"); 149 CharConverter.EARLY_MAP_MAP.put('\uE0AD', "\u0394"); 150 CharConverter.EARLY_MAP_MAP.put('\uE0AE', "\u0398"); 151 CharConverter.EARLY_MAP_MAP.put('\uE0AF', "\u039b"); 152 CharConverter.EARLY_MAP_MAP.put('\uE0B0', "\u039e"); 153 CharConverter.EARLY_MAP_MAP.put('\uE0B1', "\u03A0"); 154 CharConverter.EARLY_MAP_MAP.put('\uE0B2', "\u03a3"); 155 CharConverter.EARLY_MAP_MAP.put('\uE0B3', "\u03a5"); 156 CharConverter.EARLY_MAP_MAP.put('\uE0B4', "\u03a6"); 157 CharConverter.EARLY_MAP_MAP.put('\uE0B5', "\u03a8"); 158 CharConverter.EARLY_MAP_MAP.put('\uE0B6', "\u03a9"); 159 CharConverter.EARLY_MAP_MAP.put('\uE0B7', "\u03b1"); 160 CharConverter.EARLY_MAP_MAP.put('\uE0B8', "\u03b2"); 161 CharConverter.EARLY_MAP_MAP.put('\uE0B9', "\u03b3"); 162 CharConverter.EARLY_MAP_MAP.put('\uE0BA', "\u03b4"); 163 CharConverter.EARLY_MAP_MAP.put('\uE0BB', "\u03b5"); 164 CharConverter.EARLY_MAP_MAP.put('\uE0BC', "\u03b6"); 165 CharConverter.EARLY_MAP_MAP.put('\uE0BD', "\u03b7"); 166 CharConverter.EARLY_MAP_MAP.put('\uE0BE', "\u03b8"); 167 CharConverter.EARLY_MAP_MAP.put('\uE0BF', "\u03b9"); 168 CharConverter.EARLY_MAP_MAP.put('\uE0C0', "\u03ba"); 169 CharConverter.EARLY_MAP_MAP.put('\uE0C1', "\u03bb"); 170 CharConverter.EARLY_MAP_MAP.put('\uE0C2', "\u03bc"); 171 CharConverter.EARLY_MAP_MAP.put('\uE0C3', "\u03bd"); 172 CharConverter.EARLY_MAP_MAP.put('\uE0C4', "\u03be"); 173 CharConverter.EARLY_MAP_MAP.put('\uE0C5', "\u03bf"); 174 CharConverter.EARLY_MAP_MAP.put('\uE0C6', "\u03c0"); 175 CharConverter.EARLY_MAP_MAP.put('\uE0C7', "\u03c1"); 176 CharConverter.EARLY_MAP_MAP.put('\uE0C8', "\u03c3"); 177 CharConverter.EARLY_MAP_MAP.put('\uE0C9', "\u03c4"); 178 CharConverter.EARLY_MAP_MAP.put('\uE0CA', "\u03c5"); 179 CharConverter.EARLY_MAP_MAP.put('\uE0CB', "\u03c6"); 180 CharConverter.EARLY_MAP_MAP.put('\uE0CC', "\u03c7"); 181 CharConverter.EARLY_MAP_MAP.put('\uE0CD', "\u03c8"); 182 CharConverter.EARLY_MAP_MAP.put('\uE0CE', "\u03c9"); 183 CharConverter.EARLY_MAP_MAP.put('\uE0CF', "\u03b5"); 184 CharConverter.EARLY_MAP_MAP.put('\uE0D0', "\u03d1"); 185 CharConverter.EARLY_MAP_MAP.put('\uE0D1', "\u03d6"); 186 CharConverter.EARLY_MAP_MAP.put('\uE0D2', "\u03f1"); 187 CharConverter.EARLY_MAP_MAP.put('\uE0D3', "\u03db"); 188 CharConverter.EARLY_MAP_MAP.put('\uE0D4', "\u2118"); 189 CharConverter.EARLY_MAP_MAP.put('\uE0D5', "\u2202"); 190 CharConverter.EARLY_MAP_MAP.put('\uE0D6', "\u2129"); 191 CharConverter.EARLY_MAP_MAP.put('\uE0D7', "\u2107"); 192 CharConverter.EARLY_MAP_MAP.put('\uE0D8', "\u2127"); 193 CharConverter.EARLY_MAP_MAP.put('\uE0D9', "\u22A4"); 194 CharConverter.EARLY_MAP_MAP.put('\uE0DA', "\u019B"); 195 CharConverter.EARLY_MAP_MAP.put('\uE0DB', "\u2190"); 196 CharConverter.EARLY_MAP_MAP.put('\uE0DC', "\u2191"); 197 CharConverter.EARLY_MAP_MAP.put('\uE0DD', "\u2193"); 198 199 // This set is generated by LaTeXMathML, and seems to be the set 200 // supported via Mathematica 4.1 fonts in Firefox 2.0 201 202 // SCRIPT CAPITALS 203 CharConverter.EARLY_MAP_MAP.put('\uEF35', new String( 204 new int[] { 0x1D49C }, 0, 1)); 205 CharConverter.EARLY_MAP_MAP.put('\uEF36', new String( 206 new int[] { 0x1D49E }, 0, 1)); 207 CharConverter.EARLY_MAP_MAP.put('\uEF37', new String( 208 new int[] { 0x1D49F }, 0, 1)); 209 CharConverter.EARLY_MAP_MAP.put('\uEF38', new String( 210 new int[] { 0x1D4A2 }, 0, 1)); 211 CharConverter.EARLY_MAP_MAP.put('\uEF39', new String( 212 new int[] { 0x1D4A5 }, 0, 1)); 213 CharConverter.EARLY_MAP_MAP.put('\uEF3A', new String( 214 new int[] { 0x1D4A6 }, 0, 1)); 215 CharConverter.EARLY_MAP_MAP.put('\uEF3B', new String( 216 new int[] { 0x1D4A9 }, 0, 1)); 217 CharConverter.EARLY_MAP_MAP.put('\uEF3C', new String( 218 new int[] { 0x1D4AA }, 0, 1)); 219 CharConverter.EARLY_MAP_MAP.put('\uEF3D', new String( 220 new int[] { 0x1D4AB }, 0, 1)); 221 CharConverter.EARLY_MAP_MAP.put('\uEF3E', new String( 222 new int[] { 0x1D4AC }, 0, 1)); 223 CharConverter.EARLY_MAP_MAP.put('\uEF3F', new String( 224 new int[] { 0x1D4AE }, 0, 1)); 225 CharConverter.EARLY_MAP_MAP.put('\uEF40', new String( 226 new int[] { 0x1D4AF }, 0, 1)); 227 CharConverter.EARLY_MAP_MAP.put('\uEF41', new String( 228 new int[] { 0x1D4B0 }, 0, 1)); 229 CharConverter.EARLY_MAP_MAP.put('\uEF42', new String( 230 new int[] { 0x1D4B1 }, 0, 1)); 231 CharConverter.EARLY_MAP_MAP.put('\uEF43', new String( 232 new int[] { 0x1D4B2 }, 0, 1)); 233 CharConverter.EARLY_MAP_MAP.put('\uEF44', new String( 234 new int[] { 0x1D4B3 }, 0, 1)); 235 CharConverter.EARLY_MAP_MAP.put('\uEF45', new String( 236 new int[] { 0x1D4B4 }, 0, 1)); 237 CharConverter.EARLY_MAP_MAP.put('\uEF46', new String( 238 new int[] { 0x1D4B5 }, 0, 1)); 239 240 // FRAKTUR 241 CharConverter.EARLY_MAP_MAP.put('\uEF5D', new String( 242 new int[] { 0x1D504 }, 0, 1)); 243 CharConverter.EARLY_MAP_MAP.put('\uEF5E', new String( 244 new int[] { 0x1D505 }, 0, 1)); 245 CharConverter.EARLY_MAP_MAP.put('\uEF5F', new String( 246 new int[] { 0x1D507 }, 0, 1)); 247 CharConverter.EARLY_MAP_MAP.put('\uEF60', new String( 248 new int[] { 0x1D508 }, 0, 1)); 249 CharConverter.EARLY_MAP_MAP.put('\uEF61', new String( 250 new int[] { 0x1D509 }, 0, 1)); 251 CharConverter.EARLY_MAP_MAP.put('\uEF62', new String( 252 new int[] { 0x1D50A }, 0, 1)); 253 CharConverter.EARLY_MAP_MAP.put('\uEF63', new String( 254 new int[] { 0x1D50D }, 0, 1)); 255 CharConverter.EARLY_MAP_MAP.put('\uEF64', new String( 256 new int[] { 0x1D50E }, 0, 1)); 257 CharConverter.EARLY_MAP_MAP.put('\uEF65', new String( 258 new int[] { 0x1D50F }, 0, 1)); 259 CharConverter.EARLY_MAP_MAP.put('\uEF66', new String( 260 new int[] { 0x1D510 }, 0, 1)); 261 CharConverter.EARLY_MAP_MAP.put('\uEF67', new String( 262 new int[] { 0x1D511 }, 0, 1)); 263 CharConverter.EARLY_MAP_MAP.put('\uEF68', new String( 264 new int[] { 0x1D512 }, 0, 1)); 265 CharConverter.EARLY_MAP_MAP.put('\uEF69', new String( 266 new int[] { 0x1D513 }, 0, 1)); 267 CharConverter.EARLY_MAP_MAP.put('\uEF6A', new String( 268 new int[] { 0x1D514 }, 0, 1)); 269 CharConverter.EARLY_MAP_MAP.put('\uEF6B', new String( 270 new int[] { 0x1D516 }, 0, 1)); 271 CharConverter.EARLY_MAP_MAP.put('\uEF6C', new String( 272 new int[] { 0x1D517 }, 0, 1)); 273 CharConverter.EARLY_MAP_MAP.put('\uEF6D', new String( 274 new int[] { 0x1D518 }, 0, 1)); 275 CharConverter.EARLY_MAP_MAP.put('\uEF6E', new String( 276 new int[] { 0x1D519 }, 0, 1)); 277 CharConverter.EARLY_MAP_MAP.put('\uEF6F', new String( 278 new int[] { 0x1D51A }, 0, 1)); 279 CharConverter.EARLY_MAP_MAP.put('\uEF70', new String( 280 new int[] { 0x1D51B }, 0, 1)); 281 CharConverter.EARLY_MAP_MAP.put('\uEF71', new String( 282 new int[] { 0x1D51C }, 0, 1)); 283 284 // DOUBLE_STRUCK CAPITALS 285 CharConverter.EARLY_MAP_MAP.put('\uEF8C', new String( 286 new int[] { 0x1D538 }, 0, 1)); 287 CharConverter.EARLY_MAP_MAP.put('\uEF8D', new String( 288 new int[] { 0x1D539 }, 0, 1)); 289 CharConverter.EARLY_MAP_MAP.put('\uEF8E', new String( 290 new int[] { 0x1D53B }, 0, 1)); 291 CharConverter.EARLY_MAP_MAP.put('\uEF8F', new String( 292 new int[] { 0x1D53C }, 0, 1)); 293 CharConverter.EARLY_MAP_MAP.put('\uEF90', new String( 294 new int[] { 0x1D53D }, 0, 1)); 295 CharConverter.EARLY_MAP_MAP.put('\uEF91', new String( 296 new int[] { 0x1D53E }, 0, 1)); 297 CharConverter.EARLY_MAP_MAP.put('\uEF92', new String( 298 new int[] { 0x1D540 }, 0, 1)); 299 CharConverter.EARLY_MAP_MAP.put('\uEF93', new String( 300 new int[] { 0x1D541 }, 0, 1)); 301 CharConverter.EARLY_MAP_MAP.put('\uEF94', new String( 302 new int[] { 0x1D542 }, 0, 1)); 303 CharConverter.EARLY_MAP_MAP.put('\uEF95', new String( 304 new int[] { 0x1D543 }, 0, 1)); 305 CharConverter.EARLY_MAP_MAP.put('\uEF96', new String( 306 new int[] { 0x1D544 }, 0, 1)); 307 CharConverter.EARLY_MAP_MAP.put('\uEF97', new String( 308 new int[] { 0x1D546 }, 0, 1)); 309 CharConverter.EARLY_MAP_MAP.put('\uEF98', new String( 310 new int[] { 0x1D54A }, 0, 1)); 311 CharConverter.EARLY_MAP_MAP.put('\uEF99', new String( 312 new int[] { 0x1D54B }, 0, 1)); 313 CharConverter.EARLY_MAP_MAP.put('\uEF9A', new String( 314 new int[] { 0x1D54C }, 0, 1)); 315 CharConverter.EARLY_MAP_MAP.put('\uEF9B', new String( 316 new int[] { 0x1D54D }, 0, 1)); 317 CharConverter.EARLY_MAP_MAP.put('\uEF9C', new String( 318 new int[] { 0x1D54E }, 0, 1)); 319 CharConverter.EARLY_MAP_MAP.put('\uEF9D', new String( 320 new int[] { 0x1D54F }, 0, 1)); 321 CharConverter.EARLY_MAP_MAP.put('\uEF9E', new String( 322 new int[] { 0x1D550 }, 0, 1)); 323 // CHECKSTYLE:ON 324 } 325 }