001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements. See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership. The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018 /*
019 * $Id: ToHTMLStream.java 1225444 2011-12-29 05:52:39Z mrglavas $
020 */
021 package org.apache.xml.serializer;
022
023 import java.io.IOException;
024 import java.util.Properties;
025
026 import javax.xml.transform.Result;
027
028 import org.apache.xml.serializer.utils.MsgKey;
029 import org.apache.xml.serializer.utils.Utils;
030 import org.xml.sax.Attributes;
031 import org.xml.sax.SAXException;
032
033 /**
034 * This serializer takes a series of SAX or
035 * SAX-like events and writes its output
036 * to the given stream.
037 *
038 * This class is not a public API, it is public
039 * because it is used from another package.
040 *
041 * @xsl.usage internal
042 */
043 public class ToHTMLStream extends ToStream
044 {
045
/** Flag raised while events coming from the DTD are being received. */
protected boolean m_inDTD = false;

/**
 * Block-element tracking flag consulted by the indentation logic.
 * NOTE(review): startElement() and endElement() assign this the negation
 * of the element's BLOCK flag, so the historical wording "true if the
 * current element is a block element" looks inverted -- confirm.
 * (Seems like this needs to be a stack. -sb)
 */
private boolean m_inBlockElem = false;

/**
 * Character map telling which characters need special treatment on output
 * and providing the character to entity name lookup, obtained for the
 * HTML entities resource and the HTML output method.
 */
private final CharInfo m_htmlcharInfo =
    CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);

/**
 * Digital search trie giving fast, case insensitive lookup of the
 * ElemDesc object registered for an HTML element name.
 */
static final Trie m_elementFlags = new Trie();

// Fill the shared element table once, when the class is initialized.
static
{
    initTagReference(m_elementFlags);
}
067 static void initTagReference(Trie m_elementFlags) {
068
069 // HTML 4.0 loose DTD
070 m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
071 m_elementFlags.put(
072 "FRAME",
073 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
074 m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
075 m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
076 m_elementFlags.put(
077 "ISINDEX",
078 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
079 m_elementFlags.put(
080 "APPLET",
081 new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
082 m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
083 m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
084 m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
085
086 // HTML 4.0 strict DTD
087 m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
088 m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
089 m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
090 m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
091 m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
092 m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
093 m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
094 m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
095 m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
096 m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
097 m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
098 m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
099 m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
100 m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
101 m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
102 m_elementFlags.put(
103 "SUP",
104 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
105 m_elementFlags.put(
106 "SUB",
107 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
108 m_elementFlags.put(
109 "SPAN",
110 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
111 m_elementFlags.put(
112 "BDO",
113 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
114 m_elementFlags.put(
115 "BR",
116 new ElemDesc(
117 0
118 | ElemDesc.SPECIAL
119 | ElemDesc.ASPECIAL
120 | ElemDesc.EMPTY
121 | ElemDesc.BLOCK));
122 m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
123 m_elementFlags.put(
124 "ADDRESS",
125 new ElemDesc(
126 0
127 | ElemDesc.BLOCK
128 | ElemDesc.BLOCKFORM
129 | ElemDesc.BLOCKFORMFIELDSET));
130 m_elementFlags.put(
131 "DIV",
132 new ElemDesc(
133 0
134 | ElemDesc.BLOCK
135 | ElemDesc.BLOCKFORM
136 | ElemDesc.BLOCKFORMFIELDSET));
137 m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
138 m_elementFlags.put(
139 "MAP",
140 new ElemDesc(
141 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
142 m_elementFlags.put(
143 "AREA",
144 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
145 m_elementFlags.put(
146 "LINK",
147 new ElemDesc(
148 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
149 m_elementFlags.put(
150 "IMG",
151 new ElemDesc(
152 0
153 | ElemDesc.SPECIAL
154 | ElemDesc.ASPECIAL
155 | ElemDesc.EMPTY
156 | ElemDesc.WHITESPACESENSITIVE));
157 m_elementFlags.put(
158 "OBJECT",
159 new ElemDesc(
160 0
161 | ElemDesc.SPECIAL
162 | ElemDesc.ASPECIAL
163 | ElemDesc.HEADMISC
164 | ElemDesc.WHITESPACESENSITIVE));
165 m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
166 m_elementFlags.put(
167 "HR",
168 new ElemDesc(
169 0
170 | ElemDesc.BLOCK
171 | ElemDesc.BLOCKFORM
172 | ElemDesc.BLOCKFORMFIELDSET
173 | ElemDesc.EMPTY));
174 m_elementFlags.put(
175 "P",
176 new ElemDesc(
177 0
178 | ElemDesc.BLOCK
179 | ElemDesc.BLOCKFORM
180 | ElemDesc.BLOCKFORMFIELDSET));
181 m_elementFlags.put(
182 "H1",
183 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
184 m_elementFlags.put(
185 "H2",
186 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
187 m_elementFlags.put(
188 "H3",
189 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
190 m_elementFlags.put(
191 "H4",
192 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
193 m_elementFlags.put(
194 "H5",
195 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
196 m_elementFlags.put(
197 "H6",
198 new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
199 m_elementFlags.put(
200 "PRE",
201 new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
202 m_elementFlags.put(
203 "Q",
204 new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
205 m_elementFlags.put(
206 "BLOCKQUOTE",
207 new ElemDesc(
208 0
209 | ElemDesc.BLOCK
210 | ElemDesc.BLOCKFORM
211 | ElemDesc.BLOCKFORMFIELDSET));
212 m_elementFlags.put("INS", new ElemDesc(0));
213 m_elementFlags.put("DEL", new ElemDesc(0));
214 m_elementFlags.put(
215 "DL",
216 new ElemDesc(
217 0
218 | ElemDesc.BLOCK
219 | ElemDesc.BLOCKFORM
220 | ElemDesc.BLOCKFORMFIELDSET));
221 m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
222 m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
223 m_elementFlags.put(
224 "OL",
225 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
226 m_elementFlags.put(
227 "UL",
228 new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
229 m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
230 m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
231 m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
232 m_elementFlags.put(
233 "INPUT",
234 new ElemDesc(
235 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
236 m_elementFlags.put(
237 "SELECT",
238 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
239 m_elementFlags.put("OPTGROUP", new ElemDesc(0));
240 m_elementFlags.put("OPTION", new ElemDesc(0));
241 m_elementFlags.put(
242 "TEXTAREA",
243 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
244 m_elementFlags.put(
245 "FIELDSET",
246 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
247 m_elementFlags.put("LEGEND", new ElemDesc(0));
248 m_elementFlags.put(
249 "BUTTON",
250 new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
251 m_elementFlags.put(
252 "TABLE",
253 new ElemDesc(
254 0
255 | ElemDesc.BLOCK
256 | ElemDesc.BLOCKFORM
257 | ElemDesc.BLOCKFORMFIELDSET));
258 m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
259 m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
260 m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
261 m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
262 m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
263 m_elementFlags.put(
264 "COL",
265 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
266 m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
267 m_elementFlags.put("TH", new ElemDesc(0));
268 m_elementFlags.put("TD", new ElemDesc(0));
269 m_elementFlags.put(
270 "HEAD",
271 new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
272 m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
273 m_elementFlags.put(
274 "BASE",
275 new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
276 m_elementFlags.put(
277 "META",
278 new ElemDesc(
279 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
280 m_elementFlags.put(
281 "STYLE",
282 new ElemDesc(
283 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
284 m_elementFlags.put(
285 "SCRIPT",
286 new ElemDesc(
287 0
288 | ElemDesc.SPECIAL
289 | ElemDesc.ASPECIAL
290 | ElemDesc.HEADMISC
291 | ElemDesc.RAW));
292 m_elementFlags.put(
293 "NOSCRIPT",
294 new ElemDesc(
295 0
296 | ElemDesc.BLOCK
297 | ElemDesc.BLOCKFORM
298 | ElemDesc.BLOCKFORMFIELDSET));
299 m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HTMLELEM));
300
301 // From "John Ky" <hand@syd.speednet.com.au
302 // Transitional Document Type Definition ()
303 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
304 m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
305
306 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
307 m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
308 m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
309
310 // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
311 m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
312
313 // From "John Ky" <hand@syd.speednet.com.au
314 m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
315
316 // HTML 4.0, section 16.5
317 m_elementFlags.put(
318 "IFRAME",
319 new ElemDesc(
320 0
321 | ElemDesc.BLOCK
322 | ElemDesc.BLOCKFORM
323 | ElemDesc.BLOCKFORMFIELDSET));
324
325 // Netscape 4 extension
326 m_elementFlags.put(
327 "LAYER",
328 new ElemDesc(
329 0
330 | ElemDesc.BLOCK
331 | ElemDesc.BLOCKFORM
332 | ElemDesc.BLOCKFORMFIELDSET));
333 // Netscape 4 extension
334 m_elementFlags.put(
335 "ILAYER",
336 new ElemDesc(
337 0
338 | ElemDesc.BLOCK
339 | ElemDesc.BLOCKFORM
340 | ElemDesc.BLOCKFORMFIELDSET));
341
342 // NOW FOR ATTRIBUTE INFORMATION . . .
343 ElemDesc elemDesc;
344
345
346 // ----------------------------------------------
347 elemDesc = (ElemDesc) m_elementFlags.get("a");
348 elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
349 elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
350
351 // ----------------------------------------------
352 elemDesc = (ElemDesc) m_elementFlags.get("area");
353
354 elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
355 elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
356
357 // ----------------------------------------------
358 elemDesc = (ElemDesc) m_elementFlags.get("base");
359
360 elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
361
362 // ----------------------------------------------
363 elemDesc = (ElemDesc) m_elementFlags.get("button");
364 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
365
366 // ----------------------------------------------
367 elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
368
369 elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
370
371 // ----------------------------------------------
372 elemDesc = (ElemDesc) m_elementFlags.get("del");
373 elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
374
375 // ----------------------------------------------
376 elemDesc = (ElemDesc) m_elementFlags.get("dir");
377 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
378
379 // ----------------------------------------------
380
381 elemDesc = (ElemDesc) m_elementFlags.get("div");
382 elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
383 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
384
385 // ----------------------------------------------
386 elemDesc = (ElemDesc) m_elementFlags.get("dl");
387 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
388
389 // ----------------------------------------------
390 elemDesc = (ElemDesc) m_elementFlags.get("form");
391 elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
392
393 // ----------------------------------------------
394 // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
395 elemDesc = (ElemDesc) m_elementFlags.get("frame");
396 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
397 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
398 elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
399
400 // ----------------------------------------------
401 elemDesc = (ElemDesc) m_elementFlags.get("head");
402 elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
403
404 // ----------------------------------------------
405 elemDesc = (ElemDesc) m_elementFlags.get("hr");
406 elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
407
408 // ----------------------------------------------
409 // HTML 4.0, section 16.5
410 elemDesc = (ElemDesc) m_elementFlags.get("iframe");
411 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
412 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
413
414 // ----------------------------------------------
415 // Netscape 4 extension
416 elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
417 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
418
419 // ----------------------------------------------
420 elemDesc = (ElemDesc) m_elementFlags.get("img");
421 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
422 elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
423 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
424 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
425
426 // ----------------------------------------------
427 elemDesc = (ElemDesc) m_elementFlags.get("input");
428
429 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
430 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
431 elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
432 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
433 elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
434 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
435
436 // ----------------------------------------------
437 elemDesc = (ElemDesc) m_elementFlags.get("ins");
438 elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
439
440 // ----------------------------------------------
441 // Netscape 4 extension
442 elemDesc = (ElemDesc) m_elementFlags.get("layer");
443 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
444
445 // ----------------------------------------------
446 elemDesc = (ElemDesc) m_elementFlags.get("link");
447 elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
448
449 // ----------------------------------------------
450 elemDesc = (ElemDesc) m_elementFlags.get("menu");
451 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
452
453 // ----------------------------------------------
454 elemDesc = (ElemDesc) m_elementFlags.get("object");
455
456 elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
457 elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
458 elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
459 elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
460 elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
461 elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
462
463 // ----------------------------------------------
464 elemDesc = (ElemDesc) m_elementFlags.get("ol");
465 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
466
467 // ----------------------------------------------
468 elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
469 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
470
471 // ----------------------------------------------
472 elemDesc = (ElemDesc) m_elementFlags.get("option");
473 elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
474 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
475
476 // ----------------------------------------------
477 elemDesc = (ElemDesc) m_elementFlags.get("q");
478 elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
479
480 // ----------------------------------------------
481 elemDesc = (ElemDesc) m_elementFlags.get("script");
482 elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
483 elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
484 elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
485
486 // ----------------------------------------------
487 elemDesc = (ElemDesc) m_elementFlags.get("select");
488 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
489 elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
490
491 // ----------------------------------------------
492 elemDesc = (ElemDesc) m_elementFlags.get("table");
493 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
494
495 // ----------------------------------------------
496 elemDesc = (ElemDesc) m_elementFlags.get("td");
497 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
498
499 // ----------------------------------------------
500 elemDesc = (ElemDesc) m_elementFlags.get("textarea");
501 elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
502 elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
503
504 // ----------------------------------------------
505 elemDesc = (ElemDesc) m_elementFlags.get("th");
506 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
507
508 // ----------------------------------------------
509 // The nowrap attribute of a tr element is both
510 // a Netscape and Internet-Explorer extension
511 elemDesc = (ElemDesc) m_elementFlags.get("tr");
512 elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
513
514 // ----------------------------------------------
515 elemDesc = (ElemDesc) m_elementFlags.get("ul");
516 elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
517 }
518
519 /**
520 * Dummy element for elements not found.
521 */
522 static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
523
524 /** True if URLs should be specially escaped with the %xx form. */
525 private boolean m_specialEscapeURLs = true;
526
527 /** True if the META tag should be omitted. */
528 private boolean m_omitMetaTag = false;
529
530 /**
531 * Tells if the formatter should use special URL escaping.
532 *
533 * @param bool True if URLs should be specially escaped with the %xx form.
534 */
535 public void setSpecialEscapeURLs(boolean bool)
536 {
537 m_specialEscapeURLs = bool;
538 }
539
540 /**
541 * Tells if the formatter should omit the META tag.
542 *
543 * @param bool True if the META tag should be omitted.
544 */
545 public void setOmitMetaTag(boolean bool)
546 {
547 m_omitMetaTag = bool;
548 }
549
550 /**
551 * Specifies an output format for this serializer. It the
552 * serializer has already been associated with an output format,
553 * it will switch to the new format. This method should not be
554 * called while the serializer is in the process of serializing
555 * a document.
556 *
557 * This method can be called multiple times before starting
558 * the serialization of a particular result-tree. In principle
559 * all serialization parameters can be changed, with the exception
560 * of method="html" (it must be method="html" otherwise we
561 * shouldn't even have a ToHTMLStream object here!)
562 *
563 * @param format The output format or serialzation parameters
564 * to use.
565 */
566 public void setOutputFormat(Properties format)
567 {
568 /*
569 * If "format" does not contain the property
570 * S_USE_URL_ESCAPING, then don't set this value at all,
571 * just leave as-is rather than explicitly setting it.
572 */
573 String value;
574 value = format.getProperty(OutputPropertiesFactory.S_USE_URL_ESCAPING);
575 if (value != null) {
576 m_specialEscapeURLs =
577 OutputPropertyUtils.getBooleanProperty(
578 OutputPropertiesFactory.S_USE_URL_ESCAPING,
579 format);
580 }
581
582 /*
583 * If "format" does not contain the property
584 * S_OMIT_META_TAG, then don't set this value at all,
585 * just leave as-is rather than explicitly setting it.
586 */
587 value = format.getProperty(OutputPropertiesFactory.S_OMIT_META_TAG);
588 if (value != null) {
589 m_omitMetaTag =
590 OutputPropertyUtils.getBooleanProperty(
591 OutputPropertiesFactory.S_OMIT_META_TAG,
592 format);
593 }
594
595 super.setOutputFormat(format);
596 }
597
598 /**
599 * Tells if the formatter should use special URL escaping.
600 *
601 * @return True if URLs should be specially escaped with the %xx form.
602 */
603 private final boolean getSpecialEscapeURLs()
604 {
605 return m_specialEscapeURLs;
606 }
607
608 /**
609 * Tells if the formatter should omit the META tag.
610 *
611 * @return True if the META tag should be omitted.
612 */
613 private final boolean getOmitMetaTag()
614 {
615 return m_omitMetaTag;
616 }
617
618 /**
619 * Get a description of the given element.
620 *
621 * @param name non-null name of element, case insensitive.
622 *
623 * @return non-null reference to ElemDesc, which may be m_dummy if no
624 * element description matches the given name.
625 */
626 public static final ElemDesc getElemDesc(String name)
627 {
628 /* this method used to return m_dummy when name was null
629 * but now it doesn't check and and requires non-null name.
630 */
631 Object obj = m_elementFlags.get(name);
632 if (null != obj)
633 return (ElemDesc)obj;
634 return m_dummy;
635 }
636
637
638 /**
639 * A Trie that is just a copy of the "static" one.
640 * We need this one to be able to use the faster, but not thread-safe
641 * method Trie.get2(name)
642 */
643 private Trie m_htmlInfo = new Trie(m_elementFlags);
644 /**
645 * Calls to this method could be replaced with calls to
646 * getElemDesc(name), but this one should be faster.
647 */
648 private ElemDesc getElemDesc2(String name)
649 {
650 Object obj = m_htmlInfo.get2(name);
651 if (null != obj)
652 return (ElemDesc)obj;
653 return m_dummy;
654 }
655
/**
 * Default constructor: turns indentation on, installs the HTML character
 * map and creates a fresh set of namespace mappings.
 */
public ToHTMLStream()
{
    super();

    // No output properties have been applied at construction time, so
    // set the right default for indenting here anyway.
    m_doIndent = true;

    // Use the HTML entity map instead of whatever the superclass set up.
    m_charInfo = m_htmlcharInfo;

    // Initialize namespaces.
    m_prefixMap = new NamespaceMappings();
}
672
673 /** The name of the current element. */
674 // private String m_currentElementName = null;
675
676 /**
677 * Receive notification of the beginning of a document.
678 *
679 * @throws org.xml.sax.SAXException Any SAX exception, possibly
680 * wrapping another exception.
681 *
682 * @throws org.xml.sax.SAXException
683 */
684 protected void startDocumentInternal() throws org.xml.sax.SAXException
685 {
686 super.startDocumentInternal();
687
688 m_needToCallStartDocument = false;
689 m_needToOutputDocTypeDecl = true;
690 m_startNewLine = false;
691 setOmitXMLDeclaration(true);
692 }
693
694 /**
695 * This method should only get called once.
696 * If a DOCTYPE declaration needs to get written out, it will
697 * be written out. If it doesn't need to be written out, then
698 * the call to this method has no effect.
699 */
700 private void outputDocTypeDecl(String name) throws SAXException {
701 if (true == m_needToOutputDocTypeDecl)
702 {
703 String doctypeSystem = getDoctypeSystem();
704 String doctypePublic = getDoctypePublic();
705 if ((null != doctypeSystem) || (null != doctypePublic))
706 {
707 final java.io.Writer writer = m_writer;
708 try
709 {
710 writer.write("<!DOCTYPE ");
711 writer.write(name);
712
713 if (null != doctypePublic)
714 {
715 writer.write(" PUBLIC \"");
716 writer.write(doctypePublic);
717 writer.write('"');
718 }
719
720 if (null != doctypeSystem)
721 {
722 if (null == doctypePublic)
723 writer.write(" SYSTEM \"");
724 else
725 writer.write(" \"");
726
727 writer.write(doctypeSystem);
728 writer.write('"');
729 }
730
731 writer.write('>');
732 outputLineSep();
733 }
734 catch(IOException e)
735 {
736 throw new SAXException(e);
737 }
738 }
739 }
740
741 m_needToOutputDocTypeDecl = false;
742 }
743
744 /**
745 * Receive notification of the end of a document.
746 *
747 * @throws org.xml.sax.SAXException Any SAX exception, possibly
748 * wrapping another exception.
749 *
750 * @throws org.xml.sax.SAXException
751 */
752 public final void endDocument() throws org.xml.sax.SAXException
753 {
754
755 flushPending();
756 if (m_doIndent && !m_isprevtext)
757 {
758 try
759 {
760 outputLineSep();
761 }
762 catch(IOException e)
763 {
764 throw new SAXException(e);
765 }
766 }
767
768 flushWriter();
769 if (m_tracer != null)
770 super.fireEndDoc();
771 }
772
773 /**
774 * Receive notification of the beginning of an element.
775 *
776 *
777 * @param namespaceURI
778 * @param localName
779 * @param name The element type name.
780 * @param atts The attributes attached to the element, if any.
781 * @throws org.xml.sax.SAXException Any SAX exception, possibly
782 * wrapping another exception.
783 * @see #endElement
784 * @see org.xml.sax.AttributeList
785 */
786 public void startElement(
787 String namespaceURI,
788 String localName,
789 String name,
790 Attributes atts)
791 throws org.xml.sax.SAXException
792 {
793
794 ElemContext elemContext = m_elemContext;
795
796 // clean up any pending things first
797 if (elemContext.m_startTagOpen)
798 {
799 closeStartTag();
800 elemContext.m_startTagOpen = false;
801 }
802 else if (m_cdataTagOpen)
803 {
804 closeCDATA();
805 m_cdataTagOpen = false;
806 }
807 else if (m_needToCallStartDocument)
808 {
809 startDocumentInternal();
810 m_needToCallStartDocument = false;
811 }
812
813 if (m_needToOutputDocTypeDecl) {
814 String n = name;
815 if (n == null || n.length() == 0) {
816 // If the lexical QName is not given
817 // use the localName in the DOCTYPE
818 n = localName;
819 }
820 outputDocTypeDecl(n);
821 }
822
823
824 // if this element has a namespace then treat it like XML
825 if (null != namespaceURI && namespaceURI.length() > 0)
826 {
827 super.startElement(namespaceURI, localName, name, atts);
828
829 return;
830 }
831
832 try
833 {
834 // getElemDesc2(name) is faster than getElemDesc(name)
835 ElemDesc elemDesc = getElemDesc2(name);
836 int elemFlags = elemDesc.getFlags();
837
838 // deal with indentation issues first
839 if (m_doIndent)
840 {
841
842 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
843 if (m_ispreserve)
844 m_ispreserve = false;
845 else if (
846 (null != elemContext.m_elementName)
847 && (!m_inBlockElem
848 || isBlockElement) /* && !isWhiteSpaceSensitive */
849 )
850 {
851 m_startNewLine = true;
852
853 indent();
854
855 }
856 m_inBlockElem = !isBlockElement;
857 }
858
859 // save any attributes for later processing
860 if (atts != null)
861 addAttributes(atts);
862
863 m_isprevtext = false;
864 final java.io.Writer writer = m_writer;
865 writer.write('<');
866 writer.write(name);
867
868
869
870 if (m_tracer != null)
871 firePseudoAttributes();
872
873 if ((elemFlags & ElemDesc.EMPTY) != 0)
874 {
875 // an optimization for elements which are expected
876 // to be empty.
877 m_elemContext = elemContext.push();
878 /* XSLTC sometimes calls namespaceAfterStartElement()
879 * so we need to remember the name
880 */
881 m_elemContext.m_elementName = name;
882 m_elemContext.m_elementDesc = elemDesc;
883 return;
884 }
885 else
886 {
887 elemContext = elemContext.push(namespaceURI,localName,name);
888 m_elemContext = elemContext;
889 elemContext.m_elementDesc = elemDesc;
890 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
891 }
892
893
894 if ((elemFlags & ElemDesc.HEADELEM) != 0)
895 {
896 // This is the <HEAD> element, do some special processing
897 closeStartTag();
898 elemContext.m_startTagOpen = false;
899 if (!m_omitMetaTag)
900 {
901 if (m_doIndent)
902 indent();
903 writer.write(
904 "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
905 String encoding = getEncoding();
906 String encode = Encodings.getMimeEncoding(encoding);
907 writer.write(encode);
908 writer.write("\">");
909 }
910 }
911 }
912 catch (IOException e)
913 {
914 throw new SAXException(e);
915 }
916 }
917
918 /**
919 * Receive notification of the end of an element.
920 *
921 *
922 * @param namespaceURI
923 * @param localName
924 * @param name The element type name
925 * @throws org.xml.sax.SAXException Any SAX exception, possibly
926 * wrapping another exception.
927 */
928 public final void endElement(
929 final String namespaceURI,
930 final String localName,
931 final String name)
932 throws org.xml.sax.SAXException
933 {
934 // deal with any pending issues
935 if (m_cdataTagOpen)
936 closeCDATA();
937
938 // if the element has a namespace, treat it like XML, not HTML
939 if (null != namespaceURI && namespaceURI.length() > 0)
940 {
941 super.endElement(namespaceURI, localName, name);
942
943 return;
944 }
945
946 try
947 {
948
949 ElemContext elemContext = m_elemContext;
950 final ElemDesc elemDesc = elemContext.m_elementDesc;
951 final int elemFlags = elemDesc.getFlags();
952 final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
953
954 // deal with any indentation issues
955 if (m_doIndent)
956 {
957 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
958 boolean shouldIndent = false;
959
960 if (m_ispreserve)
961 {
962 m_ispreserve = false;
963 }
964 else if (m_doIndent && (!m_inBlockElem || isBlockElement))
965 {
966 m_startNewLine = true;
967 shouldIndent = true;
968 }
969 if (!elemContext.m_startTagOpen && shouldIndent)
970 indent(elemContext.m_currentElemDepth - 1);
971 m_inBlockElem = !isBlockElement;
972 }
973
974 final java.io.Writer writer = m_writer;
975 if (!elemContext.m_startTagOpen)
976 {
977 writer.write("</");
978 writer.write(name);
979 writer.write('>');
980 }
981 else
982 {
983 // the start-tag open when this method was called,
984 // so we need to process it now.
985
986 if (m_tracer != null)
987 super.fireStartElem(name);
988
989 // the starting tag was still open when we received this endElement() call
990 // so we need to process any gathered attributes NOW, before they go away.
991 int nAttrs = m_attributes.getLength();
992 if (nAttrs > 0)
993 {
994 processAttributes(m_writer, nAttrs);
995 // clear attributes object for re-use with next element
996 m_attributes.clear();
997 }
998 if (!elemEmpty)
999 {
1000 // As per Dave/Paul recommendation 12/06/2000
1001 // if (shouldIndent)
1002 // writer.write('>');
1003 // indent(m_currentIndent);
1004
1005 writer.write("></");
1006 writer.write(name);
1007 writer.write('>');
1008 }
1009 else
1010 {
1011 writer.write('>');
1012 }
1013 }
1014
1015 // clean up because the element has ended
1016 if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
1017 m_ispreserve = true;
1018 m_isprevtext = false;
1019
1020 // fire off the end element event
1021 if (m_tracer != null)
1022 super.fireEndElem(name);
1023
1024 // OPTIMIZE-EMPTY
1025 if (elemEmpty)
1026 {
1027 // a quick exit if the HTML element had no children.
1028 // This block of code can be removed if the corresponding block of code
1029 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1030 m_elemContext = elemContext.m_prev;
1031 return;
1032 }
1033
1034 // some more clean because the element has ended.
1035 if (!elemContext.m_startTagOpen)
1036 {
1037 if (m_doIndent && !m_preserves.isEmpty())
1038 m_preserves.pop();
1039 }
1040 m_elemContext = elemContext.m_prev;
1041 // m_isRawStack.pop();
1042 }
1043 catch (IOException e)
1044 {
1045 throw new SAXException(e);
1046 }
1047 }
1048
1049 /**
1050 * Process an attribute.
1051 * @param writer The writer to write the processed output to.
1052 * @param name The name of the attribute.
1053 * @param value The value of the attribute.
1054 * @param elemDesc The description of the HTML element
1055 * that has this attribute.
1056 *
1057 * @throws org.xml.sax.SAXException
1058 */
1059 protected void processAttribute(
1060 java.io.Writer writer,
1061 String name,
1062 String value,
1063 ElemDesc elemDesc)
1064 throws IOException
1065 {
1066 writer.write(' ');
1067
1068 if ( ((value.length() == 0) || value.equalsIgnoreCase(name))
1069 && elemDesc != null
1070 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1071 {
1072 writer.write(name);
1073 }
1074 else
1075 {
1076 // %REVIEW% %OPT%
1077 // Two calls to single-char write may NOT
1078 // be more efficient than one to string-write...
1079 writer.write(name);
1080 writer.write("=\"");
1081 if ( elemDesc != null
1082 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1083 writeAttrURI(writer, value, m_specialEscapeURLs);
1084 else
1085 writeAttrString(writer, value, this.getEncoding());
1086 writer.write('"');
1087
1088 }
1089 }
1090
1091 /**
1092 * Tell if a character is an ASCII digit.
1093 */
1094 private boolean isASCIIDigit(char c)
1095 {
1096 return (c >= '0' && c <= '9');
1097 }
1098
1099 /**
1100 * Make an integer into an HH hex value.
1101 * Does no checking on the size of the input, since this
1102 * is only meant to be used locally by writeAttrURI.
1103 *
1104 * @param i must be a value less than 255.
1105 *
1106 * @return should be a two character string.
1107 */
1108 private static String makeHHString(int i)
1109 {
1110 String s = Integer.toHexString(i).toUpperCase();
1111 if (s.length() == 1)
1112 {
1113 s = "0" + s;
1114 }
1115 return s;
1116 }
1117
1118 /**
1119 * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1120 * @param str must be 2 characters long
1121 *
1122 * @return true or false
1123 */
1124 private boolean isHHSign(String str)
1125 {
1126 boolean sign = true;
1127 try
1128 {
1129 char r = (char) Integer.parseInt(str, 16);
1130 }
1131 catch (NumberFormatException e)
1132 {
1133 sign = false;
1134 }
1135 return sign;
1136 }
1137
1138 /**
1139 * Write the specified <var>string</var> after substituting non ASCII characters,
1140 * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1141 *
1142 * @param string String to convert to XML format.
1143 * @param doURLEscaping True if we should try to encode as
1144 * per http://www.ietf.org/rfc/rfc2396.txt.
1145 *
1146 * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1147 */
1148 public void writeAttrURI(
1149 final java.io.Writer writer, String string, boolean doURLEscaping)
1150 throws IOException
1151 {
1152 // http://www.ietf.org/rfc/rfc2396.txt says:
1153 // A URI is always in an "escaped" form, since escaping or unescaping a
1154 // completed URI might change its semantics. Normally, the only time
1155 // escape encodings can safely be made is when the URI is being created
1156 // from its component parts; each component may have its own set of
1157 // characters that are reserved, so only the mechanism responsible for
1158 // generating or interpreting that component can determine whether or
1159 // not escaping a character will change its semantics. Likewise, a URI
1160 // must be separated into its components before the escaped characters
1161 // within those components can be safely decoded.
1162 //
1163 // ...So we do our best to do limited escaping of the URL, without
1164 // causing damage. If the URL is already properly escaped, in theory, this
1165 // function should not change the string value.
1166
1167 final int end = string.length();
1168 if (end > m_attrBuff.length)
1169 {
1170 m_attrBuff = new char[end*2 + 1];
1171 }
1172 string.getChars(0,end, m_attrBuff, 0);
1173 final char[] chars = m_attrBuff;
1174
1175 int cleanStart = 0;
1176 int cleanLength = 0;
1177
1178
1179 char ch = 0;
1180 for (int i = 0; i < end; i++)
1181 {
1182 ch = chars[i];
1183
1184 if ((ch < 32) || (ch > 126))
1185 {
1186 if (cleanLength > 0)
1187 {
1188 writer.write(chars, cleanStart, cleanLength);
1189 cleanLength = 0;
1190 }
1191 if (doURLEscaping)
1192 {
1193 // Encode UTF16 to UTF8.
1194 // Reference is Unicode, A Primer, by Tony Graham.
1195 // Page 92.
1196
1197 // Note that Kay doesn't escape 0x20...
1198 // if(ch == 0x20) // Not sure about this... -sb
1199 // {
1200 // writer.write(ch);
1201 // }
1202 // else
1203 if (ch <= 0x7F)
1204 {
1205 writer.write('%');
1206 writer.write(makeHHString(ch));
1207 }
1208 else if (ch <= 0x7FF)
1209 {
1210 // Clear low 6 bits before rotate, put high 4 bits in low byte,
1211 // and set two high bits.
1212 int high = (ch >> 6) | 0xC0;
1213 int low = (ch & 0x3F) | 0x80;
1214 // First 6 bits, + high bit
1215 writer.write('%');
1216 writer.write(makeHHString(high));
1217 writer.write('%');
1218 writer.write(makeHHString(low));
1219 }
1220 else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1221 {
1222 // I'm sure this can be done in 3 instructions, but I choose
1223 // to try and do it exactly like it is done in the book, at least
1224 // until we are sure this is totally clean. I don't think performance
1225 // is a big issue with this particular function, though I could be
1226 // wrong. Also, the stuff below clearly does more masking than
1227 // it needs to do.
1228
1229 // Clear high 6 bits.
1230 int highSurrogate = ((int) ch) & 0x03FF;
1231
1232 // Middle 4 bits (wwww) + 1
1233 // "Note that the value of wwww from the high surrogate bit pattern
1234 // is incremented to make the uuuuu bit pattern in the scalar value
1235 // so the surrogate pair don't address the BMP."
1236 int wwww = ((highSurrogate & 0x03C0) >> 6);
1237 int uuuuu = wwww + 1;
1238
1239 // next 4 bits
1240 int zzzz = (highSurrogate & 0x003C) >> 2;
1241
1242 // low 2 bits
1243 int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1244
1245 // Get low surrogate character.
1246 ch = chars[++i];
1247
1248 // Clear high 6 bits.
1249 int lowSurrogate = ((int) ch) & 0x03FF;
1250
1251 // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1252 yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1253
1254 // bottom 6 bits.
1255 int xxxxxx = (lowSurrogate & 0x003F);
1256
1257 int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1258 int byte2 =
1259 0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1260 int byte3 = 0x80 | yyyyyy;
1261 int byte4 = 0x80 | xxxxxx;
1262
1263 writer.write('%');
1264 writer.write(makeHHString(byte1));
1265 writer.write('%');
1266 writer.write(makeHHString(byte2));
1267 writer.write('%');
1268 writer.write(makeHHString(byte3));
1269 writer.write('%');
1270 writer.write(makeHHString(byte4));
1271 }
1272 else
1273 {
1274 int high = (ch >> 12) | 0xE0; // top 4 bits
1275 int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1276 // middle 6 bits
1277 int low = (ch & 0x3F) | 0x80;
1278 // First 6 bits, + high bit
1279 writer.write('%');
1280 writer.write(makeHHString(high));
1281 writer.write('%');
1282 writer.write(makeHHString(middle));
1283 writer.write('%');
1284 writer.write(makeHHString(low));
1285 }
1286
1287 }
1288 else if (escapingNotNeeded(ch))
1289 {
1290 writer.write(ch);
1291 }
1292 else
1293 {
1294 writer.write("&#");
1295 writer.write(Integer.toString(ch));
1296 writer.write(';');
1297 }
1298 // In this character range we have first written out any previously accumulated
1299 // "clean" characters, then processed the current more complicated character,
1300 // which may have incremented "i".
1301 // We now we reset the next possible clean character.
1302 cleanStart = i + 1;
1303 }
1304 // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1305 // not allowing quotes in the URI proper syntax, nor in the fragment
1306 // identifier, we believe that it's OK to double escape quotes.
1307 else if (ch == '"')
1308 {
1309 // If the character is a '%' number number, try to avoid double-escaping.
1310 // There is a question if this is legal behavior.
1311
1312 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1313 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1314
1315 // if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1316
1317 // We are no longer escaping '%'
1318
1319 if (cleanLength > 0)
1320 {
1321 writer.write(chars, cleanStart, cleanLength);
1322 cleanLength = 0;
1323 }
1324
1325
1326 // Mike Kay encodes this as ", so he may know something I don't?
1327 if (doURLEscaping)
1328 writer.write("%22");
1329 else
1330 writer.write("""); // we have to escape this, I guess.
1331
1332 // We have written out any clean characters, then the escaped '%' and now we
1333 // We now we reset the next possible clean character.
1334 cleanStart = i + 1;
1335 }
1336 else if (ch == '&')
1337 {
1338 // HTML 4.01 reads, "Authors should use "&" (ASCII decimal 38)
1339 // instead of "&" to avoid confusion with the beginning of a character
1340 // reference (entity reference open delimiter).
1341 if (cleanLength > 0)
1342 {
1343 writer.write(chars, cleanStart, cleanLength);
1344 cleanLength = 0;
1345 }
1346 writer.write("&");
1347 cleanStart = i + 1;
1348 }
1349 else
1350 {
1351 // no processing for this character, just count how
1352 // many characters in a row that we have that need no processing
1353 cleanLength++;
1354 }
1355 }
1356
1357 // are there any clean characters at the end of the array
1358 // that we haven't processed yet?
1359 if (cleanLength > 1)
1360 {
1361 // if the whole string can be written out as-is do so
1362 // otherwise write out the clean chars at the end of the
1363 // array
1364 if (cleanStart == 0)
1365 writer.write(string);
1366 else
1367 writer.write(chars, cleanStart, cleanLength);
1368 }
1369 else if (cleanLength == 1)
1370 {
1371 // a little optimization for 1 clean character
1372 // (we could have let the previous if(...) handle them all)
1373 writer.write(ch);
1374 }
1375 }
1376
1377 /**
1378 * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1379 * and UTF-16 surrogates for character references <CODE>&#xnn</CODE>.
1380 *
1381 * @param string String to convert to XML format.
1382 * @param encoding CURRENTLY NOT IMPLEMENTED.
1383 *
1384 * @throws org.xml.sax.SAXException
1385 */
1386 public void writeAttrString(
1387 final java.io.Writer writer, String string, String encoding)
1388 throws IOException
1389 {
1390 final int end = string.length();
1391 if (end > m_attrBuff.length)
1392 {
1393 m_attrBuff = new char[end * 2 + 1];
1394 }
1395 string.getChars(0, end, m_attrBuff, 0);
1396 final char[] chars = m_attrBuff;
1397
1398
1399
1400 int cleanStart = 0;
1401 int cleanLength = 0;
1402
1403 char ch = 0;
1404 for (int i = 0; i < end; i++)
1405 {
1406 ch = chars[i];
1407
1408 // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1409 // System.out.println("ch: "+(int)ch);
1410 // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1411 // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1412 if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
1413 {
1414 cleanLength++;
1415 }
1416 else if ('<' == ch || '>' == ch)
1417 {
1418 cleanLength++; // no escaping in this case, as specified in 15.2
1419 }
1420 else if (
1421 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1422 {
1423 cleanLength++; // no escaping in this case, as specified in 15.2
1424 }
1425 else
1426 {
1427 if (cleanLength > 0)
1428 {
1429 writer.write(chars,cleanStart,cleanLength);
1430 cleanLength = 0;
1431 }
1432 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1433
1434 if (i != pos)
1435 {
1436 i = pos - 1;
1437 }
1438 else
1439 {
1440 if (Encodings.isHighUTF16Surrogate(ch))
1441 {
1442
1443 writeUTF16Surrogate(ch, chars, i, end);
1444 i++; // two input characters processed
1445 // this increments by one and the for()
1446 // loop itself increments by another one.
1447 }
1448
1449 // The next is kind of a hack to keep from escaping in the case
1450 // of Shift_JIS and the like.
1451
1452 /*
1453 else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1454 && (ch != 160))
1455 {
1456 writer.write(ch); // no escaping in this case
1457 }
1458 else
1459 */
1460 String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1461 if (null != outputStringForChar)
1462 {
1463 writer.write(outputStringForChar);
1464 }
1465 else if (escapingNotNeeded(ch))
1466 {
1467 writer.write(ch); // no escaping in this case
1468 }
1469 else
1470 {
1471 writer.write("&#");
1472 writer.write(Integer.toString(ch));
1473 writer.write(';');
1474 }
1475 }
1476 cleanStart = i + 1;
1477 }
1478 } // end of for()
1479
1480 // are there any clean characters at the end of the array
1481 // that we haven't processed yet?
1482 if (cleanLength > 1)
1483 {
1484 // if the whole string can be written out as-is do so
1485 // otherwise write out the clean chars at the end of the
1486 // array
1487 if (cleanStart == 0)
1488 writer.write(string);
1489 else
1490 writer.write(chars, cleanStart, cleanLength);
1491 }
1492 else if (cleanLength == 1)
1493 {
1494 // a little optimization for 1 clean character
1495 // (we could have let the previous if(...) handle them all)
1496 writer.write(ch);
1497 }
1498 }
1499
1500
1501
1502 /**
1503 * Receive notification of character data.
1504 *
1505 * <p>The Parser will call this method to report each chunk of
1506 * character data. SAX parsers may return all contiguous character
1507 * data in a single chunk, or they may split it into several
1508 * chunks; however, all of the characters in any single event
1509 * must come from the same external entity, so that the Locator
1510 * provides useful information.</p>
1511 *
1512 * <p>The application must not attempt to read from the array
1513 * outside of the specified range.</p>
1514 *
1515 * <p>Note that some parsers will report whitespace using the
1516 * ignorableWhitespace() method rather than this one (validating
1517 * parsers must do so).</p>
1518 *
1519 * @param chars The characters from the XML document.
1520 * @param start The start position in the array.
1521 * @param length The number of characters to read from the array.
1522 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1523 * wrapping another exception.
1524 * @see #ignorableWhitespace
1525 * @see org.xml.sax.Locator
1526 *
1527 * @throws org.xml.sax.SAXException
1528 */
1529 public final void characters(char chars[], int start, int length)
1530 throws org.xml.sax.SAXException
1531 {
1532
1533 if (m_elemContext.m_isRaw)
1534 {
1535 try
1536 {
1537 // Clean up some pending issues.
1538 if (m_elemContext.m_startTagOpen)
1539 {
1540 closeStartTag();
1541 m_elemContext.m_startTagOpen = false;
1542 }
1543
1544 m_ispreserve = true;
1545
1546 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1547
1548 // time to generate characters event
1549 if (m_tracer != null)
1550 super.fireCharEvent(chars, start, length);
1551
1552 return;
1553 }
1554 catch (IOException ioe)
1555 {
1556 throw new org.xml.sax.SAXException(
1557 Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe);
1558 }
1559 }
1560 else
1561 {
1562 super.characters(chars, start, length);
1563 }
1564 }
1565
1566 /**
1567 * Receive notification of cdata.
1568 *
1569 * <p>The Parser will call this method to report each chunk of
1570 * character data. SAX parsers may return all contiguous character
1571 * data in a single chunk, or they may split it into several
1572 * chunks; however, all of the characters in any single event
1573 * must come from the same external entity, so that the Locator
1574 * provides useful information.</p>
1575 *
1576 * <p>The application must not attempt to read from the array
1577 * outside of the specified range.</p>
1578 *
1579 * <p>Note that some parsers will report whitespace using the
1580 * ignorableWhitespace() method rather than this one (validating
1581 * parsers must do so).</p>
1582 *
1583 * @param ch The characters from the XML document.
1584 * @param start The start position in the array.
1585 * @param length The number of characters to read from the array.
1586 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1587 * wrapping another exception.
1588 * @see #ignorableWhitespace
1589 * @see org.xml.sax.Locator
1590 *
1591 * @throws org.xml.sax.SAXException
1592 */
1593 public final void cdata(char ch[], int start, int length)
1594 throws org.xml.sax.SAXException
1595 {
1596
1597 if ((null != m_elemContext.m_elementName)
1598 && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1599 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1600 {
1601 try
1602 {
1603 if (m_elemContext.m_startTagOpen)
1604 {
1605 closeStartTag();
1606 m_elemContext.m_startTagOpen = false;
1607 }
1608
1609 m_ispreserve = true;
1610
1611 if (shouldIndent())
1612 indent();
1613
1614 // writer.write(ch, start, length);
1615 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1616 }
1617 catch (IOException ioe)
1618 {
1619 throw new org.xml.sax.SAXException(
1620 Utils.messages.createMessage(
1621 MsgKey.ER_OIERROR,
1622 null),
1623 ioe);
1624 //"IO error", ioe);
1625 }
1626 }
1627 else
1628 {
1629 super.cdata(ch, start, length);
1630 }
1631 }
1632
1633 /**
1634 * Receive notification of a processing instruction.
1635 *
1636 * @param target The processing instruction target.
1637 * @param data The processing instruction data, or null if
1638 * none was supplied.
1639 * @throws org.xml.sax.SAXException Any SAX exception, possibly
1640 * wrapping another exception.
1641 *
1642 * @throws org.xml.sax.SAXException
1643 */
1644 public void processingInstruction(String target, String data)
1645 throws org.xml.sax.SAXException
1646 {
1647
1648 // Process any pending starDocument and startElement first.
1649 flushPending();
1650
1651 // Use a fairly nasty hack to tell if the next node is supposed to be
1652 // unescaped text.
1653 if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1654 {
1655 startNonEscaping();
1656 }
1657 else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1658 {
1659 endNonEscaping();
1660 }
1661 else
1662 {
1663 try
1664 {
1665 // clean up any pending things first
1666 if (m_elemContext.m_startTagOpen)
1667 {
1668 closeStartTag();
1669 m_elemContext.m_startTagOpen = false;
1670 }
1671 else if (m_cdataTagOpen)
1672 {
1673 closeCDATA();
1674 }
1675 else if (m_needToCallStartDocument)
1676 {
1677 startDocumentInternal();
1678 }
1679
1680
1681 /*
1682 * Perhaps processing instructions can be written out in HTML before
1683 * the DOCTYPE, in which case this could be emitted with the
1684 * startElement call, that knows the name of the document element
1685 * doing it right.
1686 */
1687 if (true == m_needToOutputDocTypeDecl)
1688 outputDocTypeDecl("html"); // best guess for the upcoming element
1689
1690
1691 if (shouldIndent())
1692 indent();
1693
1694 final java.io.Writer writer = m_writer;
1695 //writer.write("<?" + target);
1696 writer.write("<?");
1697 writer.write(target);
1698
1699 if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1700 writer.write(' ');
1701
1702 //writer.write(data + ">"); // different from XML
1703 writer.write(data); // different from XML
1704 writer.write('>'); // different from XML
1705
1706 // Always output a newline char if not inside of an
1707 // element. The whitespace is not significant in that
1708 // case.
1709 if (m_elemContext.m_currentElemDepth <= 0)
1710 outputLineSep();
1711
1712 m_startNewLine = true;
1713 }
1714 catch(IOException e)
1715 {
1716 throw new SAXException(e);
1717 }
1718 }
1719
1720 // now generate the PI event
1721 if (m_tracer != null)
1722 super.fireEscapingEvent(target, data);
1723 }
1724
1725 /**
1726 * Receive notivication of a entityReference.
1727 *
1728 * @param name non-null reference to entity name string.
1729 *
1730 * @throws org.xml.sax.SAXException
1731 */
1732 public final void entityReference(String name)
1733 throws org.xml.sax.SAXException
1734 {
1735 try
1736 {
1737
1738 final java.io.Writer writer = m_writer;
1739 writer.write('&');
1740 writer.write(name);
1741 writer.write(';');
1742
1743 } catch(IOException e)
1744 {
1745 throw new SAXException(e);
1746 }
1747 }
1748 /**
1749 * @see ExtendedContentHandler#endElement(String)
1750 */
1751 public final void endElement(String elemName) throws SAXException
1752 {
1753 endElement(null, null, elemName);
1754 }
1755
1756 /**
1757 * Process the attributes, which means to write out the currently
1758 * collected attributes to the writer. The attributes are not
1759 * cleared by this method
1760 *
1761 * @param writer the writer to write processed attributes to.
1762 * @param nAttrs the number of attributes in m_attributes
1763 * to be processed
1764 *
1765 * @throws org.xml.sax.SAXException
1766 */
1767 public void processAttributes(java.io.Writer writer, int nAttrs)
1768 throws IOException,SAXException
1769 {
1770 /*
1771 * process the collected attributes
1772 */
1773 for (int i = 0; i < nAttrs; i++)
1774 {
1775 processAttribute(
1776 writer,
1777 m_attributes.getQName(i),
1778 m_attributes.getValue(i),
1779 m_elemContext.m_elementDesc);
1780 }
1781 }
1782
1783 /**
1784 * For the enclosing elements starting tag write out out any attributes
1785 * followed by ">". At this point we also mark if this element is
1786 * a cdata-section-element.
1787 *
1788 *@throws org.xml.sax.SAXException
1789 */
1790 protected void closeStartTag() throws SAXException
1791 {
1792 try
1793 {
1794
1795 // finish processing attributes, time to fire off the start element event
1796 if (m_tracer != null)
1797 super.fireStartElem(m_elemContext.m_elementName);
1798
1799 int nAttrs = m_attributes.getLength();
1800 if (nAttrs>0)
1801 {
1802 processAttributes(m_writer, nAttrs);
1803 // clear attributes object for re-use with next element
1804 m_attributes.clear();
1805 }
1806
1807 m_writer.write('>');
1808
1809 /* At this point we have the prefix mappings now, so
1810 * lets determine if the current element is specified in the cdata-
1811 * section-elements list.
1812 */
1813 if (m_CdataElems != null) // if there are any cdata sections
1814 m_elemContext.m_isCdataSection = isCdataSection();
1815 if (m_doIndent)
1816 {
1817 m_isprevtext = false;
1818 m_preserves.push(m_ispreserve);
1819 }
1820
1821 }
1822 catch(IOException e)
1823 {
1824 throw new SAXException(e);
1825 }
1826 }
1827
1828
1829
1830 /**
1831 * This method is used when a prefix/uri namespace mapping
1832 * is indicated after the element was started with a
1833 * startElement() and before and endElement().
1834 * startPrefixMapping(prefix,uri) would be used before the
1835 * startElement() call.
1836 * @param uri the URI of the namespace
1837 * @param prefix the prefix associated with the given URI.
1838 *
1839 * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1840 */
1841 public void namespaceAfterStartElement(String prefix, String uri)
1842 throws SAXException
1843 {
1844 // hack for XSLTC with finding URI for default namespace
1845 if (m_elemContext.m_elementURI == null)
1846 {
1847 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1848 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1849 {
1850 // the elements URI is not known yet, and it
1851 // doesn't have a prefix, and we are currently
1852 // setting the uri for prefix "", so we have
1853 // the uri for the element... lets remember it
1854 m_elemContext.m_elementURI = uri;
1855 }
1856 }
1857 startPrefixMapping(prefix,uri,false);
1858 }
1859
1860 public void startDTD(String name, String publicId, String systemId)
1861 throws SAXException
1862 {
1863 m_inDTD = true;
1864 super.startDTD(name, publicId, systemId);
1865 }
1866
1867 /**
1868 * Report the end of DTD declarations.
1869 * @throws org.xml.sax.SAXException The application may raise an exception.
1870 * @see #startDTD
1871 */
1872 public void endDTD() throws org.xml.sax.SAXException
1873 {
1874 m_inDTD = false;
1875 /* for ToHTMLStream the DOCTYPE is entirely output in the
1876 * startDocumentInternal() method, so don't do anything here
1877 */
1878 }
1879 /**
1880 * This method does nothing.
1881 */
1882 public void attributeDecl(
1883 String eName,
1884 String aName,
1885 String type,
1886 String valueDefault,
1887 String value)
1888 throws SAXException
1889 {
1890 // The internal DTD subset is not serialized by the ToHTMLStream serializer
1891 }
1892
1893 /**
1894 * This method does nothing.
1895 */
1896 public void elementDecl(String name, String model) throws SAXException
1897 {
1898 // The internal DTD subset is not serialized by the ToHTMLStream serializer
1899 }
1900 /**
1901 * This method does nothing.
1902 */
1903 public void internalEntityDecl(String name, String value)
1904 throws SAXException
1905 {
1906 // The internal DTD subset is not serialized by the ToHTMLStream serializer
1907 }
1908 /**
1909 * This method does nothing.
1910 */
1911 public void externalEntityDecl(
1912 String name,
1913 String publicId,
1914 String systemId)
1915 throws SAXException
1916 {
1917 // The internal DTD subset is not serialized by the ToHTMLStream serializer
1918 }
1919
1920 /**
1921 * This method is used to add an attribute to the currently open element.
1922 * The caller has guaranted that this attribute is unique, which means that it
1923 * not been seen before and will not be seen again.
1924 *
1925 * @param name the qualified name of the attribute
1926 * @param value the value of the attribute which can contain only
1927 * ASCII printable characters characters in the range 32 to 127 inclusive.
1928 * @param flags the bit values of this integer give optimization information.
1929 */
1930 public void addUniqueAttribute(String name, String value, int flags)
1931 throws SAXException
1932 {
1933 try
1934 {
1935 final java.io.Writer writer = m_writer;
1936 if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1937 {
1938 // "flags" has indicated that the characters
1939 // '>' '<' '&' and '"' are not in the value and
1940 // m_htmlcharInfo has recorded that there are no other
1941 // entities in the range 0 to 127 so we write out the
1942 // value directly
1943 writer.write(' ');
1944 writer.write(name);
1945 writer.write("=\"");
1946 writer.write(value);
1947 writer.write('"');
1948 }
1949 else if (
1950 (flags & HTML_ATTREMPTY) > 0
1951 && (value.length() == 0 || value.equalsIgnoreCase(name)))
1952 {
1953 writer.write(' ');
1954 writer.write(name);
1955 }
1956 else
1957 {
1958 writer.write(' ');
1959 writer.write(name);
1960 writer.write("=\"");
1961 if ((flags & HTML_ATTRURL) > 0)
1962 {
1963 writeAttrURI(writer, value, m_specialEscapeURLs);
1964 }
1965 else
1966 {
1967 writeAttrString(writer, value, this.getEncoding());
1968 }
1969 writer.write('"');
1970 }
1971 } catch (IOException e) {
1972 throw new SAXException(e);
1973 }
1974 }
1975
1976 public void comment(char ch[], int start, int length)
1977 throws SAXException
1978 {
1979 // The internal DTD subset is not serialized by the ToHTMLStream serializer
1980 if (m_inDTD)
1981 return;
1982
1983 // Clean up some pending issues, just in case
1984 // this call is coming right after a startElement()
1985 // or we are in the middle of writing out CDATA
1986 // or if a startDocument() call was not received
1987 if (m_elemContext.m_startTagOpen)
1988 {
1989 closeStartTag();
1990 m_elemContext.m_startTagOpen = false;
1991 }
1992 else if (m_cdataTagOpen)
1993 {
1994 closeCDATA();
1995 }
1996 else if (m_needToCallStartDocument)
1997 {
1998 startDocumentInternal();
1999 }
2000
2001 /*
2002 * Perhaps comments can be written out in HTML before the DOCTYPE.
2003 * In this case we might delete this call to writeOutDOCTYPE, and
2004 * it would be handled within the startElement() call.
2005 */
2006 if (m_needToOutputDocTypeDecl)
2007 outputDocTypeDecl("html"); // best guess for the upcoming element
2008
2009 super.comment(ch, start, length);
2010 }
2011
2012 public boolean reset()
2013 {
2014 boolean ret = super.reset();
2015 if (!ret)
2016 return false;
2017 resetToHTMLStream();
2018 return true;
2019 }
2020
2021 private void resetToHTMLStream()
2022 {
2023 // m_htmlcharInfo remains unchanged
2024 // m_htmlInfo = null; // Don't reset
2025 m_inBlockElem = false;
2026 m_inDTD = false;
2027 m_omitMetaTag = false;
2028 m_specialEscapeURLs = true;
2029 }
2030
2031 static class Trie
2032 {
2033 /**
2034 * A digital search trie for 7-bit ASCII text
2035 * The API is a subset of java.util.Hashtable
2036 * The key must be a 7-bit ASCII string
2037 * The value may be any Java Object
2038 * One can get an object stored in a trie from its key,
2039 * but the search is either case sensitive or case
2040 * insensitive to the characters in the key, and this
2041 * choice of sensitivity or insensitivity is made when
2042 * the Trie is created, before any objects are put in it.
2043 *
2044 * This class is a copy of the one in org.apache.xml.utils.
2045 * It exists to cut the serializers dependancy on that package.
2046 *
2047 * @xsl.usage internal
2048 */
2049
2050 /** Size of the m_nextChar array. */
2051 public static final int ALPHA_SIZE = 128;
2052
2053 /** The root node of the tree. */
2054 final Node m_Root;
2055
2056 /** helper buffer to convert Strings to char arrays */
2057 private char[] m_charBuffer = new char[0];
2058
2059 /** true if the search for an object is lower case only with the key */
2060 private final boolean m_lowerCaseOnly;
2061
2062 /**
2063 * Construct the trie that has a case insensitive search.
2064 */
2065 public Trie()
2066 {
2067 m_Root = new Node();
2068 m_lowerCaseOnly = false;
2069 }
2070
2071 /**
2072 * Construct the trie given the desired case sensitivity with the key.
2073 * @param lowerCaseOnly true if the search keys are to be loser case only,
2074 * not case insensitive.
2075 */
2076 public Trie(boolean lowerCaseOnly)
2077 {
2078 m_Root = new Node();
2079 m_lowerCaseOnly = lowerCaseOnly;
2080 }
2081
2082 /**
2083 * Put an object into the trie for lookup.
2084 *
2085 * @param key must be a 7-bit ASCII string
2086 * @param value any java object.
2087 *
2088 * @return The old object that matched key, or null.
2089 */
2090 public Object put(String key, Object value)
2091 {
2092
2093 final int len = key.length();
2094 if (len > m_charBuffer.length)
2095 {
2096 // make the biggest buffer ever needed in get(String)
2097 m_charBuffer = new char[len];
2098 }
2099
2100 Node node = m_Root;
2101
2102 for (int i = 0; i < len; i++)
2103 {
2104 Node nextNode =
2105 node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2106
2107 if (nextNode != null)
2108 {
2109 node = nextNode;
2110 }
2111 else
2112 {
2113 for (; i < len; i++)
2114 {
2115 Node newNode = new Node();
2116 if (m_lowerCaseOnly)
2117 {
2118 // put this value into the tree only with a lower case key
2119 node.m_nextChar[Character.toLowerCase(
2120 key.charAt(i))] =
2121 newNode;
2122 }
2123 else
2124 {
2125 // put this value into the tree with a case insensitive key
2126 node.m_nextChar[Character.toUpperCase(
2127 key.charAt(i))] =
2128 newNode;
2129 node.m_nextChar[Character.toLowerCase(
2130 key.charAt(i))] =
2131 newNode;
2132 }
2133 node = newNode;
2134 }
2135 break;
2136 }
2137 }
2138
2139 Object ret = node.m_Value;
2140
2141 node.m_Value = value;
2142
2143 return ret;
2144 }
2145
2146 /**
2147 * Get an object that matches the key.
2148 *
2149 * @param key must be a 7-bit ASCII string
2150 *
2151 * @return The object that matches the key, or null.
2152 */
2153 public Object get(final String key)
2154 {
2155
2156 final int len = key.length();
2157
2158 /* If the name is too long, we won't find it, this also keeps us
2159 * from overflowing m_charBuffer
2160 */
2161 if (m_charBuffer.length < len)
2162 return null;
2163
2164 Node node = m_Root;
2165 switch (len) // optimize the look up based on the number of chars
2166 {
2167 // case 0 looks silly, but the generated bytecode runs
2168 // faster for lookup of elements of length 2 with this in
2169 // and a fair bit faster. Don't know why.
2170 case 0 :
2171 {
2172 return null;
2173 }
2174
2175 case 1 :
2176 {
2177 final char ch = key.charAt(0);
2178 if (ch < ALPHA_SIZE)
2179 {
2180 node = node.m_nextChar[ch];
2181 if (node != null)
2182 return node.m_Value;
2183 }
2184 return null;
2185 }
2186 // comment out case 2 because the default is faster
2187 // case 2 :
2188 // {
2189 // final char ch0 = key.charAt(0);
2190 // final char ch1 = key.charAt(1);
2191 // if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2192 // {
2193 // node = node.m_nextChar[ch0];
2194 // if (node != null)
2195 // {
2196 //
2197 // if (ch1 < ALPHA_SIZE)
2198 // {
2199 // node = node.m_nextChar[ch1];
2200 // if (node != null)
2201 // return node.m_Value;
2202 // }
2203 // }
2204 // }
2205 // return null;
2206 // }
2207 default :
2208 {
2209 for (int i = 0; i < len; i++)
2210 {
2211 // A thread-safe way to loop over the characters
2212 final char ch = key.charAt(i);
2213 if (ALPHA_SIZE <= ch)
2214 {
2215 // the key is not 7-bit ASCII so we won't find it here
2216 return null;
2217 }
2218
2219 node = node.m_nextChar[ch];
2220 if (node == null)
2221 return null;
2222 }
2223
2224 return node.m_Value;
2225 }
2226 }
2227 }
2228
2229 /**
2230 * The node representation for the trie.
2231 * @xsl.usage internal
2232 */
2233 private static class Node
2234 {
2235
2236 /**
2237 * Constructor, creates a Node[ALPHA_SIZE].
2238 */
2239 Node()
2240 {
2241 m_nextChar = new Node[ALPHA_SIZE];
2242 m_Value = null;
2243 }
2244
2245 /** The next nodes. */
2246 final Node m_nextChar[];
2247
2248 /** The value. */
2249 Object m_Value;
2250 }
/**
 * Create a trie that reuses the lookup table of an existing one.
 * The root node is shared between the two tries, not copied, so the
 * existing table is assumed to be fully populated and no longer changing.
 * Only the character buffer is private to the new trie.
 *
 * @param existingTrie the Trie whose table this one shares.
 */
public Trie(Trie existingTrie)
{
    // own buffer, just big enough for the longest key in the shared table
    m_charBuffer = new char[existingTrie.getLongestKeyLength()];

    // everything else is taken over from the existing Trie as is
    m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
    m_Root = existingTrie.m_Root;
}
2269
2270 /**
2271 * Get an object that matches the key.
2272 * This method is faster than get(), but is not thread-safe.
2273 *
2274 * @param key must be a 7-bit ASCII string
2275 *
2276 * @return The object that matches the key, or null.
2277 */
2278 public Object get2(final String key)
2279 {
2280
2281 final int len = key.length();
2282
2283 /* If the name is too long, we won't find it, this also keeps us
2284 * from overflowing m_charBuffer
2285 */
2286 if (m_charBuffer.length < len)
2287 return null;
2288
2289 Node node = m_Root;
2290 switch (len) // optimize the look up based on the number of chars
2291 {
2292 // case 0 looks silly, but the generated bytecode runs
2293 // faster for lookup of elements of length 2 with this in
2294 // and a fair bit faster. Don't know why.
2295 case 0 :
2296 {
2297 return null;
2298 }
2299
2300 case 1 :
2301 {
2302 final char ch = key.charAt(0);
2303 if (ch < ALPHA_SIZE)
2304 {
2305 node = node.m_nextChar[ch];
2306 if (node != null)
2307 return node.m_Value;
2308 }
2309 return null;
2310 }
2311 default :
2312 {
2313 /* Copy string into array. This is not thread-safe because
2314 * it modifies the contents of m_charBuffer. If multiple
2315 * threads were to use this Trie they all would be
2316 * using this same array (not good). So this
2317 * method is not thread-safe, but it is faster because
2318 * converting to a char[] and looping over elements of
2319 * the array is faster than a String's charAt(i).
2320 */
2321 key.getChars(0, len, m_charBuffer, 0);
2322
2323 for (int i = 0; i < len; i++)
2324 {
2325 final char ch = m_charBuffer[i];
2326 if (ALPHA_SIZE <= ch)
2327 {
2328 // the key is not 7-bit ASCII so we won't find it here
2329 return null;
2330 }
2331
2332 node = node.m_nextChar[ch];
2333 if (node == null)
2334 return null;
2335 }
2336
2337 return node.m_Value;
2338 }
2339 }
2340 }
2341
2342 /**
2343 * Get the length of the longest key used in the table.
2344 */
2345 public int getLongestKeyLength()
2346 {
2347 return m_charBuffer.length;
2348 }
2349 }
2350 }