in src/main/java/org/apache/sling/scripting/jsp/jasper/compiler/ParserController.java [264:400]
private void determineSyntaxAndEncoding(String absFileName,
JarFile jarFile,
String jspConfigPageEnc)
throws JasperException, IOException {
isXml = false;
/*
* 'true' if the syntax (XML or standard) of the file is given
* from external information: either via a JSP configuration element,
* the ".jspx" suffix, or the enclosing file (for included resources)
*/
boolean isExternal = false;
/*
* Indicates whether we need to revert from temporary usage of
* "ISO-8859-1" back to "UTF-8"
*/
boolean revert = false;
JspConfig jspConfig = ctxt.getOptions().getJspConfig();
JspConfig.JspProperty jspProperty = jspConfig.findJspProperty(
absFileName);
if (jspProperty.isXml() != null) {
// If <is-xml> is specified in a <jsp-property-group>, it is used.
isXml = JspUtil.booleanValue(jspProperty.isXml());
isExternal = true;
} else if (absFileName.endsWith(".jspx")
|| absFileName.endsWith(".tagx")) {
isXml = true;
isExternal = true;
}
if (isExternal && !isXml) {
// JSP (standard) syntax. Use encoding specified in jsp-config
// if provided.
sourceEnc = jspConfigPageEnc;
if (sourceEnc != null) {
return;
}
// We don't know the encoding, so use BOM to determine it
sourceEnc = "ISO-8859-1";
} else {
// XML syntax or unknown, (auto)detect encoding ...
Object[] ret = XMLEncodingDetector.getEncoding(absFileName,
jarFile, ctxt, err);
sourceEnc = (String) ret[0];
if (((Boolean) ret[1]).booleanValue()) {
isEncodingSpecifiedInProlog = true;
}
if (((Boolean) ret[2]).booleanValue()) {
isBomPresent = true;
}
skip = ((Integer) ret[3]).intValue();
if (!isXml && sourceEnc.equals("UTF-8")) {
/*
* We don't know if we're dealing with XML or standard syntax.
* Therefore, we need to check to see if the page contains
* a <jsp:root> element.
*
* We need to be careful, because the page may be encoded in
* ISO-8859-1 (or something entirely different), and may
* contain byte sequences that will cause a UTF-8 converter to
* throw exceptions.
*
* It is safe to use a source encoding of ISO-8859-1 in this
* case, as there are no invalid byte sequences in ISO-8859-1,
* and the byte/character sequences we're looking for (i.e.,
* <jsp:root>) are identical in either encoding (both UTF-8
* and ISO-8859-1 are extensions of ASCII).
*/
sourceEnc = "ISO-8859-1";
revert = true;
}
}
if (isXml) {
// (This implies 'isExternal' is TRUE.)
// We know we're dealing with a JSP document (via JSP config or
// ".jspx" suffix), so we're done.
return;
}
/*
* At this point, 'isExternal' or 'isXml' is FALSE.
* Search for jsp:root action, in order to determine if we're dealing
* with XML or standard syntax (unless we already know what we're
* dealing with, i.e., when 'isExternal' is TRUE and 'isXml' is FALSE).
* No check for XML prolog, since nothing prevents a page from
* outputting XML and still using JSP syntax (in this case, the
* XML prolog is treated as template text).
*/
JspReader jspReader = null;
try {
jspReader = new JspReader(ctxt, absFileName, sourceEnc, jarFile,
err);
} catch (FileNotFoundException ex) {
throw new JasperException(ex);
}
jspReader.setSingleFile(true);
Mark startMark = jspReader.mark();
if (!isExternal) {
jspReader.reset(startMark);
if (hasJspRoot(jspReader)) {
if (revert) {
sourceEnc = "UTF-8";
}
isXml = true;
return;
} else {
if (revert && isBomPresent) {
sourceEnc = "UTF-8";
}
isXml = false;
}
}
/*
* At this point, we know we're dealing with JSP syntax.
* If an XML prolog is provided, it's treated as template text.
* Determine the page encoding from the page directive, unless it's
* specified via JSP config.
*/
if (!isBomPresent) {
sourceEnc = jspConfigPageEnc;
if (sourceEnc == null) {
sourceEnc = getPageEncodingForJspSyntax(jspReader, startMark);
if (sourceEnc == null) {
// Default to "ISO-8859-1" per JSP spec
sourceEnc = "ISO-8859-1";
isDefaultPageEncoding = true;
}
}
}
}