in modules/filters/mod_proxy_html.c [396:716]
/**
 * Start-of-element handler (the signature matches libxml2's
 * startElementSAXFunc): filters, rewrites and writes one opening tag to
 * ctx->bb via ctx->f->next.
 *
 * In order:
 *  - Doctype enforcement.  For fpi_html/fpi_xhtml the start tag is not
 *    emitted when htmlTagLookup() returns no descriptor or the descriptor
 *    is marked deprecated; for the *_legacy doctypes it is not emitted when
 *    there is no descriptor.  "enforce" records the level (2 / 1 / 0).
 *  - With HAVE_STACK the descriptor is pushed onto ctx->stack.
 *  - For <FORM>, when xml2enc_charset is available and no charset_out is
 *    configured, an accept-charset value is obtained (unless the encoding
 *    is UTF-8) to be added if the backend did not supply one.
 *  - "<name" is written, then every attribute:
 *      * when enforcing, attributes htmlAttrAllowed() reports as invalid
 *        or deprecated are skipped, and required ones are counted off;
 *      * the value is copied into ctx->buf;
 *      * attributes listed for this element in cfg->links are treated as
 *        URIs and rewritten with the M_HTML rules of the urlmap; otherwise,
 *        if cfg->extfix is set, attributes listed in cfg->events are
 *        rewritten with the M_EVENTS rules;
 *      * the value is passed to normalise() when cfg->flags is non-zero and
 *        written through pcharacters().
 *  - The tag is closed with ctx->etag for elements whose descriptor is
 *    marked empty, with '>' otherwise.
 *  - When enforcing, a debug message reports required attributes that were
 *    not seen.
 *
 * @param ctxt   handler context; used as a saxctxt*
 * @param uname  element name
 * @param uattrs NULL, or a list of name/value pointer pairs ended by a NULL
 *               name; a value pointer may itself be NULL, in which case
 *               only the attribute name is written
 */
static void pstartElement(void *ctxt, const xmlChar *uname,
                          const xmlChar** uattrs)
{
    int required_attrs;
    int num_match;
    size_t offs, len;
    char *subs;
    rewrite_t is_uri;
    const char** a;
    urlmap *m;
    size_t s_to, s_from, match;
    char *found;
    saxctxt *ctx = (saxctxt*) ctxt;
    size_t nmatch;
    ap_regmatch_t pmatch[10];
#ifndef GO_FASTER
    int verbose = APLOGrtrace1(ctx->f->r);
#endif
    apr_array_header_t *linkattrs;
    int i;
    const char *name = (const char*) uname;
    const char** attrs = (const char**) uattrs;
    const htmlElemDesc* desc = htmlTagLookup(uname);
    urlmap *themap = ctx->map;
    const char *accept_charset = NULL;
#ifdef HAVE_STACK
    const void** descp;
#endif
    int enforce = 0;
    if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
        /* enforce html */
        if (!desc || desc->depr) {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01416)
                          "Bogus HTML element %s dropped", name);
            return;
        }
        enforce = 2;
    }
    else if ((ctx->cfg->doctype == fpi_html_legacy)
             || (ctx->cfg->doctype == fpi_xhtml_legacy)) {
        /* enforce html legacy */
        /* NOTE(review): this fires when there is no descriptor at all, so
         * "Deprecated" in the message looks inaccurate; text left unchanged.
         */
        if (!desc) {
            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01417)
                          "Deprecated HTML element %s dropped", name);
            return;
        }
        enforce = 1;
    }
#ifdef HAVE_STACK
    descp = apr_array_push(ctx->stack);
    *descp = desc;
    /* TODO - implement HTML "allowed here" */
#endif
    /* PR#64443: for <FORM>, insert accept-charset attribute if necessary
     * It's necessary if we've changed the charset (i.e. input not UTF-8)
     * UNLESS someone has taken charge.
     * If there's already an accept-charset, then the backend is in charge.
     * If ProxyHTMLCharsetOut is set, the sysop has taken charge.
     */
    if ((xml2enc_charset != NULL) && (ctx->cfg->charset_out == NULL)
        && !strcasecmp(name, "FORM")) {
        xmlCharEncoding enc;
        if ((xml2enc_charset(ctx->f->r, &enc, &accept_charset) != APR_SUCCESS)
            || (enc == XML_CHAR_ENCODING_UTF8)) {
            accept_charset = NULL; /* Now pay attention if not NULL */
        }
    }
    ap_fputc(ctx->f->next, ctx->bb, '<');
    ap_fputs(ctx->f->next, ctx->bb, name);
    /* Count the attributes the descriptor lists as required; each one seen
     * below is crossed off, and any remainder is reported at the end.
     */
    required_attrs = 0;
    if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL))
        for (a = desc->attrs_req; *a; a++)
            ++required_attrs;
    if (attrs) {
        linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING);
        /* a[0] is the attribute name, a[1] its value (possibly NULL) */
        for (a = attrs; *a; a += 2) {
            if (desc && enforce > 0) {
                /* third argument is non-zero only for the legacy level */
                switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) {
                case HTML_INVALID:
                    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01418)
                                  "Bogus HTML attribute %s of %s dropped",
                                  *a, name);
                    continue;
                case HTML_DEPRECATED:
                    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01419)
                                  "Deprecated HTML attribute %s of %s dropped",
                                  *a, name);
                    continue;
                case HTML_REQUIRED:
                    required_attrs--; /* cross off the number still needed */
                    /* fallthrough - required implies valid */
                default:
                    break;
                }
            }
            ctx->offset = 0;
            if (a[1]) {
                /* work on a copy of the value (terminator included) held
                 * in ctx->buf
                 */
                pappend(ctx, a[1], strlen(a[1])+1);
                /* NOTE(review): ctx->offset is not advanced when a
                 * substitution below lengthens the value; whether a later
                 * preserve() call still reserves enough room depends on
                 * its implementation - confirm.
                 */
                is_uri = ATTR_IGNORE;
                if (linkattrs) {
                    tattr *attrs = (tattr*) linkattrs->elts;
                    for (i=0; i < linkattrs->nelts; ++i) {
                        if (!strcmp(*a, attrs[i].val)) {
                            is_uri = ATTR_URI;
                            break;
                        }
                    }
                }
                if ((is_uri == ATTR_IGNORE) && ctx->cfg->extfix
                    && (ctx->cfg->events != NULL)) {
                    for (i=0; i < ctx->cfg->events->nelts; ++i) {
                        tattr *attrs = (tattr*) ctx->cfg->events->elts;
                        if (!strcmp(*a, attrs[i].val)) {
                            is_uri = ATTR_EVENT;
                            break;
                        }
                    }
                }
                switch (is_uri) {
                case ATTR_URI:
                    num_match = 0;
                    for (m = themap; m; m = m->next) {
                        if (!(m->flags & M_HTML))
                            continue;
                        if (m->flags & M_REGEX) {
                            nmatch = 10;
                            if (!ap_regexec(m->from.r, ctx->buf, nmatch,
                                            pmatch, 0)) {
                                ++num_match;
                                offs = match = pmatch[0].rm_so;
                                s_from = pmatch[0].rm_eo - match;
                                subs = ap_pregsub(ctx->f->r->pool, m->to,
                                                  ctx->buf, nmatch, pmatch);
                                /* ap_pregsub() returns NULL on error; in
                                 * that case leave the value as it is
                                 * instead of handing NULL to strlen().
                                 */
                                if (subs != NULL) {
                                    VERBOSE({
                                        const char *f;
                                        f = apr_pstrndup(ctx->f->r->pool,
                                                         ctx->buf + offs, s_from);
                                        ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0,
                                                      ctx->f->r,
                                                      "H/RX: match at %s, substituting %s",
                                                      f, subs);
                                    })
                                    s_to = strlen(subs);
                                    len = strlen(ctx->buf);
                                    if (s_to > s_from) {
                                        /* growing: ask for the extra room,
                                         * shift the tail (with terminator)
                                         * right, then drop the text in
                                         */
                                        preserve(ctx, s_to - s_from);
                                        memmove(ctx->buf+offs+s_to,
                                                ctx->buf+offs+s_from,
                                                len + 1 - s_from - offs);
                                        memcpy(ctx->buf+offs, subs, s_to);
                                    }
                                    else {
                                        memcpy(ctx->buf + offs, subs, s_to);
                                        memmove(ctx->buf+offs+s_to,
                                                ctx->buf+offs+s_from,
                                                len + 1 - s_from - offs);
                                    }
                                }
                            }
                        } else {
                            /* literal rule: case-insensitive prefix match */
                            s_from = strlen(m->from.c);
                            if (!strncasecmp(ctx->buf, m->from.c, s_from)) {
                                ++num_match;
                                s_to = strlen(m->to);
                                len = strlen(ctx->buf);
                                VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3,
                                                      0, ctx->f->r,
                                                      "H: matched %s, substituting %s",
                                                      m->from.c, m->to));
                                if (s_to > s_from) {
                                    preserve(ctx, s_to - s_from);
                                    memmove(ctx->buf+s_to, ctx->buf+s_from,
                                            len + 1 - s_from);
                                    memcpy(ctx->buf, m->to, s_to);
                                }
                                else { /* it fits in the existing space */
                                    memcpy(ctx->buf, m->to, s_to);
                                    memmove(ctx->buf+s_to, ctx->buf+s_from,
                                            len + 1 - s_from);
                                }
                                break;
                            }
                        }
                        /* URIs only want one match unless overridden in the config */
                        if ((num_match > 0) && !(m->flags & M_NOTLAST))
                            break;
                    }
                    break;
                case ATTR_EVENT:
                    for (m = themap; m; m = m->next) {
                        num_match = 0; /* reset here since we're working per-rule */
                        if (!(m->flags & M_EVENTS))
                            continue;
                        if (m->flags & M_REGEX) {
                            nmatch = 10;
                            /* offs is where the next search starts; every
                             * match is relative to ctx->buf+offs
                             */
                            offs = 0;
                            while (!ap_regexec(m->from.r, ctx->buf+offs,
                                               nmatch, pmatch, 0)) {
                                match = pmatch[0].rm_so;
                                s_from = pmatch[0].rm_eo - match;
                                subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
                                                  nmatch, pmatch);
                                /* ap_pregsub() returns NULL on error: stop
                                 * applying this rule instead of handing
                                 * NULL to strlen().
                                 */
                                if (subs == NULL)
                                    break;
                                /* from here on offs is the absolute
                                 * position of the match, so the trace
                                 * below shows the text actually matched
                                 */
                                offs += match;
                                VERBOSE({
                                    const char *f;
                                    f = apr_pstrndup(ctx->f->r->pool,
                                                     ctx->buf + offs, s_from);
                                    ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0,
                                                  ctx->f->r,
                                                  "E/RX: match at %s, substituting %s",
                                                  f, subs);
                                })
                                s_to = strlen(subs);
                                len = strlen(ctx->buf);
                                if (s_to > s_from) {
                                    preserve(ctx, s_to - s_from);
                                    memmove(ctx->buf+offs+s_to,
                                            ctx->buf+offs+s_from,
                                            len + 1 - s_from - offs);
                                    memcpy(ctx->buf+offs, subs, s_to);
                                }
                                else {
                                    memcpy(ctx->buf + offs, subs, s_to);
                                    memmove(ctx->buf+offs+s_to,
                                            ctx->buf+offs+s_from,
                                            len + 1 - s_from - offs);
                                }
                                /* resume after the inserted text */
                                offs += s_to;
                                ++num_match;
                                /* An empty match would be found again at
                                 * the same position forever: step over one
                                 * byte, or stop at the end of the value.
                                 */
                                if (s_from == 0) {
                                    if (ctx->buf[offs] == '\0')
                                        break;
                                    ++offs;
                                }
                            }
                        }
                        else {
                            found = strstr(ctx->buf, m->from.c);
                            if ((m->flags & M_ATSTART) && (found != ctx->buf))
                                continue;
                            while (found) {
                                s_from = strlen(m->from.c);
                                s_to = strlen(m->to);
                                match = found - ctx->buf;
                                if ((s_from < strlen(found))
                                    && (m->flags & M_ATEND)) {
                                    /* not at the end of the value: leave
                                     * this occurrence, try the next one
                                     */
                                    found = strstr(ctx->buf+match+s_from,
                                                   m->from.c);
                                    continue;
                                }
                                VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3,
                                                      0, ctx->f->r,
                                                      "E: matched %s, substituting %s",
                                                      m->from.c, m->to));
                                len = strlen(ctx->buf);
                                if (s_to > s_from) {
                                    preserve(ctx, s_to - s_from);
                                    memmove(ctx->buf+match+s_to,
                                            ctx->buf+match+s_from,
                                            len + 1 - s_from - match);
                                    memcpy(ctx->buf+match, m->to, s_to);
                                }
                                else {
                                    memcpy(ctx->buf+match, m->to, s_to);
                                    memmove(ctx->buf+match+s_to,
                                            ctx->buf+match+s_from,
                                            len + 1 - s_from - match);
                                }
                                ++num_match;
                                /* Look for the next occurrence only now
                                 * that the replacement is in place.  Before
                                 * it, buf+match+s_to is not the start of
                                 * the unprocessed text and can lie beyond
                                 * the terminator when the replacement is
                                 * longer than what follows the match; the
                                 * pointer would also go stale if preserve()
                                 * replaced ctx->buf (presumably it can -
                                 * confirm).
                                 */
                                found = strstr(ctx->buf+match+s_to,
                                               m->from.c);
                            }
                        }
                        if (num_match && (m->flags & M_LAST))
                            break;
                    }
                    break;
                case ATTR_IGNORE:
                    break;
                }
            }
            if (!a[1])
                ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL);
            else {
                if (ctx->cfg->flags != 0)
                    normalise(ctx->cfg->flags, ctx->buf);
                /* write the attribute, using pcharacters to html-escape
                   anything that needs it in the value.
                */
                ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL);
                pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf));
                ap_fputc(ctx->f->next, ctx->bb, '"');
            }
            /* PR#64443: watch for accept-charset from backend */
            if (accept_charset && !strcasecmp(a[0], "accept-charset")) {
                accept_charset = NULL;
            }
        }
    }
    /* PR#64443: we've seen all we need, so add accept-charset if necessary */
    if (accept_charset != NULL) {
        ap_fprintf(ctx->f->next, ctx->bb, " accept-charset=\"%s\"",
                   accept_charset);
    }
    ctx->offset = 0;
    if (desc && desc->empty)
        ap_fputs(ctx->f->next, ctx->bb, ctx->etag);
    else
        ap_fputc(ctx->f->next, ctx->bb, '>');
    if ((enforce > 0) && (required_attrs > 0)) {
        /* if there are more required attributes than we found then complain */
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01420)
                      "HTML element %s is missing %d required attributes",
                      name, required_attrs);
    }
}