/* Copyright 2006 Joachim Zobel.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is mod_xml2. It has nothing to do with mod_xml. The name is as
 * it is because it is a wrapper to the gnome libxml2 and mod_libxml2
 * is ugly. It runs the libxml2 SAX2 parser and converts its input
 * into SAX buckets. These are SAX events wrapped into buckets. They
 * morph back into heap buckets if you call their bucket read function.
 * This means that nothing needs to be done to convert them back.
 * It also means that you have to be careful: once you treat them as
 * ordinary buckets (e.g. by using an "ordinary" filter), they are.
 * So if you only run the xml2 filter on XML input you will rarely
 * notice it. Whitespace inside tags is normalized.
 *
 * It also provides functionality for converting portions of the document
 * into document trees and using tree transformation functions on them.
 * See tree_transform.h and mod_i18n.c on how to do this.
 *
 * This module should work as a drop in replacement for mod_expat,
 * which I will not maintain.
 *
 * Filters using SAX buckets currently are mod_i18n and mod_xi.
 *
 * The module should be able to run on large files, which is actually
 * the point with both SAX and Apache filters. Allocation of per request
 * memory is done once for every tag name, attribute name and namespace.
 * So as long as your XML file is not permanently introducing new tags
 * or new namespaces this is limited. Check sax_unify_name to see
 * what exactly happens.
* * It is compiled and installed as expected with * apxs2 -i -c -I /usr/include/libxml2 mod_xml2.c buckets_sax.c * frag_buffer.c sax_util.c sxpath.c tree_transform.c * /usr/local/lib/libxml2.la * * Configuration * * The name of the filter for generating SAX buckets is sax. * * Configuration Directives * * XML2Parse xml|html * Context: server config, virtual host, directory, .htaccess * This switches from the libxml2 XML parser to the liberal * libxml2 HTML parser. * */ #include # ifdef S_SPLINT_S typedef unsigned long long off64_t; #endif #include #include #include #include #include #include #include #include #include #include #include module AP_MODULE_DECLARE_DATA xml2_module; //#include #include #include #include #include #include "frag_buffer.h" #include "buckets_sax.h" #include "sax_util.h" #include "sxpath.h" #include "tree_transform.h" typedef struct { // The parser state // This has to be first, since libxml2 // handlers assume the userData to be a // xmlParserCtxt. xmlParserCtxt parser; // The parser we are using parser_type_t parser_type; // The base struct (user context) sax_ctx *sax; // The amount of data that has been parsed apr_size_t sz_parsed; } xml2_ctx; #define xml2_get_sax_ctxt(fctx) (((xml2_ctx *)(fctx))->sax) /***************************************************************************** * Expat/XML2 Handlers *****************************************************************************/ /** * Push a namespace on the stack and append a start ns event to the * outgoing brigade. * Note that prefix and uri are unified by sax_bucket_create_ns. * @param c - the sax context * @param ns_prefix - the prefix * @param ns_uri - the uri */ static void xml2_ns_push_append(sax_ctx * c, const xml_char_t * ns_prefix, const xml_char_t * ns_uri) { // *_NS events are obsolteted by the bctx namespaces stack, // but maintained for compatibility. 
bucket_sax *bs = sax_bucket_create_ns(c, ns_prefix, ns_uri); sax_event_set_start_id(c, bs->event); start_ns_t *ns = apr_array_push(c->bctx.namespaces); ap_assert(ns); // copy the event on the namespaces stack *ns = *((start_ns_t *) bs->event); sax_bucket_append(c, bs); } /** * Pop the namespaces for a tag from the stack and append end events * to the outgoing brigade. * @param c - the sax context * @param ns_prefix - the prefix * @param ns_uri - the uri */ static void xml2_ns_pop_all_append(sax_ctx * c) { // We turn the namespaces on the stack into end_events apr_array_header_t *starts = c->starts; apr_array_header_t *namespaces = c->bctx.namespaces; while ((namespaces->nelts > 0) && (*sax_stack_top(se_id_t, starts) == sax_stack_top(start_ns_t, namespaces)->se_id)) { start_ns_t *sn = apr_array_pop(namespaces); bucket_sax *bs = sax_bucket_create_ns(c, sn->prefix, sn->uri); sax_bucket_set_which(bs, END_NS); sax_event_set_end_id(c, bs->event); sax_bucket_append(c, bs); } } /* * startElementNsSAX2Func */ static void xml2_bucket_create_start(void *ctx, const xmlChar * localname, const xmlChar * prefix, const xmlChar * URI, int nb_namespaces, const xmlChar ** namespaces, int nb_attributes, int nb_defaulted, const xmlChar ** attributes) { sax_ctx *c = xml2_get_sax_ctxt(ctx); const xmlChar *ns_prefix = NULL; const xmlChar *ns_uri = NULL; int i = 0; for (; i < nb_namespaces; i++) { ns_prefix = *(namespaces++); ns_uri = *(namespaces++); ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, c->bctx.r_log, "Start of namespace:%s, %s", ns_prefix, ns_uri); xml2_ns_push_append(c, ns_prefix, ns_uri); } // Create the start element event. 
bucket_sax *b = sax_bucket_create_elt_2(c, localname, prefix, URI, nb_attributes, attributes); sax_event_set_start_id(c, b->event); sax_bucket_append(c, b); } /* * endElementNsSAX2Func */ static void xml2_bucket_create_end(void *ctx, const xmlChar * localname, const xmlChar * prefix, const xmlChar * URI) { sax_ctx *c = xml2_get_sax_ctxt(ctx); // Create the end element event const xmlChar *atts = NULL; bucket_sax *bs = sax_bucket_create_elt_2(c, localname, prefix, URI, 0, NULL); sax_bucket_set_which(bs, END_ELT); sax_event_set_end_id(c, bs->event); sax_bucket_append(c, bs); // We turn the namespaces on the stack into end_events xml2_ns_pop_all_append(c); } /* * startElementSAXFunc */ static void xml2_bucket_create_start_1(void *ctx, const xmlChar * name, const xmlChar ** atts) { sax_ctx *c = xml2_get_sax_ctxt(ctx); static const xml_char_t *empty[2] = { NULL, NULL }; if (!atts) { atts = empty; } apr_size_t off = 0; // Search for namespace decl. const xmlChar **attr = NULL; for (attr = atts; *attr; attr += 2) { *(attr - off) = *attr; *(attr - off + 1) = *(attr + 1); if (!strncmp("xmlns", *attr, 5)) { const xml_char_t *ns_uri = *(attr + 1); const xml_char_t *ns_prefix = NULL; if ((*attr)[5] == ':') { ns_prefix = *attr + 6; } xml2_ns_push_append(c, ns_prefix, ns_uri); off += 2; } } *(attr - off) = *attr; *(attr - off + 1) = *(attr + 1); bucket_sax *bs = sax_bucket_create_elt(c, name, atts); sax_event_set_start_id(c, bs->event); sax_bucket_append(c, bs); } /* * endElementSAXFunc * This acually creates a start buckets and resets the type. This accepts a small * memory overhead for avoiding code duplication. 
*/ static void xml2_bucket_create_end_1(void *ctx, const xmlChar * name) { sax_ctx *c = xml2_get_sax_ctxt(ctx); const xmlChar *atts = NULL; bucket_sax *bs = sax_bucket_create_elt(c, name, &atts); sax_bucket_set_which(bs, END_ELT); sax_event_set_end_id(c, bs->event); sax_bucket_append(c, bs); xml2_ns_pop_all_append(c); } /* * xmlCharacterHandler */ static void xml2_bucket_create_character(void *ctx, const xmlChar * buf, int len) { sax_ctx *c = xml2_get_sax_ctxt(ctx); bucket_sax *bs = sax_bucket_create_char(c, buf, len, 1); sax_bucket_append(c, bs); } /* * commentSAXFunc */ static void xml2_bucket_create_comment(void *ctx, const xmlChar * buf) { sax_ctx *c = xml2_get_sax_ctxt(ctx); bucket_sax *bs = sax_bucket_create_char(c, buf, strlen(buf), 0); sax_bucket_set_which(bs, COMMENT); sax_bucket_append(c, bs); } /* * XML_XmlDeclHandler */ static void xml2_bucket_create_xml_decl(void *ctx) { // This does some parser initializations // xmlSAX2StartDocument(ctx); xml2_ctx *xc = ctx; sax_ctx *c = xml2_get_sax_ctxt(ctx); bucket_sax *bs = sax_bucket_create_xml_decl(c, xc->parser.version, xc->parser.encoding, xc->parser.standalone); sax_bucket_append(c, bs); } /* * processingInstructionSAXFunc */ static void xml2_bucket_create_proc_instr(void *ctx, const xmlChar * target, const xmlChar * data) { sax_ctx *c = xml2_get_sax_ctxt(ctx); bucket_sax *bs = sax_bucket_create_proc_instr(c, target, data); sax_bucket_append(c, bs); } /* * XML_DefaultHandler */ /* static void xml2_bucket_create_default(void *ctx, const xmlChar * buf, int len) { sax_ctx *c = ctx; bucket_sax *bs = sax_bucket_create_char(c, buf, len, 0); sax_bucket_set_which(bs, DEFAULT); sax_bucket_append(c, bs); } */ /* * ignorableWhitespaceSAXFunc */ static void xml2_bucket_create_white(void *ctx, const xmlChar * buf, int len) { sax_ctx *c = xml2_get_sax_ctxt(ctx); bucket_sax *bs = sax_bucket_create_char(c, buf, len, 1); //sax_bucket_set_which(bs, WHITE); sax_bucket_append(c, bs); } /* * cdataBlockSAXFunc */ static void 
xml2_bucket_create_cdata(void *ctx, const xmlChar * buf, int len) { sax_ctx *c = xml2_get_sax_ctxt(ctx); sax_bucket_append(c, sax_bucket_create_empty(c, START_CD)); sax_bucket_append(c, sax_bucket_create_char(c, buf, len, 0)); sax_bucket_append(c, sax_bucket_create_empty(c, END_CD)); } /** * Common implementation for error and warning callbacks. */ static void xml2_sax_log(void *ctx, int level, const char *msg, va_list ap) { xml2_ctx *xc = ctx; sax_ctx *sc = xml2_get_sax_ctxt(ctx); const char *err_xml = apr_pvsprintf(sc->bctx.p_tmp, msg, ap); ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, sc->r, "XML parse error line %d: %s", xc->parser.input->line, err_xml); } /* * warningSAXFunc */ static void xml2_sax_warning(void *ctx, const char *msg, ...) { va_list ap; va_start(ap, msg); xml2_sax_log(ctx, APLOG_WARNING, msg, ap); va_end(ap); } /* * errorSAXFunc */ static void xml2_sax_error(void *ctx, const char *msg, ...) { va_list ap; va_start(ap, msg); xml2_sax_log(ctx, APLOG_ERR, msg, ap); va_end(ap); } /***************************************************************************** * Interface Functions *****************************************************************************/ /** * Aborts parsing for the given filter, assuming it is an xml2 filter * @param r - An xml2 filter */ static void mod_xml2_abort_filter(ap_filter_t * f) { xml2_ctx *ctx = f->ctx; // XML_StopParser(ctx->parser, XML_FALSE); } /* * mod_xml2_abort */ void mod_xml2_abort(request_rec * r) { ap_filter_t *f = r->output_filters; /* We walk the filter chain and abort all xml2 parsers */ for (; f; f = f->next) { if (strcmp(f->frec->name, "xml2") == 0) { mod_xml2_abort_filter(f); } } } /***************************************************************************** * Module Handlers *****************************************************************************/ /* * Helper from mod_xmlns */ static char *ctype2encoding(apr_pool_t * pool, const char *in) { char *x; char *ptr; char *ctype; if (!in) return 0; ctype = 
apr_pstrdup(pool, in); for (ptr = ctype; *ptr; ++ptr) if (isupper(*ptr)) *ptr = tolower(*ptr); if (ptr = strstr(ctype, "charset="), ptr > 0) { /* jump over "charset=" and chop * anything that follows charset */ ptr += 8; if (x = strpbrk(ptr, " ;"), x != NULL) *x = 0; } x = ptr ? apr_pstrdup(pool, ptr) : 0; return x; } /***************************************************************************** * mod_xml2 memory handling used by libxml2 * * The reason that this exists is that the SAX2 interface assumes (some of?) * the strings it receices to be allocated by libxml. It tries to free them * when the tree is freed. *****************************************************************************/ const char *const MARK = "It is the pointer that is relevant"; static void *xml2_alloc(size_t sz) { if (!sz) { return NULL; } void *mem = malloc(sz + sizeof(MARK)); const char **mark = mem; *mark = MARK; return (char *) mem + sizeof(MARK); } static void xml2_free(void *emem) { if (!emem) { return; } void *mem = (char *) emem - sizeof(MARK); const char **mark = mem; if (*mark == MARK) { free(mem); } } static void *xml2_realloc(void *emem, size_t sz) { void *mem = (char *) emem - sizeof(MARK); if (!emem) { mem = NULL; } // We assume that reallocing only happens // to marked libxml2-memory. ap_assert(*((const char **) mem) == MARK); mem = realloc(mem, sz + sizeof(MARK)); const char **mark = mem; *mark = MARK; return (char *) mem + sizeof(MARK); } static char *xml2_strdup(const char *str) { const size_t sz = strlen(str); return strcpy(xml2_alloc(sz) + 1, str); } /* * The important feature of the implementation below * is that free does nothing. * static apr_pool_t *volatile p_cur = NULL; static void *xml2_alloc(size_t sz) { ap_log_perror(APLOG_MARK, APLOG_DEBUG, 0, p_cur, "xml2_alloc %d bytes.", sz); // unfortunately we need to keep track of the size to // support a useless realloc. 
void *mem = apr_palloc(p_cur, sz + sizeof(size_t)); size_t *psz = mem; *psz = sz; return (char *) mem + sizeof(size_t); } static void xml2_free(void *emem) { } static void *xml2_realloc(void *emem, size_t sz) { void *mem = xml2_alloc(sz); if (emem) { size_t *psz = (size_t *) ((char *) emem - sizeof(size_t)); memcpy(mem, emem, *psz); } return mem; } static char *xml2_strdup(const char *str) { // The situation get worse because we can not be shure that // a strduped string is not reallooced later. const size_t sz = strlen(str); return strcpy(xml2_alloc(sz), str); } static int xml2_gc_mem_setup() { } void xml2_set_current_pool(apr_pool_t * p) { p_cur = p; } */ void xml2_set_current_pool(apr_pool_t * p) { } static void xml2_child_init(apr_pool_t * p, server_rec * s) { xml2_set_current_pool(p); xmlGcMemSetup(xml2_free, xml2_alloc, xml2_alloc, xml2_realloc, xml2_strdup); } /* static XML_Memory_Handling_Suite xml2_mm = { xml2_alloc, xml2_realloc, xml2_free }; */ /***************************************************************************** * Parser wrappers *****************************************************************************/ typedef enum { PARSE_OK, PARSE_ERR, PARSE_ABORT } parse_status_t; #define MIN(x,y) ((x)<(y)?(x):(y)) /** * Parses the XML input * @param sctx - The SAX context * @param - r_log - The request * @return - 0 if sucessful */ static parse_status_t mod_xml2_parse(xml2_ctx * ectx, const char *buf, apr_size_t len, int end) { parse_status_t rv = PARSE_OK; request_rec *r_log = ectx->sax->bctx.r_log; apr_pool_t *p_tmp = ectx->sax->bctx.p_tmp; ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r_log, "libxml2 is about to parse %d bytes.", len); xmlParserErrors err; switch (ectx->parser_type) { case XML: err = xmlParseChunk(&ectx->parser, buf, len, end); break; case HTML: err = htmlParseChunk(&ectx->parser, buf, len, end); break; default: ap_assert(FALSE); } if (err != 0) { if (1 //XXX: Which code on abort? 
) { rv = PARSE_ABORT; } else { rv = PARSE_ERR; } // Logging is done from the SAX error handler. // So this is just to be shure. ap_log_rerror(APLOG_MARK, /* Abort is not an error */ (rv == PARSE_ERR) ? APLOG_ERR : APLOG_DEBUG, 0, r_log, "Parse Error %d:", err); } return rv; } /** * Create a SAX handler * @return the initialized sax handler struct */ static int sax_handler_initialized = 0; static xmlSAXHandler *mod_xml2_get_sax_handler(parser_type_t type) { // The SAX callbacks static xmlSAXHandler handler1, handler2; if (!sax_handler_initialized) { memset(&handler2, 0, sizeof(xmlSAXHandler)); //xmlSAX2InitDefaultSAXHandler(&fctx->handler, 0); /* * Set handlers */ #define CALLBACK2(fn,hdl) handler2.fn = hdl CALLBACK2(startDocument, xml2_bucket_create_xml_decl); CALLBACK2(comment, xml2_bucket_create_comment); CALLBACK2(startElementNs, xml2_bucket_create_start); CALLBACK2(endElementNs, xml2_bucket_create_end); CALLBACK2(ignorableWhitespace, xml2_bucket_create_white); CALLBACK2(cdataBlock, xml2_bucket_create_cdata); CALLBACK2(characters, xml2_bucket_create_character); CALLBACK2(processingInstruction, xml2_bucket_create_proc_instr); CALLBACK2(warning, xml2_sax_warning); CALLBACK2(error, xml2_sax_error); // mark as SAX2 handler2.initialized = XML_SAX2_MAGIC; #define CALLBACK1(fn,hdl) handler1.fn = hdl CALLBACK1(startDocument, xml2_bucket_create_xml_decl); CALLBACK1(comment, xml2_bucket_create_comment); CALLBACK1(startElement, xml2_bucket_create_start_1); CALLBACK1(endElement, xml2_bucket_create_end_1); CALLBACK1(ignorableWhitespace, xml2_bucket_create_white); CALLBACK1(cdataBlock, xml2_bucket_create_cdata); CALLBACK1(characters, xml2_bucket_create_character); CALLBACK1(processingInstruction, xml2_bucket_create_proc_instr); CALLBACK1(warning, xml2_sax_warning); CALLBACK1(error, xml2_sax_error); // mark as SAX1 handler1.initialized = 1; sax_handler_initialized = 1; } switch (type) { case XML: return &handler2; case HTML: return &handler1; default: ap_assert(FALSE); return 
NULL; } } /** * Create an xml2_ctx with an initialized parser * @param - r - The request * @return - The xml2 context */ static xml2_ctx *mod_xml2_setup_parser(request_rec * r, parser_type_t type) { // This replaces the parser context and will therefore // be freed by libxml2. xml2_ctx *fctx = xmlMemMalloc(sizeof(xml2_ctx)); memset(fctx, 0, sizeof(xml2_ctx)); /* set up the parser */ fctx->sz_parsed = 0; fctx->parser_type = type; xmlParserCtxt *parser = NULL; switch (type) { case XML: { parser = xmlCreatePushParserCtxt(mod_xml2_get_sax_handler(type), fctx, NULL, 0, NULL); break; } case HTML: { const char *enc = ctype2encoding(r->pool, r->content_type); parser = htmlCreatePushParserCtxt(mod_xml2_get_sax_handler(type), fctx, NULL, 0, NULL, xmlParseCharEncoding(enc)); break; } default: ap_assert(FALSE); } if (!parser) { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, "xmlCreatePushParserCtxt failed"); // for enc. %s", enc); } else { // Copy the parser context fctx->parser = *parser; // and free it. xmlFree(parser); apr_pool_cleanup_register(r->pool, fctx, (void *) ((type == HTML) ? htmlFreeParserCtxt : xmlFreeParserCtxt), apr_pool_cleanup_null); } return fctx; } /***************************************************************************** * The main filter *****************************************************************************/ /* * xml2_filter_init */ static int xml2_filter_init(ap_filter_t * f) { // Workaround: filter_init can get multiple calls. 
if (f->ctx) return OK; xml2_cfg *cfg = ap_get_module_config(f->r->per_dir_config, &xml2_module); xml2_ctx *fctx = f->ctx = mod_xml2_setup_parser(f->r, cfg->parser_type); sax_ctx *sctx = fctx->sax = apr_palloc(f->r->pool, sizeof(sax_ctx)); request_rec *r = f->r; ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2_filter_init called."); xml2_tree_log_filter_chain(APLOG_MARK, f); /* Chunked encoding enables HTTP keepalive * and removes Content-Length */ if (r->proto_num >= 1001) { if (!r->main && !r->prev) r->chunked = 1; } /* but for the else cases */ apr_table_unset(r->headers_out, "Content-Length"); /* Init the sax_ctx */ sax_ctx_init(sctx, /* with a newly created brigade */ apr_brigade_create(r->pool, f->c->bucket_alloc), f, mod_xml2_abort_filter); // XXX: char *enc = ctype2encoding(r->pool, r->content_type); return OK; } /* static int mod_xml2_no_mmap(apr_bucket *b) { if (strcmp(b->type->name, "FILE") == 0) { apr_bucket_file *a = b->data ; a->can_mmap = 0 ; } return APR_SUCCESS ; } */ /* * xml2_filter */ static int xml2_filter(ap_filter_t * f, apr_bucket_brigade * bb) { apr_bucket *b; apr_bucket *del = NULL; const char *buf = NULL; apr_size_t bytes = 0; apr_status_t rv = APR_SUCCESS; xml2_ctx *ctxt = f->ctx; sax_ctx *sctx = ctxt->sax; request_rec *r_log = sctx->bctx.r_log; ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r_log, "xml2_filter called. APR_BUCKET_ALLOC_SIZE is %d", APR_BUCKET_ALLOC_SIZE); xml2_tree_log_filter_chain(APLOG_MARK, f); if (APR_BUCKET_IS_EOS(APR_BRIGADE_FIRST(bb))) { // Empty brigade, calling mod_xml2_parse // would give an error. 
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "Passing brigade to %s.", f->next->frec->name); return ap_pass_brigade(f->next, bb); } for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb); b = APR_BUCKET_NEXT(b)) { // del can now be deleted if (del) { apr_bucket_delete(del); del = NULL; } // Basic consistency ap_assert(b->list == bb->bucket_alloc); if (APR_BUCKET_IS_EOS(b)) { ap_assert(APR_BUCKET_NEXT(b) == APR_BRIGADE_SENTINEL(bb)); ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r_log, "EOS bucket found."); if (PARSE_ABORT == mod_xml2_parse(ctxt, buf, 0, 1)) { return sctx->rv; } // This the end anyway, so no special // handling of PARSE_ERR takes place. } if (APR_BUCKET_IS_METADATA(b)) { apr_bucket *p = NULL; apr_bucket_copy(b, &p); APR_BRIGADE_INSERT_TAIL(sctx->bb, p); } else { // this is the main parser call if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) == APR_SUCCESS) { switch (mod_xml2_parse(ctxt, buf, bytes, 0)) { case PARSE_OK: // We need to delete the bucket. // With a large file doing this at // brigades end is too far away. // We can however not delete it now, // since it knows the next bucket. del = b; ctxt->sz_parsed += bytes; break; case PARSE_ABORT: return sctx->rv; case PARSE_ERR: APR_BRIGADE_INSERT_TAIL(sctx->bb, apr_bucket_eos_create(sctx-> bb-> bucket_alloc)); ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "Passing brigade to %s.", f->next->frec->name); ap_pass_brigade(f->next, sctx->bb); //XXX: What is a good error return here? 
return -500; } } else { ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r_log, "Error in bucket read"); } } sax_check_pool(sctx->r->pool); } ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r_log, "End of brigade."); // This is not ours apr_brigade_cleanup(bb); // We are done, so we pass the new buckets return sax_pass_buckets(sctx, 0); } /***************************************************************************** * Utility filters *****************************************************************************/ /** * Does nothing */ static int xml2_null_filter(ap_filter_t * f, apr_bucket_brigade * bb) { ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2_null_filter called."); xml2_tree_log_filter_chain(APLOG_MARK, f); apr_brigade_cleanup(bb); } /** * Removes the leading XML_DECL and the trailing EOS bucket. */ static int xml2_pre_include_filter(ap_filter_t * f, apr_bucket_brigade * bb) { apr_bucket *b; apr_status_t rv = APR_SUCCESS; apr_bucket *b_xml_decl = NULL; // apr_bucket_brigade *bbnx = // apr_brigade_create(f->r->pool, f->r->connection->bucket_alloc) ; ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "xml2_pre_include_filter called."); xml2_tree_log_filter_chain(APLOG_MARK, f); for (b = APR_BRIGADE_FIRST(bb); b != APR_BRIGADE_SENTINEL(bb); b = APR_BUCKET_NEXT(b)) { if (BUCKET_IS_SAX(b)) { bucket_sax *bs = b->data; /* We search for the XML decl. 
*/ if (bs->which == XML_DECL) { b_xml_decl = b; break; } /* Only (empty) char buckets can be before it */ if (bs->which != CHARACTER) { break; } } else { break; } } // Remove XML_DECL if (b_xml_decl) { apr_bucket_delete(b_xml_decl); ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "XML_DECL bucket removed."); } // Remove EOS b = APR_BRIGADE_LAST(bb); if (APR_BUCKET_IS_EOS(b)) { apr_bucket_delete(b); ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "EOS bucket removed."); } /* We are done, so we pass the brigade */ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, "Passing brigade to %s.", f->next->frec->name); rv = ap_pass_brigade(f->next, bb); return rv; } /***************************************************************************** * Configuration *****************************************************************************/ /* * xml2_parse */ static const char *xml2_parse(cmd_parms * cmd, void *cf, const char *arg) { xml2_cfg *cfg = cf; if (!strcasecmp(arg, "xml")) { cfg->parser_type = XML; return NULL; } if (!strcasecmp(arg, "html")) { cfg->parser_type = HTML; return NULL; } return "The value of XML2Parse must be html or xml."; } static const command_rec xml2_cmds[] = { // XML2FilterCond newname condfilter xpath AP_INIT_TAKE123("XML2FilterCond", xml2_filter_cond_create, NULL, OR_ALL, NULL), AP_INIT_TAKE1("XML2Parse", xml2_parse, NULL, OR_ALL, "Choose between xml and html parser."), {NULL} }; static void *cr_xml2_cfg(apr_pool_t * pool, char *x) { xml2_cfg *cfg = apr_pcalloc(pool, sizeof(xml2_cfg)); cfg->fconds = apr_hash_make(pool); // Default is XML cfg->parser_type = XML; return cfg; } static void *merge_xml2_cfg(apr_pool_t * pool, void *BASE, void *ADD) { xml2_cfg *base = BASE; xml2_cfg *add = ADD; xml2_cfg *cfg = apr_palloc(pool, sizeof(xml2_cfg)); cfg->fconds = apr_hash_overlay(pool, add->fconds, base->fconds); cfg->parser_type = add->parser_type; return cfg; } /***************************************************************************** * The usual module 
stuff *****************************************************************************/ static void xml2_hooks(apr_pool_t * p) { ap_hook_child_init(xml2_child_init, NULL, NULL, APR_HOOK_MIDDLE); // The external name of the filter is sax, since it is implemented // by multiple modules. ap_register_output_filter("sax", xml2_filter, xml2_filter_init, AP_FTYPE_RESOURCE); ap_register_output_filter("null", xml2_null_filter, NULL, AP_FTYPE_RESOURCE); ap_register_output_filter("pre_include", xml2_pre_include_filter, NULL, AP_FTYPE_RESOURCE); ap_register_output_filter("_transform", xml2_tree_filter, xml2_tree_filter_init, AP_FTYPE_RESOURCE); // APR_REGISTER_OPTIONAL_FN(mod_xml2_abort); } module AP_MODULE_DECLARE_DATA xml2_module = { STANDARD20_MODULE_STUFF, cr_xml2_cfg, merge_xml2_cfg, NULL, NULL, xml2_cmds, xml2_hooks };