/* Copyright 2006 Joachim Zobel .
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * NOTE(review): this header also uses request_rec, ap_filter_t, the
 * apr_bucket_* types and frag_buffer_t, none of which are pulled in by the
 * includes below. Presumably the including file provides them first -
 * confirm before including this header on its own.
 */
#include "apr.h"
#include "apr_general.h"
#include "apr_tables.h"

#ifndef XML_CHAR_T
#define XML_CHAR_T
typedef char xml_char_t;
#endif

/* The kinds of SAX events a sax bucket can hold. */
typedef enum {
    START_ELT,
    END_ELT,
    START_NS,
    END_NS,
    PROC_INSTR,
    START_CD,
    END_CD,
    CHARACTER,
    DEFAULT,
    WHITE,
    /* additional events */
    COMMENT,
    XML_DECL
} sax_event_e;

/*
 * This table is used as a set of names
 * by inserting each name as both key and value.
 */
typedef apr_table_t unq_set_t;

typedef struct {
    /*
     * These are the sets used for the unification
     * of the types of names used.
     */
    unq_set_t *uri;
    unq_set_t *name;
    unq_set_t *prefix;
} all_unq_t;

typedef apr_array_header_t bucket_stack_t;

/*
 * Request global stuff needed by all sax buckets
 */
typedef struct {
    /* The per request fragment buffer */
    frag_buffer_t *frag_buf;

    /*
     * Unfortunately SAX buckets are not independent.
     * So one purpose of this context is communication
     * between them.
     */

    /*
     * The newns is a stack of start_ns buckets. Copies of
     * the original buckets are used. This is to use ref.
     * counting to prevent deletion of the data.
     * These need to be added as attributes to the next inner
     * tag. Afterwards the buckets have to be destroyed.
     */
    bucket_stack_t *newns;

    /* Used only for logging */
    // request_rec *r ;
} morph_ctx;

/* Member function type used by a sax bucket to write its event. */
typedef void (*frag_write_f)(void *event, morph_ctx *mctx);

/*
 * Request global stuff needed by all sax buckets
 */
typedef struct {
    /* name unification */
    all_unq_t unq;
    /* Used only for logging */
    request_rec *r_log;
    /* pool for short lived memory, frequently cleared */
    apr_pool_t *p_tmp;
    /* add up bucket mem. usage */
    apr_size_t sum_mem;
} bucket_ctx;

/*
 * The event holder aka the sax bucket
 */
typedef struct {
    /* This is the base class */
    apr_bucket_refcount shared;

    /* These are the "essential" fields */
    sax_event_e which;
    void *event;

    /* member function for writing */
    frag_write_f frag_write;

    /* The per request globals */
    bucket_ctx *bctx;
    /* The per request globals */
    morph_ctx *mctx;

    /* data memory used by the event */
    // void *add_mem ;
} bucket_sax;

/* Callback type used to abort parsing for the given filter. */
typedef void (*abort_fn)(ap_filter_t *f);

/*
 * The parser context
 */
typedef struct {
    /*
     * The base class
     * This remains accessible after parsing.
     */
    bucket_ctx bctx;

    /*
     * Passing state between buckets
     */

    /* Are we in a cdata section? */
    int is_cdata;

    /*
     * The starts stack holds the current path of start buckets.
     * It is used to assign the se_id_t ids to the end buckets.
     */
    apr_array_header_t *starts;

    /*
     * General purpose/technical
     */

    /* pool with parser lifetime */
    apr_pool_t *pool;
    /* bucket brigade to append new buckets */
    apr_bucket_brigade *bb;
    /* The bucket allocator */
    apr_bucket_alloc_t *list;
    /* to keep the -> path short */
    request_rec *r;
    /* The parsing filter */
    ap_filter_t *f;
    /* abort parsing */
    abort_fn abort;

    /*
     * The sax_pass_buckets return value. Unfortunately this is
     * the way to return a value from a parser callback.
     */
    apr_status_t rv;

    /*
     * This is only initialized and then passed to all SAX buckets.
     */
    morph_ctx *mctx;
} sax_ctx;

/*
 * The separator for Expat name components
 * XXX: Is this an expatism?
 *
 * Declared extern so that including this header does not emit a (tentative)
 * definition in every translation unit, which causes duplicate-symbol link
 * errors when common symbols are disabled. The object must be defined in
 * exactly one .c file.
 */
extern const xml_char_t SEP_NS;

/*****************************************************************************
 * SAX events et al
 *****************************************************************************/

/**
 * SAX events that consist of a start and an end tag get a unique id. While
 * the start tag gets a positive value the associated end tag gets
 * the start tag's negative value.
 */
typedef long se_id_t;

/**
 * ns_name_t holds the name separated into name and prefix
 * and the uri.
 */
typedef struct {
    /* The namespace uri */
    const xml_char_t *uri;
    /* The name (without prefix) */
    const xml_char_t *name;
    /* The prefix */
    const xml_char_t *prefix;
} ns_name_t;

/* An attribute: its namespaced name and its value. */
typedef struct {
    ns_name_t name;
    const xml_char_t *value;
} attr_t;

/* Event data of a start element. */
typedef struct {
    se_id_t se_id;
    ns_name_t name;
    attr_t *atts;
} start_elt_t;

/* Event data of an end element. */
typedef struct {
    se_id_t se_id;
    ns_name_t name;
} end_elt_t;

/* Event data of a namespace start. */
typedef struct {
    se_id_t se_id;
    const xml_char_t *prefix;
    const xml_char_t *uri;
} start_ns_t;

/* Event data of a namespace end. */
typedef struct {
    se_id_t se_id;
    const xml_char_t *prefix;
} end_ns_t;

/* Event data of character content. */
typedef struct {
    apr_size_t len;
    int encode;
    /* text holds len characters and a term. \0. */
    xml_char_t text[];
} character_t;

/* Event data of an XML declaration. */
typedef struct {
    const xml_char_t *version;
    const xml_char_t *encoding;
    int standalone;
} xml_decl_t;

/* Event data of a processing instruction. */
typedef struct {
    const xml_char_t *target;
    const xml_char_t *data;
} proc_instr_t;

/*****************************************************************************
 * Functions
 *****************************************************************************/

/**
 * This function is used to have exactly one string for each name.
 * Assuming the number of names is limited this makes it possible to
 * use request pool memory for names without memory consumption growing
 * linearly with the number of buckets.
 * @param p - The pool to allocate unknown names from.
 * @param set - The table that holds all names.
 * @param name - The name that shall be unified.
 * @return The unified name. NOTE(review): the original comment was
 *         truncated here; presumably this is the single shared copy held
 *         in set - confirm against the implementation.
 */
const xml_char_t *sax_unify_name(apr_pool_t *p, unq_set_t *set,
                                 const xml_char_t *name);

/**
 * Set a start id for a SAX event
 * @param ctx - The sax ctx
 * @param event - The SAX event
 */
void sax_event_set_start_id(sax_ctx *ctx, void *event);

/**
 * Set an end id for a SAX event
 * @param ctx - The sax ctx
 * @param event - The SAX event
 */
void sax_event_set_end_id(sax_ctx *ctx, void *event);

/**
 * Passes the sax bucket up to the next filter
 * @param sctx - The sax context
 * @param do_abort - Call the context's abort fn, if passing indicates this.
 * @return The ap_pass_brigade return
 */
apr_status_t sax_pass_buckets(sax_ctx *sctx, int do_abort);

/**
 * Takes the given sax bucket, wraps it into a bucket and appends it
 * to the bucket brigade.
 * @param ctx - The sax context. NOTE(review): presumably its bb member is
 *              the brigade appended to - confirm against the implementation.
 * @param e - The sax bucket.
 * @return The newly created and appended bucket.
 */
apr_bucket *sax_bucket_append(sax_ctx *ctx, bucket_sax *e);

/**
 * Initialises the sax_ctx struct.
 * @param ctx - Will be initialized
 * @param bb - The current bucket brigade (to which sax
 *             events are appended).
 * @param f - The parser filter
 * @param abort - The function to abort parsing
 */
void sax_ctx_init(sax_ctx *ctx, apr_bucket_brigade *bb, ap_filter_t *f,
                  abort_fn abort);

/**
 * Creates a request for log usage. With filters, frequent use of
 * the request pool is effectively a memory leak. The ap_perror_log
 * functions do however not know the log level, since they do not know
 * the server_rec. As a workaround we create a request_rec, that
 * uses a subpool that is frequently cleared.
 * @param r - The request.
 * @return The request that can be used for logging
 */
// request_rec *sax_log_req_create(request_rec *r) ;

/**
 * Clears the request_rec's pool
 * @param r - The request.
 */
// void sax_log_req_clear(request_rec *r) ;

/**
 * Creates a sax (start) namespace bucket.
 * @param c - The sax context.
 * @param prefix - The ns prefix
 * @param uri - The ns URI
 * @return The sax bucket that was created.
 */
bucket_sax *sax_bucket_create_ns(sax_ctx *c, const xml_char_t *prefix,
                                 const xml_char_t *uri);

/**
 * Set a sax bucket's type.
 * @param bs - The bucket.
 * @param which - The sax event type of the bucket.
 */
void sax_bucket_set_which(bucket_sax *bs, sax_event_e which);

/**
 * Creates a sax (start) element bucket.
 * @param c - The sax context.
 * @param name - The 3-part name
 * @param atts - A name, value, name, value... array of attributes,
 *               where the names are 3-part.
 * @return The sax bucket that was created.
 */
bucket_sax *sax_bucket_create_elt(sax_ctx *c, const xml_char_t *name,
                                  const xml_char_t **atts);

/**
 * Creates an empty sax bucket.
 * @param c - The sax context.
 * @param which - The sax event type of the bucket.
 * @return The sax bucket that was created.
 */
bucket_sax *sax_bucket_create_empty(sax_ctx *c, sax_event_e which);

/**
 * Creates a sax character bucket.
 * @param c - The sax context.
 * @param ebuf - The text.
 * @param len - The length of the text.
 * @param encode - do XML encoding when morphing.
 * @return The sax bucket that was created.
 */
bucket_sax *sax_bucket_create_char(sax_ctx *c, const xml_char_t *ebuf,
                                   int len, int encode);

/**
 * Creates a sax XML declaration bucket.
 * @param c - The sax context.
 * @param version - The version.
 * @param encoding - The encoding.
 * @param standalone - The standalone attribute was given.
 * @return The sax bucket that was created.
 */
bucket_sax *sax_bucket_create_xml_decl(sax_ctx *c, const xml_char_t *version,
                                       const xml_char_t *encoding,
                                       int standalone);

/**
 * Creates a sax procssing instruction bucket.
 * @param c - The sax context.
 * @param target - The target (the string after the