00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifndef __HTML_PARSER_H__
00014 #define __HTML_PARSER_H__
00015 #include <libxml/xmlversion.h>
00016 #include <libxml/parser.h>
00017
00018 #ifdef LIBXML_HTML_ENABLED
00019
00020 #ifdef __cplusplus
00021 extern "C" {
00022 #endif
00023
00024
00025
00026
00027 typedef xmlParserCtxt htmlParserCtxt;
00028 typedef xmlParserCtxtPtr htmlParserCtxtPtr;
00029 typedef xmlParserNodeInfo htmlParserNodeInfo;
00030 typedef xmlSAXHandler htmlSAXHandler;
00031 typedef xmlSAXHandlerPtr htmlSAXHandlerPtr;
00032 typedef xmlParserInput htmlParserInput;
00033 typedef xmlParserInputPtr htmlParserInputPtr;
00034 typedef xmlDocPtr htmlDocPtr;
00035 typedef xmlNodePtr htmlNodePtr;
00036
00037
00038
00039
00040
/*
 * Descriptor for one HTML element.
 *
 * The field order is part of the public layout and must not change.
 * NOTE(review): the per-field remarks are derived from the field names only;
 * confirm them against the libxml2 HTMLparser documentation.
 */
typedef struct _htmlElemDesc htmlElemDesc;
typedef htmlElemDesc *htmlElemDescPtr;
struct _htmlElemDesc {
    const char *name;           /* element (tag) name */
    char startTag;              /* flag, presumably about the start tag -- confirm */
    char endTag;                /* flag, presumably about the end tag -- confirm */
    char saveEndTag;            /* flag, presumably about saving the end tag -- confirm */
    char empty;                 /* flag, presumably "element has no content" -- confirm */
    char depr;                  /* flag, presumably "element is deprecated" -- confirm */
    char dtd;                   /* presumably a DTD selector -- confirm */
    char isinline;              /* presumably block vs. inline -- confirm */
    const char *desc;           /* human-readable description */

    /* Sub-element information. */
    const char **subelts;       /* array of sub-element names; presumably
                                 * NULL-terminated -- confirm */
    const char *defaultsubelt;  /* read by the htmlDefaultSubelement() macro */

    /* Attribute name lists; presumably NULL-terminated -- confirm. */
    const char **attrs_opt;     /* presumably optional attributes */
    const char **attrs_depr;    /* presumably deprecated attributes */
    const char **attrs_req;     /* read by the htmlRequiredAttrs() macro */
};
00071
00072
00073
00074
/*
 * Descriptor for one HTML entity: a numeric value together with the entity
 * name and a description string.
 */
typedef struct _htmlEntityDesc htmlEntityDesc;
typedef htmlEntityDesc *htmlEntityDescPtr;
struct _htmlEntityDesc {
    unsigned int value;     /* numeric value; presumably the code point -- confirm */
    const char *name;       /* entity name */
    const char *desc;       /* human-readable description */
};
00082
00083
00084
00085
00086 XMLPUBFUN const htmlElemDesc * XMLCALL
00087 htmlTagLookup (const xmlChar *tag);
00088 XMLPUBFUN const htmlEntityDesc * XMLCALL
00089 htmlEntityLookup(const xmlChar *name);
00090 XMLPUBFUN const htmlEntityDesc * XMLCALL
00091 htmlEntityValueLookup(unsigned int value);
00092
00093 XMLPUBFUN int XMLCALL
00094 htmlIsAutoClosed(htmlDocPtr doc,
00095 htmlNodePtr elem);
00096 XMLPUBFUN int XMLCALL
00097 htmlAutoCloseTag(htmlDocPtr doc,
00098 const xmlChar *name,
00099 htmlNodePtr elem);
00100 XMLPUBFUN const htmlEntityDesc * XMLCALL
00101 htmlParseEntityRef(htmlParserCtxtPtr ctxt,
00102 const xmlChar **str);
00103 XMLPUBFUN int XMLCALL
00104 htmlParseCharRef(htmlParserCtxtPtr ctxt);
00105 XMLPUBFUN void XMLCALL
00106 htmlParseElement(htmlParserCtxtPtr ctxt);
00107
00108 XMLPUBFUN htmlParserCtxtPtr XMLCALL
00109 htmlNewParserCtxt(void);
00110
00111 XMLPUBFUN htmlParserCtxtPtr XMLCALL
00112 htmlCreateMemoryParserCtxt(const char *buffer,
00113 int size);
00114
00115 XMLPUBFUN int XMLCALL
00116 htmlParseDocument(htmlParserCtxtPtr ctxt);
00117 XMLPUBFUN htmlDocPtr XMLCALL
00118 htmlSAXParseDoc (xmlChar *cur,
00119 const char *encoding,
00120 htmlSAXHandlerPtr sax,
00121 void *userData);
00122 XMLPUBFUN htmlDocPtr XMLCALL
00123 htmlParseDoc (xmlChar *cur,
00124 const char *encoding);
00125 XMLPUBFUN htmlDocPtr XMLCALL
00126 htmlSAXParseFile(const char *filename,
00127 const char *encoding,
00128 htmlSAXHandlerPtr sax,
00129 void *userData);
00130 XMLPUBFUN htmlDocPtr XMLCALL
00131 htmlParseFile (const char *filename,
00132 const char *encoding);
00133 XMLPUBFUN int XMLCALL
00134 UTF8ToHtml (unsigned char *out,
00135 int *outlen,
00136 const unsigned char *in,
00137 int *inlen);
00138 XMLPUBFUN int XMLCALL
00139 htmlEncodeEntities(unsigned char *out,
00140 int *outlen,
00141 const unsigned char *in,
00142 int *inlen, int quoteChar);
00143 XMLPUBFUN int XMLCALL
00144 htmlIsScriptAttribute(const xmlChar *name);
00145 XMLPUBFUN int XMLCALL
00146 htmlHandleOmittedElem(int val);
00147
#ifdef LIBXML_PUSH_ENABLED

/*
 * Push (incremental) parsing interface; only declared when
 * LIBXML_PUSH_ENABLED is defined.
 */

/*
 * Create a push-parser context.
 * `chunk`/`size` supply an initial block of input; `sax` and `user_data`
 * supply the SAX handler and its user-data pointer.
 * NOTE(review): presumably `chunk` may be NULL with `size` 0, and `filename`
 * is optional -- confirm.
 */
XMLPUBFUN htmlParserCtxtPtr XMLCALL
        htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax,
                                 void *user_data,
                                 const char *chunk,
                                 int size,
                                 const char *filename,
                                 xmlCharEncoding enc);

/*
 * Feed `size` bytes from `chunk` to the parser.
 * NOTE(review): presumably a non-zero `terminate` marks the last chunk and
 * the result is an error code (0 on success) -- confirm.
 */
XMLPUBFUN int XMLCALL
        htmlParseChunk(htmlParserCtxtPtr ctxt,
                       const char *chunk,
                       int size,
                       int terminate);
#endif /* LIBXML_PUSH_ENABLED */
00165
00166 XMLPUBFUN void XMLCALL
00167 htmlFreeParserCtxt (htmlParserCtxtPtr ctxt);
00168
00169
00170
00171
/*
 * Parser option flags.
 *
 * Every enumerator is a distinct single bit, so values can be OR-ed together;
 * presumably the combined mask is what the `options` parameters of the
 * htmlRead*() and htmlCtxt*() functions expect -- confirm.
 * NOTE(review): the per-flag remarks are derived from the names only.
 */
typedef enum {
    HTML_PARSE_RECOVER   = 1 << 0,   /* presumably: relaxed parsing */
    HTML_PARSE_NOERROR   = 1 << 5,   /* presumably: suppress error reports */
    HTML_PARSE_NOWARNING = 1 << 6,   /* presumably: suppress warning reports */
    HTML_PARSE_PEDANTIC  = 1 << 7,   /* presumably: pedantic error reporting */
    HTML_PARSE_NOBLANKS  = 1 << 8,   /* presumably: drop blank nodes */
    HTML_PARSE_NONET     = 1 << 11,  /* presumably: forbid network access */
    HTML_PARSE_COMPACT   = 1 << 16   /* presumably: compact small text nodes */
} htmlParserOption;
00187
00188 XMLPUBFUN void XMLCALL
00189 htmlCtxtReset (htmlParserCtxtPtr ctxt);
00190 XMLPUBFUN int XMLCALL
00191 htmlCtxtUseOptions (htmlParserCtxtPtr ctxt,
00192 int options);
00193 XMLPUBFUN htmlDocPtr XMLCALL
00194 htmlReadDoc (const xmlChar *cur,
00195 const char *URL,
00196 const char *encoding,
00197 int options);
00198 XMLPUBFUN htmlDocPtr XMLCALL
00199 htmlReadFile (const char *URL,
00200 const char *encoding,
00201 int options);
00202 XMLPUBFUN htmlDocPtr XMLCALL
00203 htmlReadMemory (const char *buffer,
00204 int size,
00205 const char *URL,
00206 const char *encoding,
00207 int options);
00208 XMLPUBFUN htmlDocPtr XMLCALL
00209 htmlReadFd (int fd,
00210 const char *URL,
00211 const char *encoding,
00212 int options);
00213 XMLPUBFUN htmlDocPtr XMLCALL
00214 htmlReadIO (xmlInputReadCallback ioread,
00215 xmlInputCloseCallback ioclose,
00216 void *ioctx,
00217 const char *URL,
00218 const char *encoding,
00219 int options);
00220 XMLPUBFUN htmlDocPtr XMLCALL
00221 htmlCtxtReadDoc (xmlParserCtxtPtr ctxt,
00222 const xmlChar *cur,
00223 const char *URL,
00224 const char *encoding,
00225 int options);
00226 XMLPUBFUN htmlDocPtr XMLCALL
00227 htmlCtxtReadFile (xmlParserCtxtPtr ctxt,
00228 const char *filename,
00229 const char *encoding,
00230 int options);
00231 XMLPUBFUN htmlDocPtr XMLCALL
00232 htmlCtxtReadMemory (xmlParserCtxtPtr ctxt,
00233 const char *buffer,
00234 int size,
00235 const char *URL,
00236 const char *encoding,
00237 int options);
00238 XMLPUBFUN htmlDocPtr XMLCALL
00239 htmlCtxtReadFd (xmlParserCtxtPtr ctxt,
00240 int fd,
00241 const char *URL,
00242 const char *encoding,
00243 int options);
00244 XMLPUBFUN htmlDocPtr XMLCALL
00245 htmlCtxtReadIO (xmlParserCtxtPtr ctxt,
00246 xmlInputReadCallback ioread,
00247 xmlInputCloseCallback ioclose,
00248 void *ioctx,
00249 const char *URL,
00250 const char *encoding,
00251 int options);
00252
00253
00254
/*
 * Status codes returned by the htmlAttrAllowed(), htmlElementStatusHere()
 * and htmlNodeStatus() checks.
 *
 * HTML_REQUIRED is 0xc, i.e. 0x8 | 0x4, so it has the HTML_VALID bit set:
 * `(status & HTML_VALID)` is therefore also non-zero for HTML_REQUIRED.
 */
typedef enum {
    HTML_NA         = 0,    /* zero value; presumably "not applicable" -- confirm */
    HTML_INVALID    = 0x1,
    HTML_DEPRECATED = 0x2,
    HTML_VALID      = 0x4,
    HTML_REQUIRED   = 0xc   /* includes the HTML_VALID bit */
} htmlStatus;
00262
00263
00264
00265
00266 XMLPUBFUN htmlStatus XMLCALL htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ;
00267 XMLPUBFUN int XMLCALL htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ;
00268 XMLPUBFUN htmlStatus XMLCALL htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ;
00269 XMLPUBFUN htmlStatus XMLCALL htmlNodeStatus(const htmlNodePtr, int) ;
/*
 * htmlDefaultSubelement:
 * @elt: pointer to an element descriptor
 *
 * Expands to the `defaultsubelt` member of the descriptor @elt points to.
 * The argument is parenthesized so that expressions such as `p + 1` or `&d`
 * can be passed safely.
 */
#define htmlDefaultSubelement(elt) (elt)->defaultsubelt
00277
/*
 * htmlElementAllowedHereDesc:
 * @parent: descriptor of the would-be parent element
 * @elt: descriptor of the would-be child element
 *
 * Descriptor-based convenience wrapper: forwards to htmlElementAllowedHere()
 * with @parent and the `name` member of @elt.
 */
#define htmlElementAllowedHereDesc(parent,elt) \
        htmlElementAllowedHere((parent), (elt)->name)
00289
/*
 * htmlRequiredAttrs:
 * @elt: pointer to an element descriptor
 *
 * Expands to the `attrs_req` member of the descriptor @elt points to.
 */
#define htmlRequiredAttrs(elt) (elt)->attrs_req
00296
00297
00298 #ifdef __cplusplus
00299 }
00300 #endif
00301
00302 #endif
00303 #endif