Details
htmlParserCtxt
typedef xmlParserCtxt htmlParserCtxt; |
htmlParserCtxtPtr
typedef xmlParserCtxtPtr htmlParserCtxtPtr; |
htmlParserNodeInfo
typedef xmlParserNodeInfo htmlParserNodeInfo; |
htmlSAXHandler
typedef xmlSAXHandler htmlSAXHandler; |
htmlSAXHandlerPtr
typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; |
htmlParserInput
typedef xmlParserInput htmlParserInput; |
htmlParserInputPtr
typedef xmlParserInputPtr htmlParserInputPtr; |
htmlDocPtr
typedef xmlDocPtr htmlDocPtr; |
htmlNodePtr
typedef xmlNodePtr htmlNodePtr; |
struct htmlElemDesc
struct htmlElemDesc {
const char *name; /* The tag name */
char startTag; /* Whether the start tag can be implied */
char endTag; /* Whether the end tag can be implied */
char saveEndTag; /* Whether the end tag should be saved */
char empty; /* Is this an empty element ? */
char depr; /* Is this a deprecated element ? */
char dtd; /* 1: only in Loose DTD, 2: only Frameset one */
char isinline; /* is this a block 0 or inline 1 element */
const char *desc; /* the description */
}; |
htmlElemDescPtr
typedef htmlElemDesc *htmlElemDescPtr; |
struct htmlEntityDesc
struct htmlEntityDesc {
unsigned int value; /* the UNICODE value for the character */
const char *name; /* The entity name */
const char *desc; /* the description */
}; |
htmlEntityDescPtr
typedef htmlEntityDesc *htmlEntityDescPtr; |
htmlTagLookup ()
Lookup the HTML tag in the ElementTable
htmlEntityLookup ()
Lookup the given entity in EntitiesTable
TODO: the linear scan is really ugly, a hash table is really needed.
htmlEntityValueLookup ()
Lookup the given entity in EntitiesTable
TODO: the linear scan is really ugly, a hash table is really needed.
htmlIsAutoClosed ()
The HTML DTD allows a tag to implicitly close other tags.
The list is kept in htmlStartClose array. This function checks
if a tag is autoclosed by one of its children
htmlAutoCloseTag ()
The HTML DTD allows a tag to implicitly close other tags.
The list is kept in htmlStartClose array. This function checks
if the element or one of its children would autoclose the
given tag.
htmlParseEntityRef ()
parse an HTML entity reference
[68] EntityRef ::= '&' Name ';'
htmlParseCharRef ()
parse Reference declarations
[66] CharRef ::= '&#' [0-9]+ ';' |
'&#x' [0-9a-fA-F]+ ';'
htmlParseElement ()
parse an HTML element, this is highly recursive
[39] element ::= EmptyElemTag | STag content ETag
[41] Attribute ::= Name Eq AttValue
htmlSAXParseDoc ()
Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks
to handle parse events. If sax is NULL, fallback to the default DOM
behavior and return a tree.
htmlParseDoc ()
parse an HTML in-memory document and build a tree.
htmlSAXParseFile ()
parse an HTML file and build a tree. Automatic support for ZLIB/Compress
compressed document is provided by default if found at compile-time.
It uses the given SAX function block to handle the parsing callbacks.
If sax is NULL, fallback to the default DOM tree building routines.
htmlParseFile ()
htmlDocPtr htmlParseFile (const char *filename,
const char *encoding); |
parse an HTML file and build a tree. Automatic support for ZLIB/Compress
compressed document is provided by default if found at compile-time.
UTF8ToHtml ()
int UTF8ToHtml (unsigned char *out,
int *outlen,
unsigned char *in,
int *inlen); |
Take a block of UTF-8 chars in and try to convert it to an ASCII
plus HTML entities block of chars out.
htmlEncodeEntities ()
int htmlEncodeEntities (unsigned char *out,
int *outlen,
unsigned char *in,
int *inlen,
int quoteChar); |
Take a block of UTF-8 chars in and try to convert it to an ASCII
plus HTML entities block of chars out.
htmlIsScriptAttribute ()
int htmlIsScriptAttribute (const xmlChar *name); |
Check if an attribute is of content type Script
htmlHandleOmittedElem ()
int htmlHandleOmittedElem (int val); |
Set and return the previous value for handling HTML omitted tags.
htmlFreeParserCtxt ()
Free all the memory used by a parser context. However the parsed
document in ctxt->myDoc is not freed.
htmlCreatePushParserCtxt ()
Create a parser context for using the HTML parser in push mode.
To allow content encoding detection, size should be >= 4
The value of filename is used for fetching external entities
and error/warning reports.
htmlParseChunk ()
int htmlParseChunk (htmlParserCtxtPtr ctxt,
const char *chunk,
int size,
int terminate); |
Parse a Chunk of memory