Class HTMLParser
- java.lang.Object
-
- org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser
-
- All Implemented Interfaces:
HTMLParserConstants
public class HTMLParser extends Object implements HTMLParserConstants
-
-
Field Summary
Fields
| Modifier and Type | Field | Description |
|---|---|---|
| Token | jj_nt | Next token. |
| static int | SUMMARY_LENGTH | |
| Token | token | Current token. |
| HTMLParserTokenManager | token_source | Generated Token Manager. |
-
Fields inherited from interface org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParserConstants
AfterEquals, ArgEquals, ArgName, ArgQuote1, ArgQuote2, ArgValue, CloseQuote1, CloseQuote2, Comment1, Comment2, CommentEnd1, CommentEnd2, CommentText1, CommentText2, DeclName, DEFAULT, Entity, EOF, HEX, LET, NUM, Punct, Quote1Text, Quote2Text, ScriptEnd, ScriptStart, ScriptText, SP, Space, TagEnd, TagName, tokenImage, WithinComment1, WithinComment2, WithinQuote1, WithinQuote2, WithinScript, WithinTag, Word
-
-
Constructor Summary
Constructors
| Constructor | Description |
|---|---|
| HTMLParser(InputStream stream) | Constructor with InputStream. |
| HTMLParser(InputStream stream, String encoding) | Constructor with InputStream and supplied encoding. |
| HTMLParser(Reader stream) | Constructor. |
| HTMLParser(HTMLParserTokenManager tm) | Constructor with generated Token Manager. |
-
Method Summary
All Methods Instance Methods Concrete Methods
| Modifier and Type | Method | Description |
|---|---|---|
| Token | ArgValue() | |
| void | CommentTag() | |
| Token | Decl() | |
| void | disable_tracing() | Disable tracing. |
| void | enable_tracing() | Enable tracing. |
| ParseException | generateParseException() | Generate ParseException. |
| Properties | getMetaTags() | |
| Token | getNextToken() | Get the next Token. |
| Reader | getReader() | |
| String | getSummary() | |
| String | getTitle() | |
| Token | getToken(int index) | Get the specific Token. |
| void | HTMLDocument() | |
| void | ReInit(InputStream stream) | Reinitialise. |
| void | ReInit(InputStream stream, String encoding) | Reinitialise. |
| void | ReInit(Reader stream) | Reinitialise. |
| void | ReInit(HTMLParserTokenManager tm) | Reinitialise. |
| void | ScriptTag() | |
| void | Tag() | |
-
-
-
Field Detail
-
SUMMARY_LENGTH
public static int SUMMARY_LENGTH
-
token_source
public HTMLParserTokenManager token_source
Generated Token Manager.
-
token
public Token token
Current token.
-
jj_nt
public Token jj_nt
Next token.
-
-
Constructor Detail
-
HTMLParser
public HTMLParser(InputStream stream)
Constructor with InputStream.
-
HTMLParser
public HTMLParser(InputStream stream, String encoding)
Constructor with InputStream and supplied encoding.
-
HTMLParser
public HTMLParser(Reader stream)
Constructor.
-
HTMLParser
public HTMLParser(HTMLParserTokenManager tm)
Constructor with generated Token Manager.
-
-
Method Detail
-
getTitle
public String getTitle() throws IOException, InterruptedException
- Throws:
IOException
InterruptedException
-
getMetaTags
public Properties getMetaTags() throws IOException, InterruptedException
- Throws:
IOException
InterruptedException
-
getSummary
public String getSummary() throws IOException, InterruptedException
- Throws:
IOException
InterruptedException
-
getReader
public Reader getReader() throws IOException
- Throws:
IOException
-
HTMLDocument
public final void HTMLDocument() throws ParseException, IOException
- Throws:
ParseException
IOException
-
Tag
public final void Tag() throws ParseException, IOException
- Throws:
ParseException
IOException
-
ArgValue
public final Token ArgValue() throws ParseException
- Throws:
ParseException
-
Decl
public final Token Decl() throws ParseException
- Throws:
ParseException
-
CommentTag
public final void CommentTag() throws ParseException
- Throws:
ParseException
-
ScriptTag
public final void ScriptTag() throws ParseException
- Throws:
ParseException
-
ReInit
public void ReInit(InputStream stream)
Reinitialise.
-
ReInit
public void ReInit(InputStream stream, String encoding)
Reinitialise.
-
ReInit
public void ReInit(Reader stream)
Reinitialise.
-
ReInit
public void ReInit(HTMLParserTokenManager tm)
Reinitialise.
-
getNextToken
public final Token getNextToken()
Get the next Token.
-
getToken
public final Token getToken(int index)
Get the specific Token.
-
generateParseException
public ParseException generateParseException()
Generate ParseException.
-
enable_tracing
public final void enable_tracing()
Enable tracing.
-
disable_tracing
public final void disable_tracing()
Disable tracing.
-
-