Class Tokeniser

java.lang.Object
org.jsoup.parser.Tokeniser

final class Tokeniser extends Object
Reads the input stream into tokens.
  • Field Details

    • replacementChar

      static final char replacementChar
      See Also: Constant Field Values
    • notCharRefCharsSorted

      private static final char[] notCharRefCharsSorted
    • win1252ExtensionsStart

      static final int win1252ExtensionsStart
      See Also: Constant Field Values
    • win1252Extensions

      static final int[] win1252Extensions
    • reader

      private final CharacterReader reader
    • errors

      private final ParseErrorList errors
    • state

      private TokeniserState state
    • emitPending

      @Nullable private Token emitPending
    • isEmitPending

      private boolean isEmitPending
    • charsString

      @Nullable private String charsString
    • charsBuilder

      private final StringBuilder charsBuilder
    • dataBuffer

      StringBuilder dataBuffer
    • startPending

      Token.StartTag startPending
    • endPending

      Token.EndTag endPending
    • tagPending

      Token.Tag tagPending
    • charPending

      Token.Character charPending
    • doctypePending

      Token.Doctype doctypePending
    • commentPending

      Token.Comment commentPending
    • lastStartTag

      @Nullable private String lastStartTag
    • lastStartCloseSeq

      @Nullable private String lastStartCloseSeq
    • Unset

      private static final int Unset
      See Also: Constant Field Values
    • markupStartPos

      private int markupStartPos
    • charStartPos

      private int charStartPos
    • codepointHolder

      private final int[] codepointHolder
    • multipointHolder

      private final int[] multipointHolder
  • Constructor Details

  • Method Details

    • read

      Token read()
    • emit

      void emit(Token token)
    • emit

      void emit(String str)
    • emit

      void emit(StringBuilder str)
    • emit

      void emit(char c)
    • emit

      void emit(char[] chars)
    • emit

      void emit(int[] codepoints)
    • getState

      TokeniserState getState()
    • transition

      void transition(TokeniserState newState)
    • advanceTransition

      void advanceTransition(TokeniserState newState)
    • consumeCharacterReference

      @Nullable int[] consumeCharacterReference(@Nullable Character additionalAllowedCharacter, boolean inAttribute)
    • createTagPending

      Token.Tag createTagPending(boolean start)
    • emitTagPending

      void emitTagPending()
    • createCommentPending

      void createCommentPending()
    • emitCommentPending

      void emitCommentPending()
    • createBogusCommentPending

      void createBogusCommentPending()
    • createDoctypePending

      void createDoctypePending()
    • emitDoctypePending

      void emitDoctypePending()
    • createTempBuffer

      void createTempBuffer()
    • isAppropriateEndTagToken

      boolean isAppropriateEndTagToken()
    • appropriateEndTagName

      @Nullable String appropriateEndTagName()
    • appropriateEndTagSeq

      String appropriateEndTagSeq()
      Returns the closer sequence "</lastStart", i.e. the characters "</" followed by the name of the last start tag.
    • error

      void error(TokeniserState state)
    • eofError

      void eofError(TokeniserState state)
    • characterReferenceError

      private void characterReferenceError(String message, Object... args)
    • error

      void error(String errorMsg)
    • error

      void error(String errorMsg, Object... args)
    • currentNodeInHtmlNS

      boolean currentNodeInHtmlNS()
    • unescapeEntities

      String unescapeEntities(boolean inAttribute)
      Utility method to consume reader and unescape entities found within.
      Parameters:
      inAttribute - if the text to be unescaped is in an attribute
      Returns:
      unescaped string from reader