2017-11-20 06:10:04 +01:00
package css // import "github.com/tdewolff/parse/css"
import (
"bytes"
"io"
"strconv"
"github.com/tdewolff/parse"
)
// Shared byte slices used as the data of tokens the parser synthesizes
// itself instead of reading them from the lexer.
var (
	// wsBytes is the data of whitespace tokens pushed into the value buffer.
	wsBytes = []byte(" ")
	// endBytes is the data of the right brace token re-emitted by Next.
	endBytes = []byte("}")
	// emptyBytes is the data of the placeholder token set by parseQualifiedRule.
	emptyBytes = []byte("")
)
// GrammarType determines the type of grammar.
type GrammarType uint32

// GrammarType values.
const (
	ErrorGrammar GrammarType = iota // extra token when errors occur
	CommentGrammar
	AtRuleGrammar
	BeginAtRuleGrammar
	EndAtRuleGrammar
	QualifiedRuleGrammar
	BeginRulesetGrammar
	EndRulesetGrammar
	DeclarationGrammar
	TokenGrammar
	CustomPropertyGrammar
)

// grammarTypeNames maps every defined GrammarType to its printable name.
var grammarTypeNames = [...]string{
	ErrorGrammar:          "Error",
	CommentGrammar:        "Comment",
	AtRuleGrammar:         "AtRule",
	BeginAtRuleGrammar:    "BeginAtRule",
	EndAtRuleGrammar:      "EndAtRule",
	QualifiedRuleGrammar:  "QualifiedRule",
	BeginRulesetGrammar:   "BeginRuleset",
	EndRulesetGrammar:     "EndRuleset",
	DeclarationGrammar:    "Declaration",
	TokenGrammar:          "Token",
	CustomPropertyGrammar: "CustomProperty",
}

// String returns the string representation of a GrammarType. Values outside
// the defined constants are rendered as "Invalid(N)" with N the numeric value.
func (gt GrammarType) String() string {
	if gt < GrammarType(len(grammarTypeNames)) {
		return grammarTypeNames[gt]
	}
	return "Invalid(" + strconv.Itoa(int(gt)) + ")"
}
////////////////////////////////////////////////////////////////
// State is the state function the parser currently is in. Next invokes the
// State on top of the parser's state stack with the parser itself as argument
// and returns the GrammarType that function produces.
type State func ( * Parser ) GrammarType
// Token is a single TokenType and its associated data.
type Token struct {
// TokenType is embedded, so its methods are promoted to Token.
TokenType
// Data holds the bytes that belong to the token.
Data [ ] byte
}
2018-07-06 07:18:51 +02:00
// String returns the token formatted as the name of its TokenType followed by
// its data between parentheses and single quotes, e.g. Type('data').
func (t Token) String() string {
	typeName := t.TokenType.String()
	return typeName + "('" + string(t.Data) + "')"
}
2017-11-20 06:10:04 +01:00
// Parser is the state for the parser.
type Parser struct {
// l is the lexer the tokens are read from.
l * Lexer
// state is a stack of state functions; Next runs the last element.
state [ ] State
// err is an error set by the parser itself. It is cleared at the start of
// every Next call and takes precedence over the lexer error in Err.
err error
// buf holds the tokens collected for the last grammar, see Values.
buf [ ] Token
// level is a nesting counter: it is incremented on left parenthesis, left
// brace, left bracket and function tokens and decremented on their right
// counterparts.
level int
// tt and data are the current token as returned by Next.
tt TokenType
data [ ] byte
// prevWS is set by popToken when whitespace was skipped in front of the
// token it returned.
prevWS bool
// prevEnd is set when a grammar was terminated by a right brace, so that
// the following Next call emits that right brace instead of reading a new
// token from the lexer.
prevEnd bool
}
// NewParser returns a new CSS parser from an io.Reader. isInline specifies
// whether this is an inline style attribute: if so, parsing starts in the
// declaration list state, otherwise in the stylesheet state.
func NewParser(r io.Reader, isInline bool) *Parser {
	p := &Parser{
		l:     NewLexer(r),
		state: make([]State, 0, 4),
	}

	var initial State = (*Parser).parseStylesheet
	if isInline {
		initial = (*Parser).parseDeclarationList
	}
	p.state = append(p.state, initial)
	return p
}
// Err returns the error encountered during parsing, this is often io.EOF but
// also other errors can be returned. An error set by the parser itself takes
// precedence over the error reported by the lexer.
func (p *Parser) Err() error {
	if p.err == nil {
		return p.l.Err()
	}
	return p.err
}
// Restore restores the NULL byte at the end of the buffer. It delegates to
// the Restore method of the underlying lexer.
func ( p * Parser ) Restore ( ) {
p . l . Restore ( )
}
// Next returns the next Grammar. It returns ErrorGrammar when an error was
// encountered. Using Err() one can retrieve the error message.
//
// The returned TokenType and data are the parser's current token after the
// active state function has run.
func (p *Parser) Next() (GrammarType, TokenType, []byte) {
	p.err = nil

	if !p.prevEnd {
		p.tt, p.data = p.popToken(true)
	} else {
		// The previous grammar consumed a right brace as its terminator;
		// hand it to the state function now.
		p.prevEnd = false
		p.tt, p.data = RightBraceToken, endBytes
	}

	current := p.state[len(p.state)-1]
	gt := current(p)
	return gt, p.tt, p.data
}
// Values returns a slice of Tokens for the last Grammar. AtRuleGrammar and
// BeginAtRuleGrammar return the at-rule components, QualifiedRuleGrammar and
// BeginRulesetGrammar the ruleset selector, DeclarationGrammar the declaration
// values and CustomPropertyGrammar a single custom property value token. An
// ErrorGrammar caused by an unexpected token in a declaration list returns
// the tokens that were skipped.
//
// The returned slice is the parser's internal buffer: it is overwritten when
// a later grammar fills it again, and for grammar types that do not fill it,
// it may still hold the tokens of an earlier grammar.
func ( p * Parser ) Values ( ) [ ] Token {
return p . buf
}
// popToken reads tokens from the lexer until one is found that is neither
// whitespace nor a comment, and returns it. prevWS records whether any
// whitespace token was skipped on the way. A comment token is returned instead
// of skipped when allowComment is set and the state stack holds exactly one
// state.
func (p *Parser) popToken(allowComment bool) (TokenType, []byte) {
	p.prevWS = false
	for {
		tt, data := p.l.Next()
		switch tt {
		case WhitespaceToken:
			p.prevWS = true
		case CommentToken:
			if allowComment && len(p.state) == 1 {
				return tt, data
			}
		default:
			return tt, data
		}
	}
}
// initBuf empties the token buffer by truncating it to length zero, which
// keeps its backing array for reuse.
func ( p * Parser ) initBuf ( ) {
p . buf = p . buf [ : 0 ]
}
// pushBuf appends a token with the given type and data to the token buffer.
func (p *Parser) pushBuf(tt TokenType, data []byte) {
	tok := Token{TokenType: tt, Data: data}
	p.buf = append(p.buf, tok)
}
////////////////////////////////////////////////////////////////
// parseStylesheet is the state function for the top level of a stylesheet. It
// dispatches on the current token: CDO/CDC tokens, comments and errors are
// reported directly, an at-keyword starts an at-rule and anything else is
// parsed as a qualified rule.
func (p *Parser) parseStylesheet() GrammarType {
	switch p.tt {
	case CDOToken, CDCToken:
		return TokenGrammar
	case AtKeywordToken:
		return p.parseAtRule()
	case CommentToken:
		return CommentGrammar
	case ErrorToken:
		return ErrorGrammar
	}
	return p.parseQualifiedRule()
}
// parseDeclarationList is the state function for a list of declarations. It
// skips one leading comment and any run of semicolons, then dispatches on the
// current token to an at-rule, a declaration or a custom property.
//
// Any other token is a parse error: the error is recorded, the following
// tokens are collected into the buffer until a semicolon or right brace is
// read while level is zero (or the lexer reports an error), and ErrorGrammar
// is returned.
func (p *Parser) parseDeclarationList() GrammarType {
	if p.tt == CommentToken {
		p.tt, p.data = p.popToken(false)
	}
	for p.tt == SemicolonToken {
		p.tt, p.data = p.popToken(false)
	}

	switch p.tt {
	case ErrorToken:
		return ErrorGrammar
	case AtKeywordToken:
		return p.parseAtRule()
	case IdentToken:
		return p.parseDeclaration()
	case CustomPropertyNameToken:
		return p.parseCustomProperty()
	}

	// parse error
	p.initBuf()
	p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
	for {
		next, nextData := p.popToken(false)
		terminator := next == SemicolonToken || next == RightBraceToken
		if next == ErrorToken || (terminator && p.level == 0) {
			// A right brace also closes the enclosing block; let Next re-emit it.
			p.prevEnd = next == RightBraceToken
			return ErrorGrammar
		}
		p.pushBuf(next, nextData)
	}
}
////////////////////////////////////////////////////////////////
// parseAtRule parses an at-rule whose at-keyword is the current token. The
// keyword is lowercased in place and the tokens that follow it are collected
// into the buffer.
//
// It returns BeginAtRuleGrammar when a left brace is read while level is zero;
// in that case a state for the block is pushed, chosen by the at-rule name
// with any vendor prefix skipped. It returns AtRuleGrammar when a semicolon or
// right brace is read while level is zero, or when the lexer reports an
// error.
//
// Whitespace is normalized while collecting: a single whitespace token is
// pushed in front of a token that was preceded by whitespace, except in front
// of a right parenthesis, around ',' and ':', directly after a left
// parenthesis, and in front of the first token when that is a left
// parenthesis or left bracket.
func (p *Parser) parseAtRule() GrammarType {
	p.initBuf()
	parse.ToLower(p.data)
	atRuleName := p.data
	// Index 1 is only valid when there are at least two bytes.
	if len(atRuleName) > 1 && atRuleName[1] == '-' {
		if i := bytes.IndexByte(atRuleName[2:], '-'); i != -1 {
			atRuleName = atRuleName[i+2:] // skip vendor specific prefix
		}
	}
	// Drop the leading byte ('@', or the '-' that ends a vendor prefix), but
	// never slice past the end of empty data.
	if len(atRuleName) > 0 {
		atRuleName = atRuleName[1:]
	}
	atRule := ToHash(atRuleName)

	first := true
	skipWS := false
	for {
		tt, data := p.popToken(false)
		if tt == LeftBraceToken && p.level == 0 {
			if atRule == Font_Face || atRule == Page {
				p.state = append(p.state, (*Parser).parseAtRuleDeclarationList)
			} else if atRule == Document || atRule == Keyframes || atRule == Media || atRule == Supports {
				p.state = append(p.state, (*Parser).parseAtRuleRuleList)
			} else {
				p.state = append(p.state, (*Parser).parseAtRuleUnknown)
			}
			return BeginAtRuleGrammar
		} else if (tt == SemicolonToken || tt == RightBraceToken) && p.level == 0 || tt == ErrorToken {
			// A right brace also closes the enclosing block; let Next re-emit it.
			p.prevEnd = (tt == RightBraceToken)
			return AtRuleGrammar
		} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
			p.level++
		} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
			p.level--
		}

		if first {
			// No whitespace between the at-keyword and a leading '(' or '['.
			if tt == LeftParenthesisToken || tt == LeftBracketToken {
				p.prevWS = false
			}
			first = false
		}

		if len(data) == 1 && (data[0] == ',' || data[0] == ':') {
			skipWS = true
		} else if p.prevWS && !skipWS && tt != RightParenthesisToken {
			p.pushBuf(WhitespaceToken, wsBytes)
		} else {
			skipWS = false
		}
		if tt == LeftParenthesisToken {
			skipWS = true
		}
		p.pushBuf(tt, data)
	}
}
// parseAtRuleRuleList is the state function for an at-rule block that holds
// rules. A right brace or lexer error pops the state and ends the at-rule, an
// at-keyword starts a nested at-rule and anything else is parsed as a
// qualified rule.
func (p *Parser) parseAtRuleRuleList() GrammarType {
	switch p.tt {
	case RightBraceToken, ErrorToken:
		p.state = p.state[:len(p.state)-1]
		return EndAtRuleGrammar
	case AtKeywordToken:
		return p.parseAtRule()
	default:
		return p.parseQualifiedRule()
	}
}
// parseAtRuleDeclarationList is the state function for an at-rule block that
// holds declarations. Semicolons are skipped first; a right brace or lexer
// error then pops the state and ends the at-rule, anything else is handed to
// parseDeclarationList.
func (p *Parser) parseAtRuleDeclarationList() GrammarType {
	for p.tt == SemicolonToken {
		p.tt, p.data = p.popToken(false)
	}
	if p.tt != RightBraceToken && p.tt != ErrorToken {
		return p.parseDeclarationList()
	}
	p.state = p.state[:len(p.state)-1]
	return EndAtRuleGrammar
}
// parseAtRuleUnknown is the state function for the block of an at-rule that
// is not specially handled. Every token is reported as TokenGrammar while the
// nesting level is tracked; a right brace at level zero or a lexer error pops
// the state and ends the at-rule.
func (p *Parser) parseAtRuleUnknown() GrammarType {
	if p.tt == ErrorToken || (p.tt == RightBraceToken && p.level == 0) {
		p.state = p.state[:len(p.state)-1]
		return EndAtRuleGrammar
	}

	switch p.tt {
	case LeftParenthesisToken, LeftBraceToken, LeftBracketToken, FunctionToken:
		p.level++
	case RightParenthesisToken, RightBraceToken, RightBracketToken:
		p.level--
	}
	return TokenGrammar
}
// parseQualifiedRule collects the tokens in front of a ruleset block into the
// buffer, starting with the current token. It returns BeginRulesetGrammar when
// a left brace is read while level is zero (pushing the state for the block),
// QualifiedRuleGrammar when a single-byte ',' token is read (the comma itself
// is not added to the buffer), and ErrorGrammar with an error set when the
// lexer reports an error first.
//
// A single whitespace token is pushed in front of a token that was preceded
// by whitespace, except at the start, after ',', '>', '+' or '~', and while
// inside square brackets.
func ( p * Parser ) parseQualifiedRule ( ) GrammarType {
p . initBuf ( )
first := true
inAttrSel := false
skipWS := true
var tt TokenType
var data [ ] byte
for {
if first {
// The first token was already read by Next. The parser's current token is
// replaced by a whitespace token with empty data, which is what Next then
// returns alongside the grammar type.
tt , data = p . tt , p . data
p . tt = WhitespaceToken
p . data = emptyBytes
first = false
} else {
tt , data = p . popToken ( false )
}
if tt == LeftBraceToken && p . level == 0 {
p . state = append ( p . state , ( * Parser ) . parseQualifiedRuleDeclarationList )
return BeginRulesetGrammar
} else if tt == ErrorToken {
p . err = parse . NewErrorLexer ( "unexpected ending in qualified rule, expected left brace token" , p . l . r )
return ErrorGrammar
} else if tt == LeftParenthesisToken || tt == LeftBraceToken || tt == LeftBracketToken || tt == FunctionToken {
p . level ++
} else if tt == RightParenthesisToken || tt == RightBraceToken || tt == RightBracketToken {
p . level --
}
// Single-byte ',', '>', '+' and '~' tokens get no whitespace around them.
if len ( data ) == 1 && ( data [ 0 ] == ',' || data [ 0 ] == '>' || data [ 0 ] == '+' || data [ 0 ] == '~' ) {
if data [ 0 ] == ',' {
// A comma ends the tokens collected so far; the next Next call continues
// with the tokens after it.
return QualifiedRuleGrammar
}
skipWS = true
} else if p . prevWS && ! skipWS && ! inAttrSel {
p . pushBuf ( WhitespaceToken , wsBytes )
} else {
skipWS = false
}
// Track whether we are between '[' and ']' so that whitespace there is dropped.
if tt == LeftBracketToken {
inAttrSel = true
} else if tt == RightBracketToken {
inAttrSel = false
}
p . pushBuf ( tt , data )
}
}
// parseQualifiedRuleDeclarationList is the state function for the block of a
// ruleset. Semicolons are skipped first; a right brace or lexer error then
// pops the state and ends the ruleset, anything else is handed to
// parseDeclarationList.
func (p *Parser) parseQualifiedRuleDeclarationList() GrammarType {
	for p.tt == SemicolonToken {
		p.tt, p.data = p.popToken(false)
	}
	if p.tt != RightBraceToken && p.tt != ErrorToken {
		return p.parseDeclarationList()
	}
	p.state = p.state[:len(p.state)-1]
	return EndRulesetGrammar
}
// parseDeclaration parses a declaration whose property name is the current
// token. The name is lowercased in place and must be followed by a colon,
// otherwise an error is set and ErrorGrammar is returned. The value tokens are
// collected into the buffer until a semicolon or right brace is read while
// level is zero, or the lexer reports an error.
//
// A single whitespace token is pushed in front of a value token that was
// preceded by whitespace, except at the start of the value and around the
// single-byte tokens ',', '/', ':', '!' and '='.
func (p *Parser) parseDeclaration() GrammarType {
	p.initBuf()
	parse.ToLower(p.data)
	if sep, _ := p.popToken(false); sep != ColonToken {
		p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
		return ErrorGrammar
	}

	skipWS := true
	for {
		tt, data := p.popToken(false)
		terminator := tt == SemicolonToken || tt == RightBraceToken
		if tt == ErrorToken || (terminator && p.level == 0) {
			// A right brace also closes the enclosing block; let Next re-emit it.
			p.prevEnd = tt == RightBraceToken
			return DeclarationGrammar
		}

		switch tt {
		case LeftParenthesisToken, LeftBraceToken, LeftBracketToken, FunctionToken:
			p.level++
		case RightParenthesisToken, RightBraceToken, RightBracketToken:
			p.level--
		}

		switch {
		case len(data) == 1 && (data[0] == ',' || data[0] == '/' || data[0] == ':' || data[0] == '!' || data[0] == '='):
			skipWS = true
		case p.prevWS && !skipWS:
			p.pushBuf(WhitespaceToken, wsBytes)
		default:
			skipWS = false
		}
		p.pushBuf(tt, data)
	}
}
// parseCustomProperty parses a custom property whose name is the current
// token. The name must be followed by a colon, otherwise an error is set and
// ErrorGrammar is returned. The data of every following lexer token,
// whitespace and comments included, is concatenated into one value until a
// semicolon or right brace is read while level is zero, or the lexer reports
// an error. The value is pushed to the buffer as a single
// CustomPropertyValueToken.
func (p *Parser) parseCustomProperty() GrammarType {
	p.initBuf()
	if sep, _ := p.popToken(false); sep != ColonToken {
		p.err = parse.NewErrorLexer("unexpected token in declaration", p.l.r)
		return ErrorGrammar
	}

	value := []byte{}
	for {
		// Read straight from the lexer so that nothing is skipped.
		tt, data := p.l.Next()
		terminator := tt == SemicolonToken || tt == RightBraceToken
		if tt == ErrorToken || (terminator && p.level == 0) {
			// A right brace also closes the enclosing block; let Next re-emit it.
			p.prevEnd = tt == RightBraceToken
			p.pushBuf(CustomPropertyValueToken, value)
			return CustomPropertyGrammar
		}

		switch tt {
		case LeftParenthesisToken, LeftBraceToken, LeftBracketToken, FunctionToken:
			p.level++
		case RightParenthesisToken, RightBraceToken, RightBracketToken:
			p.level--
		}
		value = append(value, data...)
	}
}