# encoding: utf-8
require_relative 'token-scanner'
require_relative 'tokenizer'

module Crass

  # Parses a CSS string or list of tokens.
  #
  # 5. http://dev.w3.org/csswg/css-syntax/#parsing
  class Parser
    # Maps each block-opening token type to the token type that closes it.
    BLOCK_END_TOKENS = {
      :'{' => :'}',
      :'[' => :']',
      :'(' => :')'
    }.freeze

    # -- Class Methods ---------------------------------------------------------

    # Parses CSS properties (such as the contents of an HTML element's `style`
    # attribute) and returns a parse tree.
    #
    # See {Tokenizer#initialize} for _options_.
    #
    # 5.3.6. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations
    def self.parse_properties(input, options = {})
      Parser.new(input, options).parse_properties
    end

    # Parses CSS rules (such as the content of a `@media` block) and returns a
    # parse tree. The only difference from {parse_stylesheet} is that CDO/CDC
    # nodes (`<!--` and `-->`) aren't ignored.
    #
    # Qualified rules are converted into `:style_rule` nodes; every other node
    # is returned untouched.
    #
    # See {Tokenizer#initialize} for _options_.
    #
    # 5.3.3. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-rules
    def self.parse_rules(input, options = {})
      parser = Parser.new(input, options)
      rules  = parser.consume_rules

      rules.map do |rule|
        if rule[:node] == :qualified_rule
          parser.create_style_rule(rule)
        else
          rule
        end
      end
    end

    # Parses a CSS stylesheet and returns a parse tree.
    #
    # Qualified rules are converted into `:style_rule` nodes; every other node
    # is returned untouched.
    #
    # See {Tokenizer#initialize} for _options_.
    #
    # 5.3.2. http://dev.w3.org/csswg/css-syntax/#parse-a-stylesheet
    def self.parse_stylesheet(input, options = {})
      parser = Parser.new(input, options)
      rules  = parser.consume_rules(:top_level => true)

      rules.map do |rule|
        if rule[:node] == :qualified_rule
          parser.create_style_rule(rule)
        else
          rule
        end
      end
    end

    # Converts a node or array of nodes into a CSS string based on their
    # original tokenized input.
    #
    # `nil` entries in _nodes_ are skipped.
    #
    # Options:
    #
    #   * **:exclude_comments** - When `true`, comments will be excluded.
    #
    def self.stringify(nodes, options = {})
      nodes  = [nodes] unless nodes.is_a?(Array)
      string = ''

      nodes.each do |node|
        next if node.nil?

        case node[:node]
        when :at_rule
          string << '@'
          string << node[:name]
          string << self.stringify(node[:prelude], options)

          # An at-rule without a block is terminated by a semicolon.
          if node[:block]
            string << '{' << self.stringify(node[:block], options) << '}'
          else
            string << ';'
          end

        when :comment
          string << node[:raw] unless options[:exclude_comments]

        when :simple_block
          string << node[:start]
          string << self.stringify(node[:value], options)
          string << node[:end]

        when :style_rule
          string << self.stringify(node[:selector][:tokens], options)
          string << '{' << self.stringify(node[:children], options) << '}'

        else
          # Prefer the raw source text; otherwise serialize the child tokens.
          if node.key?(:raw)
            string << node[:raw]
          elsif node.key?(:tokens)
            string << self.stringify(node[:tokens], options)
          end
        end
      end

      string
    end

    # -- Instance Methods ------------------------------------------------------

    # {TokenScanner} wrapping the tokens generated from this parser's input.
    attr_reader :tokens

    # Initializes a parser based on the given _input_, which may be a CSS string
    # or an array of tokens.
    #
    # Any input that is not an Enumerable is passed through
    # {Tokenizer.tokenize} first.
    #
    # See {Tokenizer#initialize} for _options_.
    def initialize(input, options = {})
      unless input.kind_of?(Enumerable)
        input = Tokenizer.tokenize(input, options)
      end

      @tokens = TokenScanner.new(input)
    end

    # Consumes an at-rule and returns it.
    #
    # 5.4.2. http://dev.w3.org/csswg/css-syntax-3/#consume-at-rule
    def consume_at_rule(input = @tokens)
      rule = {}

      rule[:tokens] = input.collect do
        rule[:name]    = input.consume[:value]
        rule[:prelude] = []

        while token = input.consume
          node = token[:node]

          if node == :comment # Non-standard.
            next

          elsif node == :semicolon
            break

          elsif node == :'{'
            # Note: The spec says the block should _be_ the consumed simple
            # block, but Simon Sapin's CSS parsing tests and tinycss2 expect
            # only the _value_ of the consumed simple block here. I assume I'm
            # interpreting the spec too literally, so I'm going with the
            # tinycss2 behavior.
            rule[:block] = consume_simple_block(input)[:value]
            break

          elsif node == :simple_block && token[:start] == '{'
            # Note: The spec says the block should _be_ the simple block, but
            # Simon Sapin's CSS parsing tests and tinycss2 expect only the
            # _value_ of the simple block here. I assume I'm interpreting the
            # spec too literally, so I'm going with the tinycss2 behavior.
            rule[:block] = token[:value]
            break

          else
            input.reconsume
            rule[:prelude] << consume_component_value(input)
          end
        end
      end

      create_node(:at_rule, rule)
    end

    # Consumes a component value and returns it, or `nil` if there are no more
    # tokens.
    #
    # 5.4.6. http://dev.w3.org/csswg/css-syntax-3/#consume-a-component-value
    def consume_component_value(input = @tokens)
      return nil unless token = input.consume

      case token[:node]
      when :'{', :'[', :'('
        consume_simple_block(input)

      when :function
        if token.key?(:name)
          # This is a parsed function, not a function token. This step isn't
          # mentioned in the spec, but it's necessary to avoid re-parsing
          # functions that have already been parsed.
          token
        else
          consume_function(input)
        end

      else
        token
      end
    end

    # Consumes a declaration and returns it. On a parse error (no colon after
    # the name) an `:error` node is returned instead.
    #
    # 5.4.5. http://dev.w3.org/csswg/css-syntax-3/#consume-a-declaration
    def consume_declaration(input = @tokens)
      declaration = {}
      value       = []

      declaration[:tokens] = input.collect do
        declaration[:name] = input.consume[:value]

        next_token = input.peek

        # Skip whitespace between the name and the colon.
        while next_token && next_token[:node] == :whitespace
          input.consume
          next_token = input.peek
        end

        unless next_token && next_token[:node] == :colon
          # Parse error.
          #
          # Note: The spec explicitly says to return nothing here, but Simon
          # Sapin's CSS parsing tests expect an error node.
          return create_node(:error, :value => 'invalid')
        end

        input.consume

        until input.peek.nil?
          value << consume_component_value(input)
        end
      end

      # Look for !important.
      important_tokens = value.reject {|token|
        node = token[:node]
        node == :whitespace || node == :comment || node == :semicolon
      }.last(2)

      if important_tokens.size == 2 &&
          important_tokens[0][:node] == :delim &&
          important_tokens[0][:value] == '!' &&
          important_tokens[1][:node] == :ident &&
          important_tokens[1][:value].downcase == 'important'

        declaration[:important] = true

        # Locate the `!` by identity, searching from the end, so that an
        # earlier token that merely compares equal is never mistaken for it.
        bang       = important_tokens[0]
        excl_index = value.rindex {|token| token.equal?(bang) }

        # Technically the spec doesn't require us to trim trailing tokens after
        # the !important, but Simon Sapin's CSS parsing tests expect it and
        # tinycss2 does it, so we'll go along with the cool kids.
        value.slice!(excl_index, value.size - excl_index)
      else
        declaration[:important] = false
      end

      declaration[:value] = value
      create_node(:declaration, declaration)
    end

    # Consumes a list of declarations and returns them.
    #
    # By default, the returned list may include `:comment`, `:semicolon`, and
    # `:whitespace` nodes, which is non-standard.
    #
    # Options:
    #
    #   * **:strict** - Set to `true` to exclude non-standard `:comment`,
    #     `:semicolon`, and `:whitespace` nodes.
    #
    # 5.4.4. http://dev.w3.org/csswg/css-syntax/#consume-a-list-of-declarations
    def consume_declarations(input = @tokens, options = {})
      declarations = []

      while token = input.consume
        case token[:node]
        when :comment, :semicolon, :whitespace
          # Non-standard: Preserve comments, semicolons, and whitespace.
          declarations << token unless options[:strict]

        when :at_keyword
          # When parsing a style rule, this is a parse error. Otherwise it's
          # not.
          input.reconsume
          declarations << consume_at_rule(input)

        when :ident
          # Gather everything up to (but excluding) the next semicolon and
          # parse that run as a single declaration.
          decl_tokens = [token]

          while next_token = input.peek
            break if next_token[:node] == :semicolon
            decl_tokens << consume_component_value(input)
          end

          if decl = consume_declaration(TokenScanner.new(decl_tokens))
            declarations << decl
          end

        else
          # Parse error (invalid property name, etc.).
          #
          # Note: The spec doesn't say we should append anything to the list of
          # declarations here, but Simon Sapin's CSS parsing tests expect an
          # error node.
          declarations << create_node(:error, :value => 'invalid')
          input.reconsume

          # Discard component values up to the next semicolon.
          while next_token = input.peek
            break if next_token[:node] == :semicolon
            consume_component_value(input)
          end
        end
      end

      declarations
    end

    # Consumes a function and returns it.
    #
    # 5.4.8. http://dev.w3.org/csswg/css-syntax-3/#consume-a-function
    def consume_function(input = @tokens)
      function = {
        :name   => input.current[:value],
        :value  => [],
        :tokens => [input.current] # Non-standard, used for serialization.
      }

      function[:tokens].concat(input.collect {
        while token = input.consume
          case token[:node]
          when :')'
            break

          when :comment
            # Non-standard.
            next

          else
            input.reconsume
            function[:value] << consume_component_value(input)
          end
        end
      })

      create_node(:function, function)
    end

    # Consumes a qualified rule and returns it. If the input ends before a
    # block is found, an `:error` node is returned instead.
    #
    # 5.4.3. http://dev.w3.org/csswg/css-syntax-3/#consume-a-qualified-rule
    def consume_qualified_rule(input = @tokens)
      rule = {:prelude => []}

      rule[:tokens] = input.collect do
        while true
          unless token = input.consume
            # Parse error.
            #
            # Note: The spec explicitly says to return nothing here, but Simon
            # Sapin's CSS parsing tests expect an error node.
            return create_node(:error, :value => 'invalid')
          end

          if token[:node] == :'{'
            # Note: The spec says the block should _be_ the consumed simple
            # block, but Simon Sapin's CSS parsing tests and tinycss2 expect
            # only the _value_ of the consumed simple block here. I assume I'm
            # interpreting the spec too literally, so I'm going with the
            # tinycss2 behavior.
            rule[:block] = consume_simple_block(input)[:value]
            break

          elsif token[:node] == :simple_block && token[:start] == '{'
            # Note: The spec says the block should _be_ the simple block, but
            # Simon Sapin's CSS parsing tests and tinycss2 expect only the
            # _value_ of the simple block here. I assume I'm interpreting the
            # spec too literally, so I'm going with the tinycss2 behavior.
            rule[:block] = token[:value]
            break

          else
            input.reconsume
            rule[:prelude] << consume_component_value(input)
          end
        end
      end

      create_node(:qualified_rule, rule)
    end

    # Consumes a list of rules and returns them.
    #
    # Flags:
    #
    #   * **:top_level** - When `true`, `:cdc` and `:cdo` tokens are dropped
    #     instead of being parsed as the start of a qualified rule.
    #
    # 5.4.1. http://dev.w3.org/csswg/css-syntax/#consume-a-list-of-rules
    def consume_rules(flags = {})
      rules = []

      while token = @tokens.consume
        case token[:node]
        when :comment, :whitespace
          # Non-standard. Spec says to discard comments and whitespace, but we
          # keep them so we can serialize faithfully.
          rules << token

        when :cdc, :cdo
          unless flags[:top_level]
            @tokens.reconsume
            rule = consume_qualified_rule
            rules << rule if rule
          end

        when :at_keyword
          @tokens.reconsume
          rule = consume_at_rule
          rules << rule if rule

        else
          @tokens.reconsume
          rule = consume_qualified_rule
          rules << rule if rule
        end
      end

      rules
    end

    # Consumes and returns a simple block associated with the current input
    # token.
    #
    # 5.4.7. http://dev.w3.org/csswg/css-syntax/#consume-a-simple-block
    def consume_simple_block(input = @tokens)
      start_token = input.current[:node]
      end_token   = BLOCK_END_TOKENS[start_token]

      block = {
        :start  => start_token.to_s,
        :end    => end_token.to_s,
        :value  => [],
        :tokens => [input.current] # Non-standard. Used for serialization.
      }

      block[:tokens].concat(input.collect do
        while token = input.consume
          break if token[:node] == end_token

          input.reconsume
          block[:value] << consume_component_value(input)
        end
      end)

      create_node(:simple_block, block)
    end

    # Creates and returns a new parse node (a Hash whose `:node` key is
    # _type_) with the given _properties_ merged in.
    def create_node(type, properties = {})
      {:node => type}.merge!(properties)
    end

    # Parses the given _input_ tokens into a selector node and returns it.
    #
    # Doesn't bother splitting the selector list into individual selectors or
    # validating them. Feel free to do that yourself! It'll be fun!
    def create_selector(input)
      create_node(:selector,
        :value  => parse_value(input),
        :tokens => input)
    end

    # Creates a `:style_rule` node from the given qualified _rule_, and returns
    # it.
    def create_style_rule(rule)
      create_node(:style_rule,
        :selector => create_selector(rule[:prelude]),
        :children => parse_properties(rule[:block]))
    end

    # Parses a single component value and returns it. Returns an `:error` node
    # with value `'empty'` when there is nothing but whitespace, or
    # `'extra-input'` when anything other than whitespace follows the value.
    #
    # 5.3.7. http://dev.w3.org/csswg/css-syntax-3/#parse-a-component-value
    def parse_component_value(input = @tokens)
      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)

      while input.peek && input.peek[:node] == :whitespace
        input.consume
      end

      if input.peek.nil?
        return create_node(:error, :value => 'empty')
      end

      value = consume_component_value(input)

      while input.peek && input.peek[:node] == :whitespace
        input.consume
      end

      if input.peek.nil?
        value
      else
        create_node(:error, :value => 'extra-input')
      end
    end

    # Parses a list of component values and returns an array of parsed tokens.
    #
    # 5.3.8. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-component-values
    def parse_component_values(input = @tokens)
      input  = TokenScanner.new(input) unless input.is_a?(TokenScanner)
      tokens = []

      while token = consume_component_value(input)
        tokens << token
      end

      tokens
    end

    # Parses a single declaration and returns it. Returns an `:error` node
    # with value `'empty'` when there is nothing but whitespace, or `'invalid'`
    # when the first significant token is not an identifier.
    #
    # 5.3.5. http://dev.w3.org/csswg/css-syntax/#parse-a-declaration
    def parse_declaration(input = @tokens)
      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)

      while input.peek && input.peek[:node] == :whitespace
        input.consume
      end

      if input.peek.nil?
        # Syntax error.
        return create_node(:error, :value => 'empty')
      elsif input.peek[:node] != :ident
        # Syntax error.
        return create_node(:error, :value => 'invalid')
      end

      if decl = consume_declaration(input)
        return decl
      end

      # Syntax error.
      create_node(:error, :value => 'invalid')
    end

    # Parses a list of declarations and returns them.
    #
    # See {#consume_declarations} for _options_.
    #
    # 5.3.6. http://dev.w3.org/csswg/css-syntax/#parse-a-list-of-declarations
    def parse_declarations(input = @tokens, options = {})
      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)
      consume_declarations(input, options)
    end

    # Parses a list of declarations and returns an array of `:property` nodes
    # (and any non-declaration nodes that were in the input). This is useful for
    # parsing the contents of an HTML element's `style` attribute.
    def parse_properties(input = @tokens)
      properties = []

      parse_declarations(input).each do |decl|
        unless decl[:node] == :declaration
          properties << decl
          next
        end

        # Work on a copy so the declaration's own value array is not mutated.
        children = decl[:value].dup
        children.pop if children.last && children.last[:node] == :semicolon

        properties << create_node(:property,
          :name      => decl[:name],
          :value     => parse_value(decl[:value]),
          :children  => children,
          :important => decl[:important],
          :tokens    => decl[:tokens])
      end

      properties
    end

    # Parses a single rule and returns it. Returns an `:error` node with value
    # `'empty'` when there is nothing but whitespace, or `'extra-input'` when
    # anything other than whitespace follows the rule.
    #
    # 5.3.4. http://dev.w3.org/csswg/css-syntax-3/#parse-a-rule
    def parse_rule(input = @tokens)
      input = TokenScanner.new(input) unless input.is_a?(TokenScanner)

      while input.peek && input.peek[:node] == :whitespace
        input.consume
      end

      if input.peek.nil?
        # Syntax error.
        return create_node(:error, :value => 'empty')
      elsif input.peek[:node] == :at_keyword
        rule = consume_at_rule(input)
      else
        rule = consume_qualified_rule(input)
      end

      while input.peek && input.peek[:node] == :whitespace
        input.consume
      end

      if input.peek.nil?
        rule
      else
        # Syntax error.
        create_node(:error, :value => 'extra-input')
      end
    end

    # Returns the unescaped value of a selector name or property declaration.
    #
    # Comments and semicolons are dropped, `nil` entries are skipped, and the
    # result has leading and trailing whitespace stripped.
    def parse_value(nodes)
      nodes  = [nodes] unless nodes.is_a?(Array)
      string = ''

      nodes.each do |node|
        # Skip nil entries, matching Parser.stringify.
        next if node.nil?

        case node[:node]
        when :comment, :semicolon
          next

        when :at_keyword, :ident
          string << node[:value]

        when :function
          # A String value means an unparsed function token; otherwise the
          # node is a parsed function and its tokens are walked recursively.
          if node[:value].is_a?(String)
            string << node[:value]
            string << '('
          else
            string << parse_value(node[:tokens])
          end

        else
          if node.key?(:raw)
            string << node[:raw]
          elsif node.key?(:tokens)
            string << parse_value(node[:tokens])
          end
        end
      end

      string.strip
    end
  end

end