// Streaming tokenizer module: the export is the `tokenize` factory declared
// further down in this file (function declarations are hoisted, so it can be
// exported before its definition).
module.exports = tokenize

// `through` builds the stream object from the write/end callbacks that
// `tokenize` hands to it.
var through = require('through')
// Names used by the tokenizer both as internal state identifiers and as the
// `type` field of emitted tokens (a token's type is the state that was active
// when it was emitted).
var PSEUDOSTART = 'pseudo-start'  // inside the parentheses of a pseudo selector
  , ATTR_START = 'attr-start'     // gathering the attribute name after '['
  , ANY_CHILD = 'any-child'       // whitespace seen between selector parts
  , ATTR_COMP = 'attr-comp'       // gathering the attribute comparison operator
  , ATTR_END = 'attr-end'         // gathering the attribute value
  , PSEUDOPSEUDO = '::'
  , PSEUDOCLASS = ':'
  , READY = '(ready)'             // idle: the next character selects a state
  , OPERATION = 'op'              // gathering '>', '+' or '~'
  , CLASS = 'class'
  , COMMA = 'comma'
  , ATTR = 'attr'
  , SUBJECT = '!'
  , TAG = 'tag'
  , STAR = '*'
  , ID = 'id'
/**
 * Create a streaming tokenizer for CSS-selector text.
 *
 * Text written to the returned stream is consumed one character at a time by
 * a small state machine. Tokens are emitted with `stream.queue()`:
 *
 *   - simple tokens are `{type: <state name>, data: <string>}`
 *   - attribute comparisons are `{type: ATTR, data: {lhs, rhs, cmp}}`
 *   - parenthesised pseudos are `{type: <pseudo state>, data: 'name(args)'}`
 *
 * Input may be split across any number of writes; state is carried over
 * between chunks.
 *
 * @returns {*} the stream object produced by `through(ondata, onend)`
 */
function tokenize() {
  var escaped = false   // previous character was a backslash that is still pending
    , gathered = []     // characters collected for the token in progress
    , state = READY     // current state-machine state
    , data = []         // characters of the chunk being processed
    , idx = 0           // read position inside `data`
    , stream
    , length
    , quote             // active quote character inside an attribute value / pseudo argument
    , depth             // parenthesis nesting depth inside a pseudo argument
    , lhs               // attribute name, or pseudo name
    , rhs               // pseudo state remembered while its arguments are gathered
    , cmp               // attribute comparison operator
    , c                 // character currently being processed

  return stream = through(ondata, onend)

  // Feed one chunk of text through the state machine.
  function ondata(chunk) {
    data = data.concat(chunk.split(''))
    length = data.length

    while(idx < length && (c = data[idx++])) {
      switch(state) {
        case READY: state_ready(); break
        case ANY_CHILD: state_any_child(); break
        case OPERATION: state_op(); break
        case ATTR_START: state_attr_start(); break
        case ATTR_COMP: state_attr_compare(); break
        case ATTR_END: state_attr_end(); break
        case PSEUDOCLASS:
        case PSEUDOPSEUDO: state_pseudo(); break
        case PSEUDOSTART: state_pseudostart(); break
        case ID:
        case TAG:
        case CLASS: state_gather(); break
      }
    }

    // Drop what has been consumed and rewind the cursor to match. Without the
    // rewind, `idx` kept its value from the previous chunk and every later
    // write skipped that many leading characters.
    data = data.slice(idx)
    idx = 0
  }

  // Flush the token still being gathered when the input ends.
  // NOTE(review): nothing here calls stream.queue(null); whether consumers see
  // an 'end' event therefore depends on `through` -- confirm against its docs.
  function onend(chunk) {
    if(arguments.length) {
      ondata(chunk)
    }

    if(gathered.length) {
      stream.queue(token())
    }
  }

  // Idle state: the current character decides what kind of token starts here.
  // Characters that match no case are ignored.
  function state_ready() {
    switch(true) {
      case '#' === c: state = ID; break
      case '.' === c: state = CLASS; break
      case ':' === c: state = PSEUDOCLASS; break
      case '[' === c: state = ATTR_START; break
      case '!' === c: subject(); break
      case '*' === c: star(); break
      case ',' === c: comma(); break
      case /[>\+~]/.test(c): state = OPERATION; break
      case /\s/.test(c): state = ANY_CHILD; break
      // A tag name has no sigil, so rewind to gather its first character too.
      case /[\w\d\-_]/.test(c): state = TAG; --idx; break
    }
  }

  // Emit a one-character '!' token.
  function subject() {
    state = SUBJECT
    gathered = ['!']
    stream.queue(token())
    state = READY
  }

  // Emit a one-character '*' token.
  function star() {
    state = STAR
    gathered = ['*']
    stream.queue(token())
    state = READY
  }

  // Emit a one-character ',' token.
  function comma() {
    state = COMMA
    gathered = [',']
    stream.queue(token())
    state = READY
  }

  // Gather a run of '>', '+' or '~' characters.
  function state_op() {
    if(/[>\+~]/.test(c)) {
      gathered.push(c)
      return
    }

    // chomp down the following whitespace.
    if(/\s/.test(c)) {
      return
    }

    stream.queue(token())
    state = READY
    --idx
  }

  // Whitespace between selector parts: either the lead-in to an explicit
  // operator, or a token of its own.
  function state_any_child() {
    if(/\s/.test(c)) {
      return
    }

    if(/[>\+~]/.test(c)) {
      // Re-read the operator character in the OPERATION state.
      --idx
      state = OPERATION
      return
    }

    stream.queue(token())
    state = READY
    --idx
  }

  // Gather a pseudo name; on '(' switch to gathering its arguments.
  function state_pseudo() {
    rhs = state
    state_gather(true)

    // Still inside the name.
    if(state !== READY) {
      return
    }

    if(c === '(') {
      lhs = gathered.join('')
      state = PSEUDOSTART
      gathered.length = 0
      depth = 1
      // state_gather rewound onto the '('; step over it again.
      ++idx

      return
    }

    state = PSEUDOCLASS
    stream.queue(token())
    state = READY
  }

  // Gather the argument of a parenthesised pseudo up to the matching ')'.
  function state_pseudostart() {
    // A quote is only recognised as the first character of the argument.
    if(gathered.length === 0 && !quote) {
      quote = /['"]/.test(c) ? c : null

      if(quote) {
        return
      }
    }

    if(quote) {
      if(!escaped && c === quote) {
        quote = null

        return
      }

      if(c === '\\') {
        // An escaped backslash is kept literally and ends the escape; the
        // flag has to be toggled (as state_attr_end does), otherwise a
        // following closing quote is mistaken for an escaped one.
        if(escaped) {
          gathered.push(c)
        }

        escaped = !escaped

        return
      }

      escaped = false
      gathered.push(c)

      return
    }

    gathered.push(c)

    if(c === '(') {
      ++depth
    } else if(c === ')') {
      --depth
    }

    if(!depth) {
      // Discard the closing ')' that was just gathered.
      gathered.pop()
      stream.queue({
          type: rhs
        , data: lhs + '(' + gathered.join('') + ')'
      })

      state = READY
      lhs = rhs = cmp = null
      gathered.length = 0
    }

    return
  }

  // Gather the attribute name after '['.
  function state_attr_start() {
    state_gather(true)

    if(state !== READY) {
      return
    }

    // '[name]': attribute with no comparison.
    if(c === ']') {
      state = ATTR
      stream.queue(token())
      state = READY

      return
    }

    lhs = gathered.join('')
    gathered.length = 0
    state = ATTR_COMP
  }

  // Gather the comparison operator; it is complete after two operator
  // characters or as soon as '=' is seen.
  function state_attr_compare() {
    if(/[=~|$^*]/.test(c)) {
      gathered.push(c)
    }

    if(gathered.length === 2 || c === '=') {
      cmp = gathered.join('')
      gathered.length = 0
      state = ATTR_END
      quote = null

      return
    }
  }

  // Gather the (optionally quoted) attribute value and emit the ATTR token.
  function state_attr_end() {
    if(!gathered.length && !quote) {
      quote = /['"]/.test(c) ? c : null

      if(quote) {
        return
      }
    }

    if(quote) {
      if(!escaped && c === quote) {
        quote = null

        return
      }

      if(c === '\\') {
        if(escaped) {
          gathered.push(c)
        }

        escaped = !escaped

        return
      }

      escaped = false
      gathered.push(c)

      return
    }

    state_gather(true)

    if(state !== READY) {
      return
    }

    stream.queue({
        type: ATTR
      , data: {
            lhs: lhs
          , rhs: gathered.join('')
          , cmp: cmp
        }
    })

    state = READY
    lhs = rhs = cmp = null
    gathered.length = 0

    return
  }

  // Gather identifier characters. A non-identifier character ends the run:
  // the state drops back to READY and the character is re-read there. A
  // backslash instead marks the next character as escaped so it is gathered
  // whatever it is. With `quietly` set the caller emits the token itself.
  function state_gather(quietly) {
    if(/[^\d\w\-_]/.test(c) && !escaped) {
      if(c === '\\') {
        escaped = true
      } else {
        !quietly && stream.queue(token())
        state = READY
        --idx
      }

      return
    }

    escaped = false
    gathered.push(c)
  }

  // Build a token from the gathered characters, typed by the current state,
  // and reset the gather buffer.
  function token() {
    var data = gathered.join('')

    gathered.length = 0

    return {
        type: state
      , data: data
    }
  }
}