1 Star 4 Fork 0

chaosue / go-toml

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
lexer.go 9.21 KB
一键复制 编辑 Web IDE 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
// TOML lexer.
//
// Written using the principles developped by Rob Pike in
// http://www.youtube.com/watch?v=HxaD_trXwRE
package toml
import (
"fmt"
"regexp"
"strconv"
"strings"
"unicode/utf8"
)
// dateRegexp matches an RFC 3339-style datetime at the start of the
// remaining input; it is compiled once in init at the bottom of the file.
var dateRegexp *regexp.Regexp

// Define state functions
//
// tomlLexStateFn is one lexer state: it consumes input and returns the
// next state to run, or nil to terminate the state machine.
type tomlLexStateFn func() tomlLexStateFn

// Define lexer
//
// tomlLexer holds all scanning state while tokenizing a TOML document.
type tomlLexer struct {
	input  string     // full document being scanned
	start  int        // byte offset where the current token begins
	pos    int        // byte offset of the current scan position
	width  int        // byte width of the rune last read by next()
	tokens chan token // emitted tokens; closed when lexing finishes
	depth  int        // array nesting depth ('[' increments, ']' decrements)
	line   int        // 1-based line number of the current token start
	col    int        // 1-based column number of the current token start
}
// run drives the lexer state machine: each state function returns the
// next state, and a nil state ends lexing. The token channel is closed
// afterwards so the consumer knows no more tokens will arrive.
func (l *tomlLexer) run() {
	var state tomlLexStateFn = l.lexVoid
	for state != nil {
		state = state()
	}
	close(l.tokens)
}
// nextStart moves the token start marker up to the current scan
// position, updating the line/column counters for every rune skipped.
func (l *tomlLexer) nextStart() {
	// range over the consumed span decodes rune by rune (utf8),
	// counting newlines to keep line/col accurate.
	for _, r := range l.input[l.start:l.pos] {
		if r == '\n' {
			l.line++
			l.col = 1
		} else {
			l.col++
		}
	}
	// the next token begins where scanning stopped
	l.start = l.pos
}
// emit sends the text accumulated since the last token boundary as a
// token of type t, stamped with the current position, then advances
// the start marker.
func (l *tomlLexer) emit(t tokenType) {
	tok := token{
		Position: Position{l.line, l.col},
		typ:      t,
		val:      l.input[l.start:l.pos],
	}
	l.tokens <- tok
	l.nextStart()
}
// emitWithValue emits a token of type t whose value is supplied by the
// caller instead of being sliced from the input (used for strings whose
// escape sequences have been decoded), then advances the start marker.
func (l *tomlLexer) emitWithValue(t tokenType, value string) {
	tok := token{
		Position: Position{l.line, l.col},
		typ:      t,
		val:      value,
	}
	l.tokens <- tok
	l.nextStart()
}
// next consumes and returns the next rune of the input, or eof when the
// input is exhausted. The consumed width is remembered so backup() can
// undo exactly one call.
func (l *tomlLexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += w
	return r
}
// ignore discards the text accumulated since the last token boundary
// (advancing start to pos) without emitting a token.
func (l *tomlLexer) ignore() {
	l.nextStart()
}
// backup un-consumes the last rune read by next(). It is only valid
// immediately after a single call to next().
func (l *tomlLexer) backup() {
	l.pos -= l.width
}
// errorf emits a tokenError carrying the formatted message and returns
// nil, which terminates the state machine in run().
func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn {
	msg := fmt.Sprintf(format, args...)
	l.tokens <- token{
		Position: Position{l.line, l.col},
		typ:      tokenError,
		val:      msg,
	}
	return nil
}
// peek returns the next rune without consuming it.
func (l *tomlLexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
// accept consumes the next rune if it appears in valid and reports
// whether it did; otherwise the rune is left unconsumed.
func (l *tomlLexer) accept(valid string) bool {
	if strings.ContainsRune(valid, l.next()) {
		return true
	}
	l.backup()
	return false
}
// follow reports whether the unconsumed input starts with next,
// without consuming anything.
func (l *tomlLexer) follow(next string) bool {
	return strings.HasPrefix(l.input[l.pos:], next)
}
// lexVoid is the top-level state, active between key/value pairs and
// key-group headers. It dispatches to the appropriate sub-state based
// on the next rune.
func (l *tomlLexer) lexVoid() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '[':
			return l.lexKeyGroup
		case '#':
			return l.lexComment
		case '=':
			return l.lexEqual
		}
		if isSpace(next) {
			// whitespace between tokens carries no meaning
			l.ignore()
		}
		if l.depth > 0 {
			// still inside an unclosed array: values follow, not keys
			return l.lexRvalue
		}
		if isKeyStartChar(next) {
			return l.lexKey
		}
		// NOTE(review): a rune matching none of the cases above is
		// consumed here and silently skipped rather than reported as
		// an error — confirm this leniency is intended.
		if l.next() == eof {
			break
		}
	}
	l.emit(tokenEOF)
	return nil
}
// lexRvalue scans the right-hand side of an assignment: booleans,
// strings, dates, numbers, arrays, comments, and structural punctuation.
func (l *tomlLexer) lexRvalue() tomlLexStateFn {
	for {
		next := l.peek()
		switch next {
		case '.':
			// a value cannot begin with '.'; floats need a leading digit
			return l.errorf("cannot start float with a dot")
		case '=':
			return l.errorf("cannot have multiple equals for the same key")
		case '[':
			l.depth++ // entering an array
			return l.lexLeftBracket
		case ']':
			l.depth-- // leaving an array
			return l.lexRightBracket
		case '#':
			return l.lexComment
		case '"':
			return l.lexString
		case '\'':
			return l.lexLiteralString
		case ',':
			return l.lexComma
		case '\n':
			l.ignore()
			l.pos++
			if l.depth == 0 {
				// not inside an array: the value ended with the line
				return l.lexVoid
			}
			// arrays may span multiple lines; keep scanning values
			return l.lexRvalue
		}
		if l.follow("true") {
			return l.lexTrue
		}
		if l.follow("false") {
			return l.lexFalse
		}
		// NOTE(review): a bare alphanumeric in value position is lexed
		// as a key — confirm against the parser that this is the
		// intended recovery path.
		if isAlphanumeric(next) {
			return l.lexKey
		}
		// Dates are matched before numbers so that the leading digits
		// of a datetime are not consumed as an integer.
		dateMatch := dateRegexp.FindString(l.input[l.pos:])
		if dateMatch != "" {
			l.ignore()
			l.pos += len(dateMatch)
			return l.lexDate
		}
		if next == '+' || next == '-' || isDigit(next) {
			return l.lexNumber
		}
		if isSpace(next) {
			l.ignore()
		}
		if l.next() == eof {
			break
		}
	}
	l.emit(tokenEOF)
	return nil
}
// lexDate emits the datetime text already advanced over by lexRvalue
// (which matched it via dateRegexp) as a tokenDate.
func (l *tomlLexer) lexDate() tomlLexStateFn {
	l.emit(tokenDate)
	return l.lexRvalue
}
// lexTrue consumes the literal "true" (already confirmed by follow in
// lexRvalue) and emits it as a tokenTrue.
func (l *tomlLexer) lexTrue() tomlLexStateFn {
	l.ignore()
	l.pos += len("true")
	l.emit(tokenTrue)
	return l.lexRvalue
}
// lexFalse consumes the literal "false" (already confirmed by follow in
// lexRvalue) and emits it as a tokenFalse.
func (l *tomlLexer) lexFalse() tomlLexStateFn {
	l.ignore()
	l.pos += len("false")
	l.emit(tokenFalse)
	return l.lexRvalue
}
// lexEqual consumes a single '=' and emits tokenEqual, then switches to
// value scanning.
func (l *tomlLexer) lexEqual() tomlLexStateFn {
	l.ignore()
	l.accept("=")
	l.emit(tokenEqual)
	return l.lexRvalue
}
// lexComma consumes a single ',' (an array element separator) and emits
// tokenComma.
func (l *tomlLexer) lexComma() tomlLexStateFn {
	l.ignore()
	l.accept(",")
	l.emit(tokenComma)
	return l.lexRvalue
}
// lexKey scans a bare key: it consumes runes while isKeyChar accepts
// them, then emits the span as a tokenKey.
func (l *tomlLexer) lexKey() tomlLexStateFn {
	l.ignore()
	for r := l.next(); isKeyChar(r); r = l.next() {
		// NOTE(review): this branch only runs if isKeyChar('#') is
		// true; if isKeyChar (defined elsewhere) excludes '#', it is
		// unreachable — confirm against that definition.
		if r == '#' {
			return l.errorf("keys cannot contain # character")
		}
	}
	l.backup()
	l.emit(tokenKey)
	return l.lexVoid
}
// lexComment discards everything up to and including the end of the
// line; comments produce no tokens.
func (l *tomlLexer) lexComment() tomlLexStateFn {
	for r := l.next(); r != '\n' && r != eof; r = l.next() {
		// skip comment body
	}
	l.ignore()
	return l.lexVoid
}
// lexLeftBracket consumes a '[' opening an array value and emits
// tokenLeftBracket. (Depth bookkeeping happens in lexRvalue.)
func (l *tomlLexer) lexLeftBracket() tomlLexStateFn {
	l.ignore()
	l.pos++
	l.emit(tokenLeftBracket)
	return l.lexRvalue
}
// lexLiteralString scans a single-quoted (literal) string, in which no
// escape sequences are interpreted. Both the one-quote and triple-quote
// (''') forms are handled; in the triple-quoted form a newline directly
// after the opening delimiter is discarded.
//
// The accumulated value is emitted as a tokenString whose position is
// that of the string's content.
func (l *tomlLexer) lexLiteralString() tomlLexStateFn {
	l.pos++ // step over the opening quote
	l.ignore()

	// strings.Builder avoids the quadratic cost of repeated string
	// concatenation on long strings.
	var growingString strings.Builder

	// handle special case for triple-quote
	terminator := "'"
	if l.follow("''") {
		l.pos += 2
		l.ignore()
		terminator = "'''"
		// special case: discard leading newline
		if l.peek() == '\n' {
			l.pos++
			l.ignore()
		}
	}

	// copy runes verbatim until the terminator is found
	for {
		if l.follow(terminator) {
			l.emitWithValue(tokenString, growingString.String())
			l.pos += len(terminator)
			l.ignore()
			return l.lexRvalue
		}
		growingString.WriteRune(l.peek())
		if l.next() == eof {
			break
		}
	}
	return l.errorf("unclosed string")
}
// lexString scans a double-quoted (basic) string, decoding backslash
// escape sequences. Both the one-quote and triple-quote (""") forms are
// handled; in the triple-quoted form a newline directly after the
// opening delimiter is discarded.
//
// The decoded value is emitted as a tokenString; invalid or unfinished
// escapes terminate lexing with an error token.
func (l *tomlLexer) lexString() tomlLexStateFn {
	l.pos++ // step over the opening quote
	l.ignore()

	// strings.Builder avoids the quadratic cost of repeated string
	// concatenation on long strings.
	var growingString strings.Builder

	// handle special case for triple-quote
	terminator := "\""
	if l.follow("\"\"") {
		l.pos += 2
		l.ignore()
		terminator = "\"\"\""
		// special case: discard leading newline
		if l.peek() == '\n' {
			l.pos++
			l.ignore()
		}
	}

	for {
		if l.follow(terminator) {
			l.emitWithValue(tokenString, growingString.String())
			l.pos += len(terminator)
			l.ignore()
			return l.lexRvalue
		}
		if l.follow("\\") {
			l.pos++
			switch l.peek() {
			case '\r', '\n', '\t', ' ':
				// backslash before whitespace: skip all whitespace
				// chars following the backslash
				l.pos++
				for strings.ContainsRune("\r\n\t ", l.peek()) {
					l.pos++
				}
				l.pos-- // the l.next() below re-consumes the last one
			case '"':
				growingString.WriteString("\"")
			case 'n':
				growingString.WriteString("\n")
			case 'b':
				growingString.WriteString("\b")
			case 'f':
				growingString.WriteString("\f")
			case '/':
				growingString.WriteString("/")
			case 't':
				growingString.WriteString("\t")
			case 'r':
				growingString.WriteString("\r")
			case '\\':
				growingString.WriteString("\\")
			case 'u':
				// \uXXXX: exactly four hex digits
				l.pos++
				code := ""
				for i := 0; i < 4; i++ {
					c := l.peek()
					l.pos++
					if !isHexDigit(c) {
						return l.errorf("unfinished unicode escape")
					}
					code = code + string(c)
				}
				l.pos-- // the l.next() below re-consumes the last digit
				intcode, err := strconv.ParseInt(code, 16, 32)
				if err != nil {
					return l.errorf("invalid unicode escape: \\u" + code)
				}
				growingString.WriteRune(rune(intcode))
			default:
				return l.errorf("invalid escape sequence: \\" + string(l.peek()))
			}
		} else {
			growingString.WriteRune(l.peek())
		}
		if l.next() == eof {
			break
		}
	}
	return l.errorf("unclosed string")
}
// lexKeyGroup handles a '[' at statement position: either a key-group
// header [name] or, when a second '[' follows, an array-of-key-groups
// header [[name]].
func (l *tomlLexer) lexKeyGroup() tomlLexStateFn {
	l.ignore()
	l.pos++ // consume the first '['
	if l.peek() == '[' {
		// token '[[' signifies an array of anonymous key groups
		l.pos++
		l.emit(tokenDoubleLeftBracket)
		return l.lexInsideKeyGroupArray
	}
	// vanilla key group
	l.emit(tokenLeftBracket)
	return l.lexInsideKeyGroup
}
// lexInsideKeyGroupArray scans the group name inside a '[[...]]' header,
// emitting the name (tokenKeyGroupArray) and the closing double bracket
// (tokenDoubleRightBracket).
func (l *tomlLexer) lexInsideKeyGroupArray() tomlLexStateFn {
	for {
		if l.peek() == ']' {
			if l.pos > l.start {
				// a non-empty name was accumulated before the bracket
				l.emit(tokenKeyGroupArray)
			}
			l.ignore()
			l.pos++
			if l.peek() != ']' {
				break // a lone ']' cannot close '[[' — report below
			}
			l.pos++
			l.emit(tokenDoubleRightBracket)
			return l.lexVoid
		} else if l.peek() == '[' {
			// bug fix: the message previously named ']' although this
			// branch triggers on '['
			return l.errorf("group name cannot contain '['")
		}
		if l.next() == eof {
			break
		}
	}
	return l.errorf("unclosed key group array")
}
// lexInsideKeyGroup scans the group name inside a '[...]' header,
// emitting the name (tokenKeyGroup) and the closing bracket
// (tokenRightBracket).
func (l *tomlLexer) lexInsideKeyGroup() tomlLexStateFn {
	for {
		if l.peek() == ']' {
			if l.pos > l.start {
				// a non-empty name was accumulated before the bracket
				l.emit(tokenKeyGroup)
			}
			l.ignore()
			l.pos++
			l.emit(tokenRightBracket)
			return l.lexVoid
		} else if l.peek() == '[' {
			// bug fix: the message previously named ']' although this
			// branch triggers on '['
			return l.errorf("group name cannot contain '['")
		}
		if l.next() == eof {
			break
		}
	}
	return l.errorf("unclosed key group")
}
// lexRightBracket consumes a ']' closing an array value and emits
// tokenRightBracket. (Depth bookkeeping happens in lexRvalue.)
func (l *tomlLexer) lexRightBracket() tomlLexStateFn {
	l.ignore()
	l.pos++
	l.emit(tokenRightBracket)
	return l.lexRvalue
}
// lexNumber scans an integer or float literal (optional sign, optional
// decimal point, optional exponent) and emits tokenInteger or
// tokenFloat accordingly.
func (l *tomlLexer) lexNumber() tomlLexStateFn {
	l.ignore()
	// at most one leading sign
	if !l.accept("+") {
		l.accept("-")
	}
	pointSeen := false
	expSeen := false
	digitSeen := false
	for {
		next := l.next()
		if next == '.' {
			if pointSeen {
				return l.errorf("cannot have two dots in one float")
			}
			if !isDigit(l.peek()) {
				return l.errorf("float cannot end with a dot")
			}
			pointSeen = true
		} else if next == 'e' || next == 'E' {
			expSeen = true
			// the exponent may carry its own sign
			if !l.accept("+") {
				l.accept("-")
			}
		} else if isDigit(next) {
			digitSeen = true
		} else {
			// first non-number rune ends the literal
			l.backup()
			break
		}
		// a '.' before any digit means the literal started with a dot
		if pointSeen && !digitSeen {
			return l.errorf("cannot start float with a dot")
		}
	}
	if !digitSeen {
		return l.errorf("no digit in that number")
	}
	// a dot or an exponent makes it a float; otherwise it is an integer
	if pointSeen || expSeen {
		l.emit(tokenFloat)
	} else {
		l.emit(tokenInteger)
	}
	return l.lexRvalue
}
// init compiles the datetime matcher once at package load. A raw string
// literal replaces the previous interpreted string so each regexp
// backslash is written once instead of doubled (idiomatic for Go
// regexps); the compiled pattern is byte-for-byte identical.
func init() {
	dateRegexp = regexp.MustCompile(`^\d{1,4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})`)
}
// Entry point
func lexToml(input string) chan token {
l := &tomlLexer{
input: input,
tokens: make(chan token),
line: 1,
col: 1,
}
go l.run()
return l.tokens
}
Go
1
https://gitee.com/chaos.su/go-toml.git
git@gitee.com:chaos.su/go-toml.git
chaos.su
go-toml
go-toml
orderedkeys

搜索帮助

14c37bed 8189591 565d56ea 8189591