package sexp

import (
	"bytes"
	"fmt"
	"io"
	"strconv"
)

// Parses S-expressions from a given io.RuneReader.
//
// Returned node is a virtual list node with all the S-expressions read from
// the stream as children. In case of a syntax error, the returned error is not
// nil.
//
// It's worth explaining where do you get *SourceFile from. The typical way to
// create it is:
//     var ctx SourceContext
//     f := ctx.AddFile(filename, length)
//
// And you'll be able to use ctx later for decoding source location
// information. It's ok to provide -1 as length if it's unknown. In that case
// though you won't be able to add more files to the given SourceContext until
// the file with unknown length is finalized, which happens when parsing is
// finished.
//
// Also f is optional, nil is a perfectly valid argument for it, in that case
// it will create a temporary context and add an unnamed file to it. Less setup
// work is required, but you lose the ability to decode error source code
// locations.
func Parse(r io.RuneReader, f *SourceFile) (*Node, error) {
	var ctx SourceContext
	if f == nil {
		f = ctx.AddFile("", -1)
	}

	var p parser
	p.r = r
	p.f = f
	p.last_seq = seq{offset: -1}
	p.expect_eof = true
	return p.parse()
}

// Parses a single S-expression node from a stream.
//
// Returns just one node, be it a value or a list, doesn't touch the rest of
// the data. In case of a syntax error, the returned error is not nil.
//
// Note that unlike Parse it requires io.RuneScanner. It's a technical
// requirement, because in some cases s-expressions syntax delimiter is not
// part of the s-expression value, like in a very simple example: "x y". "x"
// here will be returned as a value Node, but " y" should remain untouched,
// however without reading the space character we can't tell if this is the end
// of "x" or not. Hence the requirement of being able to unread one rune.
//
// It's unclear what to do about error reporting for S-expressions read from
// the stream. The usual idea of lines and columns doesn't apply here. Hence if
// you do want to report errors gracefully some hacks will be necessary to do
// so.
//
// NOTE: Maybe ParseOne will be changed in future to better serve the need of
// good error reporting.
func ParseOne(r io.RuneScanner, f *SourceFile) (*Node, error) {
	var ctx SourceContext
	if f == nil {
		f = ctx.AddFile("", -1)
	}

	var p parser
	p.r = r
	p.rs = r
	p.f = f
	p.last_seq = seq{offset: -1}
	p.expect_eof = true
	return p.parse_one_node()
}

// This error structure is Parse* functions family specific, it returns information
// about errors encountered during parsing. Location can be decoded using the
// context you passed in as an argument. If the context was nil, then the location
// is simply a byte offset from the beginning of the input stream.
type ParseError struct {
	Location SourceLoc
	message  string
}

// Satisfy the built-in error interface. Returns the error message (without
// source location).
func (e *ParseError) Error() string {
	return e.message
}

var seq_delims = map[rune]rune{
	'(': ')',
	'`': '`',
	'"': '"',
}

func is_hex(r rune) bool {
	return (r >= '0' && r <= '9') ||
		(r >= 'a' && r <= 'f') ||
		(r >= 'A' && r <= 'F')
}

func is_space(r rune) bool {
	return r == ' ' || r == '\t' || r == '\n' || r == '\r'
}

func is_delimiter(r rune) bool {
	return is_space(r) || r == ')' || r == ';' || r == 0
}

type seq struct {
	offset int
	rune   rune
}

type delim_state struct {
	last_seq   seq
	expect_eof bool
}

type parser struct {
	r      io.RuneReader
	rs     io.RuneScanner
	f      *SourceFile
	buf    bytes.Buffer
	offset int
	cur    rune
	curlen int
	delim_state
}

func (p *parser) advance_delim_state() delim_state {
	s := p.delim_state
	p.last_seq = seq{p.offset, p.cur}
	p.expect_eof = false
	return s
}

func (p *parser) restore_delim_state(s delim_state) {
	p.delim_state = s
}

func (p *parser) error(loc SourceLoc, format string, args ...interface{}) {
	panic(&ParseError{
		Location: loc,
		message:  fmt.Sprintf(format, args...),
	})
}

func (p *parser) next() {
	p.offset += p.curlen
	r, s, err := p.r.ReadRune()
	if err != nil {
		if err == io.EOF {
			if p.expect_eof {
				p.cur = 0
				p.curlen = 0
				return
			}
			p.error(p.f.Encode(p.last_seq.offset),
				"missing matching sequence delimiter '%c'",
				seq_delims[p.last_seq.rune])
		}
		p.error(p.f.Encode(p.offset),
			"unexpected read error: %s", err)
	}

	p.cur = r
	p.curlen = s
	if r == '\n' {
		p.f.AddLine(p.offset + p.curlen)
	}
}

func (p *parser) skip_spaces() {
	for {
		if is_space(p.cur) {
			p.next()
		} else {
			return
		}
	}
	panic("unreachable")
}

func (p *parser) skip_comment() {
	for {
		// there was an EOF, return
		if p.cur == 0 {
			return
		}

		// read until '\n'
		if p.cur != '\n' {
			p.next()
		} else {
			// skip '\n' and return
			p.next()
			return
		}
	}
	panic("unreachable")
}

func (p *parser) parse_node() *Node {
again:
	// the convention is that this function is called on a non-space `p.cur`
	switch p.cur {
	case ')':
		return nil
	case '(':
		return p.parse_list()
	case '"':
		return p.parse_string()
	case '`':
		return p.parse_raw_string()
	case ';':
		// skip comment
		p.skip_comment()
		p.skip_spaces()
		goto again
	case 0:
		// delayed expected EOF
		panic(io.EOF)
	default:
		return p.parse_ident()
	}
	panic("unreachable")
}

func (p *parser) parse_list() *Node {
	loc := p.f.Encode(p.offset)
	save := p.advance_delim_state()

	head := &Node{Location: loc}
	p.next() // skip opening '('

	var lastchild *Node
	for {
		p.skip_spaces()
		if p.cur == ')' {
			// skip enclosing ')', but it could be EOF also
			p.restore_delim_state(save)
			p.next()
			return head
		}

		node := p.parse_node()
		if node == nil {
			continue
		}
		if head.Children == nil {
			head.Children = node
		} else {
			lastchild.Next = node
		}
		lastchild = node
	}
	panic("unreachable")
}

func (p *parser) parse_esc_seq() {
	loc := p.f.Encode(p.offset)

	p.next() // skip '\\'
	switch p.cur {
	case 'a':
		p.next()
		p.buf.WriteByte('\a')
	case 'b':
		p.next()
		p.buf.WriteByte('\b')
	case 'f':
		p.next()
		p.buf.WriteByte('\f')
	case 'n':
		p.next()
		p.buf.WriteByte('\n')
	case 'r':
		p.next()
		p.buf.WriteByte('\r')
	case 't':
		p.next()
		p.buf.WriteByte('\t')
	case 'v':
		p.next()
		p.buf.WriteByte('\v')
	case '\\':
		p.next()
		p.buf.WriteByte('\\')
	case '"':
		p.next()
		p.buf.WriteByte('"')
	default:
		switch p.cur {
		case 'x':
			p.next() // skip 'x'
			p.parse_hex_rune(2)
		case 'u':
			p.next() // skip 'u'
			p.parse_hex_rune(4)
		case 'U':
			p.next() // skip 'U'
			p.parse_hex_rune(8)
		default:
			p.error(loc, `unrecognized escape sequence within '"' string`)
		}
	}
}

func (p *parser) parse_hex_rune(n int) {
	if n > 8 {
		panic("hex rune is too large")
	}

	var hex [8]byte
	p.next_hex(hex[:n])
	r, err := strconv.ParseUint(string(hex[:n]), 16, n*4) // 4 bits per hex digit
	panic_if_error(err)
	if n == 2 {
		p.buf.WriteByte(byte(r))
	} else {
		p.buf.WriteRune(rune(r))
	}
}

func (p *parser) next_hex(s []byte) {
	for i, n := 0, len(s); i < n; i++ {
		if !is_hex(p.cur) {
			loc := p.f.Encode(p.offset)
			p.error(loc, `'%c' is not a hex digit`, p.cur)
		}
		s[i] = byte(p.cur)
		p.next()
	}
}

func (p *parser) parse_string() *Node {
	loc := p.f.Encode(p.offset)
	save := p.advance_delim_state()

	p.next() // skip opening '"'
	for {
		switch p.cur {
		case '\n':
			p.error(loc, `newline is not allowed within '"' strings`)
		case '\\':
			p.parse_esc_seq()
		case '"':
			node := &Node{
				Location: loc,
				Value:    p.buf.String(),
			}
			p.buf.Reset()

			// consume enclosing '"', could be EOF
			p.restore_delim_state(save)
			p.next()
			return node
		default:
			p.buf.WriteRune(p.cur)
			p.next()
		}
	}
	panic("unreachable")
}

func (p *parser) parse_raw_string() *Node {
	loc := p.f.Encode(p.offset)
	save := p.advance_delim_state()

	p.next() // skip opening '`'
	for {
		if p.cur == '`' {
			node := &Node{
				Location: loc,
				Value:    p.buf.String(),
			}
			p.buf.Reset()
			// consume enclosing '`', could be EOF
			p.restore_delim_state(save)
			p.next()
			return node
		} else {
			p.buf.WriteRune(p.cur)
			p.next()
		}
	}
	panic("unreachable")
}

func (p *parser) parse_ident() *Node {
	loc := p.f.Encode(p.offset)
	for {
		if is_delimiter(p.cur) {
			node := &Node{
				Location: loc,
				Value:    p.buf.String(),
			}
			p.buf.Reset()
			return node
		} else {
			p.buf.WriteRune(p.cur)
			p.next()
		}
	}
	panic("unreachable")
}

func (p *parser) parse() (root *Node, err error) {
	defer func() {
		if e := recover(); e != nil {
			p.f.Finalize(p.offset)
			if e == io.EOF {
				return
			}
			if sexperr, ok := e.(*ParseError); ok {
				root = nil
				err = sexperr
				return
			}
			panic(e)
		}
	}()

	root = new(Node)
	p.next()

	// don't worry, will eventually panic with io.EOF :D
	var lastchild *Node
	for {
		p.skip_spaces()
		node := p.parse_node()
		if node == nil {
			p.error(p.f.Encode(p.offset),
				"unexpected ')' at the top level")
		}
		if root.Children == nil {
			root.Children = node
		} else {
			lastchild.Next = node
		}
		lastchild = node
	}
	panic("unreachable")
}

func (p *parser) parse_one_node() (node *Node, err error) {
	defer func() {
		if e := recover(); e != nil {
			p.f.Finalize(p.offset)
			if e == io.EOF {
				return
			}
			if sexperr, ok := e.(*ParseError); ok {
				node = nil
				err = sexperr
				return
			}
			panic(e)
		}
	}()

	p.next()
	p.skip_spaces()
	node = p.parse_node()
	if node == nil {
		p.error(p.f.Encode(p.offset),
			"unexpected ')' at the top level")
	}
	err = p.rs.UnreadRune()
	return
}