summaryrefslogtreecommitdiff
path: root/com/simatime/language/bs/parser.hs
blob: 3044a60843fd6ea417b5d2c30a4edc7ac9ba0921 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}
module Language.Bs.Parser (
  readExpr
, readExprFile
) where

import Control.Monad (fail)
import Control.Monad (mzero)
import Data.Char (digitToInt)
import Data.Functor.Identity (Identity)
import Data.String
import qualified Data.Text as T
import Language.Bs.Expr
import Protolude hiding ((<|>), try)
import Text.Parsec
import qualified Text.Parsec.Language as Lang
import Text.Parsec.Text
import qualified Text.Parsec.Token as Tok

lexer :: Tok.GenTokenParser T.Text () Identity
lexer = Tok.makeTokenParser style

style :: Tok.GenLanguageDef T.Text () Identity
style = Lang.emptyDef {
  Tok.commentStart = "#|"
  , Tok.commentEnd = "|#"
  , Tok.commentLine = ";"
  , Tok.opStart = mzero
  , Tok.opLetter = mzero
  , Tok.identStart = letter <|> oneOf "!$%&*/:<=>?^_~"
  , Tok.identLetter = digit <|> letter <|> oneOf "!$%&*/:<=>?^_~+-.@"
  }

parens :: Parser a -> Parser a
parens = Tok.parens lexer

whitespace :: Parser ()
whitespace = Tok.whiteSpace lexer

lexeme :: Parser a -> Parser a
lexeme = Tok.lexeme lexer

quoted :: Parser a -> Parser a
quoted p = try (char '\'') *> p

identifier :: Parser T.Text
identifier = T.pack <$> (Tok.identifier lexer <|> specialIdentifier) <?> "identifier"
  where
  specialIdentifier :: Parser String
  specialIdentifier = lexeme $ try $
    string "-" <|> string "+" <|> string "..."

-- | The @Radix@ type consists of a base integer (e.g. @10@) and a parser for
-- digits in that base (e.g. @digit@).
type Radix = (Integer, Parser Char)

-- | Parse an integer, given a radix as output by @radix@.
-- Copied from Text.Parsec.Token
numberWithRadix :: Radix -> Parser Integer
numberWithRadix (base, baseDigit) = do
  digits <- many1 baseDigit
  let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
  seq n (return n)

decimal :: Parser Integer
decimal = Tok.decimal lexer

-- | Parse a sign, return either @id@ or @negate@ based on the sign parsed.
-- Copied from Text.Parsec.Token
sign :: Parser (Integer -> Integer)
sign = char '-' *> return negate
   <|> char '+' *> return identity
   <|> return identity

intRadix :: Radix -> Parser Integer
intRadix r = sign <*> numberWithRadix r

textLiteral :: Parser T.Text
textLiteral = T.pack <$> Tok.stringLiteral lexer

nil :: Parser ()
nil = try ((char '\'') *> string "()") *> return () <?> "nil"

hashVal :: Parser Expr
hashVal = lexeme $ char '#'
  *> (char 't' *> return (Bool True)
  <|> char 'f' *> return (Bool False)
  <|> char 'b' *> (Numb <$> intRadix (2, oneOf "01"))
  <|> char 'o' *> (Numb <$> intRadix (8, octDigit))
  <|> char 'd' *> (Numb <$> intRadix (10, digit))
  <|> char 'x' *> (Numb <$> intRadix (16, hexDigit))
  <|> oneOf "ei" *> fail "Unsupported: exactness"
  <|> char '(' *> fail "Unsupported: vector"
  <|> char '\\' *> fail "Unsupported: char")


lispVal :: Parser Expr
lispVal = hashVal
  <|> Nil <$ nil
  <|> Numb <$> try (sign <*> decimal)
  <|> Atom <$> identifier
  <|> Tape <$> textLiteral
  <|> _Quote <$> quoted lispVal
  <|> List <$> parens manyExpr

manyExpr :: Parser [Expr]
manyExpr = lispVal `sepBy` whitespace

_Quote :: Expr -> Expr
_Quote x = List [Atom "quote", x]

contents :: Parser a -> ParsecT T.Text () Identity a
contents p = whitespace *> lexeme p <* eof

readExpr :: T.Text -> Either ParseError Expr
readExpr = parse (contents lispVal) "<stdin>"

readExprFile :: SourceName -> T.Text -> Either ParseError Expr
readExprFile = parse (contents (List <$> manyExpr))