summaryrefslogtreecommitdiff
path: root/Com/Simatime/language/Bs/Parser.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Com/Simatime/language/Bs/Parser.hs')
-rw-r--r--Com/Simatime/language/Bs/Parser.hs121
1 files changed, 121 insertions, 0 deletions
diff --git a/Com/Simatime/language/Bs/Parser.hs b/Com/Simatime/language/Bs/Parser.hs
new file mode 100644
index 0000000..3044a60
--- /dev/null
+++ b/Com/Simatime/language/Bs/Parser.hs
@@ -0,0 +1,121 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+module Language.Bs.Parser (
+ readExpr
+, readExprFile
+) where
+
+import Control.Monad (fail)
+import Control.Monad (mzero)
+import Data.Char (digitToInt)
+import Data.Functor.Identity (Identity)
+import Data.String
+import qualified Data.Text as T
+import Language.Bs.Expr
+import Protolude hiding ((<|>), try)
+import Text.Parsec
+import qualified Text.Parsec.Language as Lang
+import Text.Parsec.Text
+import qualified Text.Parsec.Token as Tok
+
+lexer :: Tok.GenTokenParser T.Text () Identity
+lexer = Tok.makeTokenParser style
+
+style :: Tok.GenLanguageDef T.Text () Identity
+style = Lang.emptyDef {
+ Tok.commentStart = "#|"
+ , Tok.commentEnd = "|#"
+ , Tok.commentLine = ";"
+ , Tok.opStart = mzero
+ , Tok.opLetter = mzero
+ , Tok.identStart = letter <|> oneOf "!$%&*/:<=>?^_~"
+ , Tok.identLetter = digit <|> letter <|> oneOf "!$%&*/:<=>?^_~+-.@"
+ }
+
+parens :: Parser a -> Parser a
+parens = Tok.parens lexer
+
+whitespace :: Parser ()
+whitespace = Tok.whiteSpace lexer
+
+lexeme :: Parser a -> Parser a
+lexeme = Tok.lexeme lexer
+
+quoted :: Parser a -> Parser a
+quoted p = try (char '\'') *> p
+
+identifier :: Parser T.Text
+identifier = T.pack <$> (Tok.identifier lexer <|> specialIdentifier) <?> "identifier"
+ where
+ specialIdentifier :: Parser String
+ specialIdentifier = lexeme $ try $
+ string "-" <|> string "+" <|> string "..."
+
+-- | The @Radix@ type consists of a base integer (e.g. @10@) and a parser for
+-- digits in that base (e.g. @digit@).
+type Radix = (Integer, Parser Char)
+
+-- | Parse an integer, given a radix as output by @radix@.
+-- Copied from Text.Parsec.Token
+numberWithRadix :: Radix -> Parser Integer
+numberWithRadix (base, baseDigit) = do
+ digits <- many1 baseDigit
+ let n = foldl (\x d -> base*x + toInteger (digitToInt d)) 0 digits
+ seq n (return n)
+
+decimal :: Parser Integer
+decimal = Tok.decimal lexer
+
+-- | Parse a sign, return either @id@ or @negate@ based on the sign parsed.
+-- Copied from Text.Parsec.Token
+sign :: Parser (Integer -> Integer)
+sign = char '-' *> return negate
+ <|> char '+' *> return identity
+ <|> return identity
+
+intRadix :: Radix -> Parser Integer
+intRadix r = sign <*> numberWithRadix r
+
+textLiteral :: Parser T.Text
+textLiteral = T.pack <$> Tok.stringLiteral lexer
+
+nil :: Parser ()
+nil = try ((char '\'') *> string "()") *> return () <?> "nil"
+
+hashVal :: Parser Expr
+hashVal = lexeme $ char '#'
+ *> (char 't' *> return (Bool True)
+ <|> char 'f' *> return (Bool False)
+ <|> char 'b' *> (Numb <$> intRadix (2, oneOf "01"))
+ <|> char 'o' *> (Numb <$> intRadix (8, octDigit))
+ <|> char 'd' *> (Numb <$> intRadix (10, digit))
+ <|> char 'x' *> (Numb <$> intRadix (16, hexDigit))
+ <|> oneOf "ei" *> fail "Unsupported: exactness"
+ <|> char '(' *> fail "Unsupported: vector"
+ <|> char '\\' *> fail "Unsupported: char")
+
+
+lispVal :: Parser Expr
+lispVal = hashVal
+ <|> Nil <$ nil
+ <|> Numb <$> try (sign <*> decimal)
+ <|> Atom <$> identifier
+ <|> Tape <$> textLiteral
+ <|> _Quote <$> quoted lispVal
+ <|> List <$> parens manyExpr
+
+manyExpr :: Parser [Expr]
+manyExpr = lispVal `sepBy` whitespace
+
+_Quote :: Expr -> Expr
+_Quote x = List [Atom "quote", x]
+
+contents :: Parser a -> ParsecT T.Text () Identity a
+contents p = whitespace *> lexeme p <* eof
+
+readExpr :: T.Text -> Either ParseError Expr
+readExpr = parse (contents lispVal) "<stdin>"
+
+readExprFile :: SourceName -> T.Text -> Either ParseError Expr
+readExprFile = parse (contents (List <$> manyExpr))