aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEinhard Leichtfuß <alguien@respiranto.de>2025-05-19 01:57:15 +0200
committerEinhard Leichtfuß <alguien@respiranto.de>2025-05-19 01:57:15 +0200
commit96b767d7cab6c8ca41f656e41dd57196cb45e233 (patch)
tree4d551007ae81f52a9430298867664649b66f59fc
parent3b8dda1e8dc86cd584ee8cba0435abb6bca4301d (diff)
Expression AST and parser
Deleted old AST.hs; old code shall re-appear.
-rw-r--r--src/Control/Monad/Combinators/FailExpr.hs10
-rw-r--r--src/Language/SimpleShell/AST.hs104
-rw-r--r--src/Language/SimpleShell/AST/Expr.hs47
-rw-r--r--src/Language/SimpleShell/AST/SimpleType.hs14
-rw-r--r--src/Language/SimpleShell/Parser.hs52
-rw-r--r--src/Language/SimpleShell/Parser/Expr.hs182
6 files changed, 305 insertions, 104 deletions
diff --git a/src/Control/Monad/Combinators/FailExpr.hs b/src/Control/Monad/Combinators/FailExpr.hs
index 045bd00..0b4b063 100644
--- a/src/Control/Monad/Combinators/FailExpr.hs
+++ b/src/Control/Monad/Combinators/FailExpr.hs
@@ -20,6 +20,16 @@ where
-- Notes:
-- * We can merge any operators of the same precedence and associativity.
-- (TODO?)
+-- * CONSIDER: Support unary (prefix) operators.
+-- - Not deemed too useful, because we believe that unary (prefix)
+-- operators should always have highest precedence.
+-- - Consider the common boolean `!`.
+-- - One might want to parse `! 3 == 3` as `! (3 == 3)`, which would
+-- require `!` to have a specific precedence (lower than `==`).
+-- - However, `! true == true`, should certainly be parsed as
+-- `(! true) == true`.
+-- - We currently do not support different precedence based on the
+-- operands' types, and that seems generally difficult.
import Prelude hiding (exp)
diff --git a/src/Language/SimpleShell/AST.hs b/src/Language/SimpleShell/AST.hs
deleted file mode 100644
index 5a222c3..0000000
--- a/src/Language/SimpleShell/AST.hs
+++ /dev/null
@@ -1,104 +0,0 @@
-{-# LANGUAGE GADTs #-}
-
-module Language.SimpleShell.AST
-where
-
-data SimpleType a where
- IntType :: SimpleType Integer
- StringType :: SimpleType String
- BoolType :: SimpleType Bool
-
-data SimpleTypeList a where
- NilTs :: SimpleTypeList ()
- ConsTs :: SimpleType a -> SimpleTypeList as -> SimpleTypeList (a, as)
-
--- | Pure expression (no side effects).
-data Expr a where
- Literal :: SimpleType a -> a -> Expr a
- VarE :: Var a -> Expr a
- FunctionCall :: FunctionRef a args -> ExprList args -> Expr a
- And :: Expr Bool -> Expr Bool -> Expr Bool
- Or :: Expr Bool -> Expr Bool -> Expr Bool
- Not :: Expr Bool -> Expr Bool
- Eq :: Expr a -> Expr a -> Expr Bool
- Neq :: Expr a -> Expr a -> Expr Bool
- Gt :: Expr Integer -> Expr Integer -> Expr Bool
- Ge :: Expr Integer -> Expr Integer -> Expr Bool
- Lt :: Expr Integer -> Expr Integer -> Expr Bool
- Le :: Expr Integer -> Expr Integer -> Expr Bool
- Ternary :: Expr Bool -> Expr a -> Expr a -> Expr a
- Length :: Expr String -> Expr Integer
- StrToInt :: Expr String -> Expr Integer
- IntToStr :: Expr Integer -> Expr String
-
-data FunctionRef a args = FunctionRef String
-
-data Function a args
- = Function
- (Args args)
- (Expr a)
-
-data Function' where
- Function'
- :: SimpleType a
- -> SimpleTypeList as
- -> Function a as
- -> Function'
-
-data ExprList a where
- NilE :: ExprList ()
- ConsE :: Expr a -> ExprList as -> ExprList (a, as)
-
-data Var a = Var (SimpleType a) String
-
-data Var' where
- Var' :: Var a -> Var'
-
-data Args args where
- NilArgs :: Args ()
- ConsArgs :: Var a -> Args as -> Args (a, as)
-
-data Statement ret
- = AssignExpr ExprAssignment'
- | Print (Expr String)
- | PrintErr (Expr String)
- | If (Expr Bool) [Statement ret] [Statement ret]
- | While (Expr Bool) [Statement ret]
- | RunCommand
- (Maybe (Var Integer)) -- shell return value
- (Maybe (Var String)) -- stdout
- (Maybe (Var String)) -- stderr
- (Command ret)
- | RunProcedure ProcAssignment'
- | Return (Expr ret)
-
-data ExprAssignment' where
- ExprAssignment' :: Var a -> Expr a -> ExprAssignment'
-
-data ProcAssignment' where
- ProcAssignment'
- :: Maybe (Var a) -- procedure return value
- -> Maybe (Var String) -- stdout
- -> Maybe (Var String) -- stderr
- -> ProcRef a args
- -> Args args
- -> ProcAssignment'
-
-data Command ret
- = ExtCmd (Expr String) [Expr String]
- | CompoundCmd [Statement ret]
-
-data Procedure ret args
- = Procedure
- (Args args)
- [Var'] -- ^ variable declarations
- (Command ret)
-
-data ProcRef ret args = ProcRef String
-
-data Procedure' where
- Procedure'
- :: SimpleType ret
- -> SimpleTypeList args
- -> Procedure ret args
- -> Procedure'
diff --git a/src/Language/SimpleShell/AST/Expr.hs b/src/Language/SimpleShell/AST/Expr.hs
new file mode 100644
index 0000000..0aaf16d
--- /dev/null
+++ b/src/Language/SimpleShell/AST/Expr.hs
@@ -0,0 +1,47 @@
+module Language.SimpleShell.AST.Expr
+ ( Expr(..)
+ , TypedExpr
+ , VarName
+ , FunName
+ )
+where
+
+
+import Language.SimpleShell.AST.SimpleType (SimpleType)
+
+import Data.Text (Text)
+
+
+type VarName = Text
+type FunName = Text
+
+
+-- | Pure expression (no side effects).
+data Expr
+ = IntLiteral Integer
+ | StrLiteral String
+ | BoolLiteral Bool
+ | Var VarName
+ | FunCall FunName [Expr]
+ | And Expr Expr
+ | Or Expr Expr
+ | Not Expr
+ | Eq Expr Expr
+ | Neq Expr Expr
+ | Gt Expr Expr
+ | Ge Expr Expr
+ | Lt Expr Expr
+ | Le Expr Expr
+ | Add Expr Expr
+ | Sub Expr Expr
+ | Mul Expr Expr
+ | Div Expr Expr
+ | UMinus Expr
+ | Concat Expr Expr
+ | Ternary Expr Expr Expr
+ | Length Expr
+ | IntCast Expr
+ | StrCast Expr
+ deriving (Show)
+
+type TypedExpr = (SimpleType, Expr)
diff --git a/src/Language/SimpleShell/AST/SimpleType.hs b/src/Language/SimpleShell/AST/SimpleType.hs
new file mode 100644
index 0000000..733f19a
--- /dev/null
+++ b/src/Language/SimpleShell/AST/SimpleType.hs
@@ -0,0 +1,14 @@
+module Language.SimpleShell.AST.SimpleType
+ ( SimpleType(..)
+ , FunSig
+ )
+where
+
+
+data SimpleType
+ = IntType
+ | StrType
+ | BoolType
+ deriving (Show, Eq)
+
+type FunSig = (SimpleType, [SimpleType])
diff --git a/src/Language/SimpleShell/Parser.hs b/src/Language/SimpleShell/Parser.hs
new file mode 100644
index 0000000..be78beb
--- /dev/null
+++ b/src/Language/SimpleShell/Parser.hs
@@ -0,0 +1,52 @@
+module Language.SimpleShell.Parser
+ ( Parser
+ , lookupVar
+ , lookupFun
+ , initContext
+ , parseTest'
+ )
+where
+
+
+import Language.SimpleShell.AST.Expr (FunName, VarName)
+import Language.SimpleShell.AST.SimpleType (SimpleType, FunSig)
+
+import Control.Monad.Reader (ask, ReaderT, runReaderT)
+import Data.Map (Map)
+import qualified Data.Map as Map (empty, lookup)
+import Data.Text (Text, unpack)
+import Data.Void (Void)
+import Text.Megaparsec
+
+
+type Parser = ReaderT Context (Parsec Void Text)
+
+data Context = Context
+ { ctxVars :: Map VarName SimpleType
+ , ctxFuns :: Map FunName FunSig
+ }
+
+initContext :: Context
+initContext = Context
+ { ctxVars = Map.empty
+ , ctxFuns = Map.empty
+ }
+
+
+lookupVar :: VarName -> Parser SimpleType
+lookupVar varname = do
+ mt <- Map.lookup varname . ctxVars <$> ask
+ case mt of
+ Just t -> return t
+ Nothing -> fail $ "Undeclared variable $" ++ unpack varname
+
+lookupFun :: FunName -> Parser FunSig
+lookupFun fname = do
+ msig <- Map.lookup fname . ctxFuns <$> ask
+ case msig of
+ Just sig -> return sig
+ Nothing -> fail $ "Undefined function " ++ unpack fname
+
+
+parseTest' :: Show a => Parser a -> Text -> IO ()
+parseTest' p = parseTest (runReaderT p initContext)
diff --git a/src/Language/SimpleShell/Parser/Expr.hs b/src/Language/SimpleShell/Parser/Expr.hs
new file mode 100644
index 0000000..d4d0b81
--- /dev/null
+++ b/src/Language/SimpleShell/Parser/Expr.hs
@@ -0,0 +1,182 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TupleSections #-}
+
+module Language.SimpleShell.Parser.Expr
+ ( exprP
+ )
+where
+
+
+import Language.SimpleShell.Parser (Parser, lookupVar, lookupFun)
+import Language.SimpleShell.AST.Expr (Expr(..), TypedExpr, VarName, FunName)
+import Language.SimpleShell.AST.SimpleType (SimpleType(..))
+import Control.Monad.Combinators.FailExpr
+ ( Associativity(..)
+ , makeExprParser
+ , Operator(..)
+ )
+
+import Control.Applicative ((<|>))
+import Control.Monad.Combinators (manyTill)
+import Data.Char (isAlpha)
+import Data.Foldable (asum)
+import Data.Text (Text)
+import Text.Megaparsec (takeWhile1P)
+import Text.Megaparsec.Char (string, char)
+import qualified Text.Megaparsec.Char.Lexer as L (charLiteral, decimal)
+
+
+exprP :: Parser TypedExpr
+exprP
+ = makeExprParser termP binaryOperatorTable
+ <|> builtinUnaryFunP
+ <|> funP
+
+termP :: Parser TypedExpr
+termP
+ = literalP
+ <|> varP
+ <|> unaryOpP
+ <|> char '(' *> exprP <* char ')'
+
+
+-- | Parse expression with fixed type.
+exprP' :: String -> SimpleType -> Parser Expr
+exprP' errMsg t = do
+ (t', e) <- exprP
+ if t == t'
+ then return e
+ else fail errMsg
+
+
+literalP :: Parser TypedExpr
+literalP
+ = (IntType,) . IntLiteral <$> L.decimal
+ <|> (StrType,) . StrLiteral <$> strLitP
+ <|> (BoolType,) . BoolLiteral <$> boolLitP
+ where
+ strLitP = char '"' *> manyTill L.charLiteral (char '"')
+ boolLitP
+ = True <$ string "true"
+ <|> False <$ string "false"
+
+varP :: Parser TypedExpr
+varP = do
+ _ <- char '$'
+ x <- varNameP
+ t <- lookupVar x
+ return (t, Var x)
+
+funP :: Parser TypedExpr
+funP = do
+ fname <- funNameP
+ (t', ts) <- lookupFun fname
+ args <- mapM (exprP' "Type mismatch with function signature.") ts
+ return (t', FunCall fname args)
+
+
+-- TODO
+varNameP :: Parser VarName
+varNameP = takeWhile1P Nothing isAlpha
+
+-- TODO
+funNameP :: Parser FunName
+funNameP = takeWhile1P Nothing isAlpha
+
+
+-- Binary operators.
+
+type BinaryFun = Expr -> Expr -> Expr
+type BinarySig = SimpleType -> SimpleType -> Maybe SimpleType
+
+binaryOperatorTable :: [[Operator Parser TypedExpr]]
+binaryOperatorTable =
+ [ [ binary AssocR "||" Or $ sameSig BoolType
+ ]
+ , [ binary AssocR "&&" And $ sameSig BoolType
+ ]
+ , [ binary AssocN "==" Eq $ anyCmpSig
+ , binary AssocN "!=" Neq $ anyCmpSig
+ , binary AssocN "<=" Le $ intCmpSig
+ , binary AssocN "<" Lt $ intCmpSig
+ , binary AssocN ">=" Ge $ intCmpSig
+ , binary AssocN ">" Gt $ intCmpSig
+ ]
+ , [ Binary AssocL addP
+ , binary AssocL "-" Sub $ sameSig IntType
+ ]
+ , [ binary AssocL "*" Mul $ sameSig IntType
+ , binary AssocL "/" Div $ sameSig IntType
+ ]
+ ]
+ where
+ sameSig :: SimpleType -> BinarySig
+ sameSig t t1 t2 = if t == t1 && t == t2 then Just t else Nothing
+
+ anyCmpSig t t'
+ | t == t' = Just BoolType
+ | otherwise = Nothing
+
+ intCmpSig IntType IntType = Just BoolType
+ intCmpSig _ _ = Nothing
+
+ addP :: Parser (TypedExpr -> TypedExpr -> Maybe TypedExpr)
+ addP = do
+ _ <- char '+'
+ return $ \(t1, x1) (t2, x2) ->
+ if t1 == t2
+ then
+ case t1 of
+ StrType -> Just (StrType, Concat x1 x2)
+ IntType -> Just (IntType, Add x1 x2)
+ _ -> Nothing
+ else Nothing
+
+ binary
+ :: Associativity -> Text -> BinaryFun -> BinarySig
+ -> Operator Parser TypedExpr
+ binary assoc symbol op sig = Binary assoc $ do
+ _ <- string symbol
+ return $ \(t1, e1) (t2, e2) -> fmap (, op e1 e2) $ sig t1 t2
+
+
+-- Unary operators and builtin unary functions.
+
+type UnaryFun = Expr -> Expr
+type UnarySig = SimpleType -> Maybe SimpleType
+type UnaryParser = Parser (TypedExpr -> Maybe TypedExpr)
+
+unaryOperators, builtinUnaryFuns :: [UnaryParser]
+(unaryOperators, builtinUnaryFuns) =
+ ( [ unary "!" Not $ uniqueSig BoolType BoolType
+ , unary "-" UMinus $ uniqueSig IntType IntType
+ ]
+ , [ unary "length" Length $ uniqueSig StrType IntType
+ , unary "int" IntCast $ uniqueSig StrType IntType
+ , unary "str" StrCast $ uniqueSig IntType StrType
+ ]
+ )
+ where
+ uniqueSig :: SimpleType -> SimpleType -> UnarySig
+ uniqueSig t1 t2 t1'
+ | t1 == t1' = Just t2
+ | otherwise = Nothing
+
+ unary :: Text -> UnaryFun -> UnarySig -> UnaryParser
+ unary symbol op sig = do
+ _ <- string symbol
+ return $ \(t, e) -> fmap (, op e) $ sig t
+
+unaryOpP, builtinUnaryFunP :: Parser TypedExpr
+(unaryOpP, builtinUnaryFunP) =
+ ( asum $ map (aux "unary operator" termP) unaryOperators
+ , asum $ map (aux "builtin unary function" exprP) builtinUnaryFuns
+ )
+ where
+ aux :: String -> Parser TypedExpr -> UnaryParser -> Parser TypedExpr
+ aux desc argP p = do
+ f <- p
+ x <- argP
+ case f x of
+ Just x' -> return x'
+ Nothing -> fail $ "Mismatching " ++ desc ++ " signature."