diff options
author | Einhard Leichtfuß <alguien@respiranto.de> | 2025-05-19 01:57:15 +0200 |
---|---|---|
committer | Einhard Leichtfuß <alguien@respiranto.de> | 2025-05-19 01:57:15 +0200 |
commit | 96b767d7cab6c8ca41f656e41dd57196cb45e233 (patch) | |
tree | 4d551007ae81f52a9430298867664649b66f59fc | |
parent | 3b8dda1e8dc86cd584ee8cba0435abb6bca4301d (diff) |
Expression AST and parser
Deleted old AST.hs; old code shall re-appear.
-rw-r--r-- | src/Control/Monad/Combinators/FailExpr.hs | 10 | ||||
-rw-r--r-- | src/Language/SimpleShell/AST.hs | 104 | ||||
-rw-r--r-- | src/Language/SimpleShell/AST/Expr.hs | 47 | ||||
-rw-r--r-- | src/Language/SimpleShell/AST/SimpleType.hs | 14 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser.hs | 52 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser/Expr.hs | 182 |
6 files changed, 305 insertions, 104 deletions
diff --git a/src/Control/Monad/Combinators/FailExpr.hs b/src/Control/Monad/Combinators/FailExpr.hs index 045bd00..0b4b063 100644 --- a/src/Control/Monad/Combinators/FailExpr.hs +++ b/src/Control/Monad/Combinators/FailExpr.hs @@ -20,6 +20,16 @@ where -- Notes: -- * We can merge any operators of the same precedence and associativity. -- (TODO?) +-- * CONSIDER: Support unary (prefix) operators. +-- - Not deemed too useful, because we believe that unary (prefix) +-- operators should always have highest precedence. +-- - Consider the common boolean `!`. +-- - One might want to parse `! 3 == 3` as `! (3 == 3)`, which would +-- require `!` to have a specific precedence (lower than `==`). +-- - However, `! true == true`, should certainly be parsed as +-- `(! true) == true`. +-- - We currently do not support different precedence based on the +-- operands' types, and that seems generally difficult. import Prelude hiding (exp) diff --git a/src/Language/SimpleShell/AST.hs b/src/Language/SimpleShell/AST.hs deleted file mode 100644 index 5a222c3..0000000 --- a/src/Language/SimpleShell/AST.hs +++ /dev/null @@ -1,104 +0,0 @@ -{-# LANGUAGE GADTs #-} - -module Language.SimpleShell.AST -where - -data SimpleType a where - IntType :: SimpleType Integer - StringType :: SimpleType String - BoolType :: SimpleType Bool - -data SimpleTypeList a where - NilTs :: SimpleTypeList () - ConsTs :: SimpleType a -> SimpleTypeList as -> SimpleTypeList (a, as) - --- | Pure expression (no side effects). -data Expr a where - Literal :: SimpleType a -> a -> Expr a - VarE :: Var a -> Expr a - FunctionCall :: FunctionRef a args -> ExprList args -> Expr a - And :: Expr Bool -> Expr Bool -> Expr Bool - Or :: Expr Bool -> Expr Bool -> Expr Bool - Not :: Expr Bool -> Expr Bool - Eq :: Expr a -> Expr a -> Expr Bool - Neq :: Expr a -> Expr a -> Expr Bool - Gt :: Expr Integer -> Expr Integer -> Expr Bool - Ge :: Expr Integer -> Expr Integer -> Expr Bool - Lt :: Expr Integer -> Expr Integer -> Expr Bool - Le :: Expr Integer -> Expr Integer -> Expr Bool - Ternary :: Expr Bool -> Expr a -> Expr a -> Expr a - Length :: Expr String -> Expr Integer - StrToInt :: Expr String -> Expr Integer - IntToStr :: Expr Integer -> Expr String - -data FunctionRef a args = FunctionRef String - -data Function a args - = Function - (Args args) - (Expr a) - -data Function' where - Function' - :: SimpleType a - -> SimpleTypeList as - -> Function a as - -> Function' - -data ExprList a where - NilE :: ExprList () - ConsE :: Expr a -> ExprList as -> ExprList (a, as) - -data Var a = Var (SimpleType a) String - -data Var' where - Var' :: Var a -> Var' - -data Args args where - NilArgs :: Args () - ConsArgs :: Var a -> Args as -> Args (a, as) - -data Statement ret - = AssignExpr ExprAssignment' - | Print (Expr String) - | PrintErr (Expr String) - | If (Expr Bool) [Statement ret] [Statement ret] - | While (Expr Bool) [Statement ret] - | RunCommand - (Maybe (Var Integer)) -- shell return value - (Maybe (Var String)) -- stdout - (Maybe (Var String)) -- stderr - (Command ret) - | RunProcedure ProcAssignment' - | Return (Expr ret) - -data ExprAssignment' where - ExprAssignment' :: Var a -> Expr a -> ExprAssignment' - -data ProcAssignment' where - ProcAssignment' - :: Maybe (Var a) -- procedure return value - -> Maybe (Var String) -- stdout - -> Maybe (Var String) -- stderr - -> ProcRef a args - -> Args args - -> ProcAssignment' - -data Command ret - = ExtCmd (Expr String) [Expr String] - | CompoundCmd [Statement ret] - -data Procedure ret args - = Procedure - (Args args) - [Var'] -- ^ variable declarations - (Command ret) - -data ProcRef ret args = ProcRef String - -data Procedure' where - Procedure' - :: SimpleType ret - -> SimpleTypeList args - -> Procedure ret args - -> Procedure' diff --git a/src/Language/SimpleShell/AST/Expr.hs b/src/Language/SimpleShell/AST/Expr.hs new file mode 100644 index 0000000..0aaf16d --- /dev/null +++ b/src/Language/SimpleShell/AST/Expr.hs @@ -0,0 +1,47 @@ +module Language.SimpleShell.AST.Expr + ( Expr(..) + , TypedExpr + , VarName + , FunName + ) +where + + +import Language.SimpleShell.AST.SimpleType (SimpleType) + +import Data.Text (Text) + + +type VarName = Text +type FunName = Text + + +-- | Pure expression (no side effects). +data Expr + = IntLiteral Integer + | StrLiteral String + | BoolLiteral Bool + | Var VarName + | FunCall FunName [Expr] + | And Expr Expr + | Or Expr Expr + | Not Expr + | Eq Expr Expr + | Neq Expr Expr + | Gt Expr Expr + | Ge Expr Expr + | Lt Expr Expr + | Le Expr Expr + | Add Expr Expr + | Sub Expr Expr + | Mul Expr Expr + | Div Expr Expr + | UMinus Expr + | Concat Expr Expr + | Ternary Expr Expr Expr + | Length Expr + | IntCast Expr + | StrCast Expr + deriving (Show) + +type TypedExpr = (SimpleType, Expr) diff --git a/src/Language/SimpleShell/AST/SimpleType.hs b/src/Language/SimpleShell/AST/SimpleType.hs new file mode 100644 index 0000000..733f19a --- /dev/null +++ b/src/Language/SimpleShell/AST/SimpleType.hs @@ -0,0 +1,14 @@ +module Language.SimpleShell.AST.SimpleType + ( SimpleType(..) + , FunSig + ) +where + + +data SimpleType + = IntType + | StrType + | BoolType + deriving (Show, Eq) + +type FunSig = (SimpleType, [SimpleType]) diff --git a/src/Language/SimpleShell/Parser.hs b/src/Language/SimpleShell/Parser.hs new file mode 100644 index 0000000..be78beb --- /dev/null +++ b/src/Language/SimpleShell/Parser.hs @@ -0,0 +1,52 @@ +module Language.SimpleShell.Parser + ( Parser + , lookupVar + , lookupFun + , initContext + , parseTest' + ) +where + + +import Language.SimpleShell.AST.Expr (FunName, VarName) +import Language.SimpleShell.AST.SimpleType (SimpleType, FunSig) + +import Control.Monad.Reader (ask, ReaderT, runReaderT) +import Data.Map (Map) +import qualified Data.Map as Map (empty, lookup) +import Data.Text (Text, unpack) +import Data.Void (Void) +import Text.Megaparsec + + +type Parser = ReaderT Context (Parsec Void Text) + +data Context = Context + { ctxVars :: Map VarName SimpleType + , ctxFuns :: Map FunName FunSig + } + +initContext :: Context +initContext = Context + { ctxVars = Map.empty + , ctxFuns = Map.empty + } + + +lookupVar :: VarName -> Parser SimpleType +lookupVar varname = do + mt <- Map.lookup varname . ctxVars <$> ask + case mt of + Just t -> return t + Nothing -> fail $ "Undeclared variable $" ++ unpack varname + +lookupFun :: FunName -> Parser FunSig +lookupFun fname = do + msig <- Map.lookup fname . ctxFuns <$> ask + case msig of + Just sig -> return sig + Nothing -> fail $ "Undefined function " ++ unpack fname + + +parseTest' :: Show a => Parser a -> Text -> IO () +parseTest' p = parseTest (runReaderT p initContext) diff --git a/src/Language/SimpleShell/Parser/Expr.hs b/src/Language/SimpleShell/Parser/Expr.hs new file mode 100644 index 0000000..d4d0b81 --- /dev/null +++ b/src/Language/SimpleShell/Parser/Expr.hs @@ -0,0 +1,182 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} + +module Language.SimpleShell.Parser.Expr + ( exprP + ) +where + + +import Language.SimpleShell.Parser (Parser, lookupVar, lookupFun) +import Language.SimpleShell.AST.Expr (Expr(..), TypedExpr, VarName, FunName) +import Language.SimpleShell.AST.SimpleType (SimpleType(..)) +import Control.Monad.Combinators.FailExpr + ( Associativity(..) + , makeExprParser + , Operator(..) + ) + +import Control.Applicative ((<|>)) +import Control.Monad.Combinators (manyTill) +import Data.Char (isAlpha) +import Data.Foldable (asum) +import Data.Text (Text) +import Text.Megaparsec (takeWhile1P) +import Text.Megaparsec.Char (string, char) +import qualified Text.Megaparsec.Char.Lexer as L (charLiteral, decimal) + + +exprP :: Parser TypedExpr +exprP + = makeExprParser termP binaryOperatorTable + <|> builtinUnaryFunP + <|> funP + +termP :: Parser TypedExpr +termP + = literalP + <|> varP + <|> unaryOpP + <|> char '(' *> exprP <* char ')' + + +-- | Parse expression with fixed type. +exprP' :: String -> SimpleType -> Parser Expr +exprP' errMsg t = do + (t', e) <- exprP + if t == t' + then return e + else fail errMsg + + +literalP :: Parser TypedExpr +literalP + = (IntType,) . IntLiteral <$> L.decimal + <|> (StrType,) . StrLiteral <$> strLitP + <|> (BoolType,) . BoolLiteral <$> boolLitP + where + strLitP = char '"' *> manyTill L.charLiteral (char '"') + boolLitP + = True <$ string "true" + <|> False <$ string "false" + +varP :: Parser TypedExpr +varP = do + _ <- char '$' + x <- varNameP + t <- lookupVar x + return (t, Var x) + +funP :: Parser TypedExpr +funP = do + fname <- funNameP + (t', ts) <- lookupFun fname + args <- mapM (exprP' "Type mismatch with function signature.") ts + return (t', FunCall fname args) + + +-- TODO +varNameP :: Parser VarName +varNameP = takeWhile1P Nothing isAlpha + +-- TODO +funNameP :: Parser FunName +funNameP = takeWhile1P Nothing isAlpha + + +-- Binary operators. + +type BinaryFun = Expr -> Expr -> Expr +type BinarySig = SimpleType -> SimpleType -> Maybe SimpleType + +binaryOperatorTable :: [[Operator Parser TypedExpr]] +binaryOperatorTable = + [ [ binary AssocR "||" Or $ sameSig BoolType + ] + , [ binary AssocR "&&" And $ sameSig BoolType + ] + , [ binary AssocN "==" Eq $ anyCmpSig + , binary AssocN "!=" Neq $ anyCmpSig + , binary AssocN "<=" Le $ intCmpSig + , binary AssocN "<" Lt $ intCmpSig + , binary AssocN ">=" Ge $ intCmpSig + , binary AssocN ">" Gt $ intCmpSig + ] + , [ Binary AssocL addP + , binary AssocL "-" Sub $ sameSig IntType + ] + , [ binary AssocL "*" Mul $ sameSig IntType + , binary AssocL "/" Div $ sameSig IntType + ] + ] + where + sameSig :: SimpleType -> BinarySig + sameSig t t1 t2 = if t == t1 && t == t2 then Just t else Nothing + + anyCmpSig t t' + | t == t' = Just BoolType + | otherwise = Nothing + + intCmpSig IntType IntType = Just BoolType + intCmpSig _ _ = Nothing + + addP :: Parser (TypedExpr -> TypedExpr -> Maybe TypedExpr) + addP = do + _ <- char '+' + return $ \(t1, x1) (t2, x2) -> + if t1 == t2 + then + case t1 of + StrType -> Just (StrType, Concat x1 x2) + IntType -> Just (IntType, Add x1 x2) + _ -> Nothing + else Nothing + + binary + :: Associativity -> Text -> BinaryFun -> BinarySig + -> Operator Parser TypedExpr + binary assoc symbol op sig = Binary assoc $ do + _ <- string symbol + return $ \(t1, e1) (t2, e2) -> fmap (, op e1 e2) $ sig t1 t2 + + +-- Unary operators and builtin unary functions. + +type UnaryFun = Expr -> Expr +type UnarySig = SimpleType -> Maybe SimpleType +type UnaryParser = Parser (TypedExpr -> Maybe TypedExpr) + +unaryOperators, builtinUnaryFuns :: [UnaryParser] +(unaryOperators, builtinUnaryFuns) = + ( [ unary "!" Not $ uniqueSig BoolType BoolType + , unary "-" UMinus $ uniqueSig IntType IntType + ] + , [ unary "length" Length $ uniqueSig StrType IntType + , unary "int" IntCast $ uniqueSig StrType IntType + , unary "str" StrCast $ uniqueSig IntType StrType + ] + ) + where + uniqueSig :: SimpleType -> SimpleType -> UnarySig + uniqueSig t1 t2 t1' + | t1 == t1' = Just t2 + | otherwise = Nothing + + unary :: Text -> UnaryFun -> UnarySig -> UnaryParser + unary symbol op sig = do + _ <- string symbol + return $ \(t, e) -> fmap (, op e) $ sig t + +unaryOpP, builtinUnaryFunP :: Parser TypedExpr +(unaryOpP, builtinUnaryFunP) = + ( asum $ map (aux "unary operator" termP) unaryOperators + , asum $ map (aux "builtin unary function" exprP) builtinUnaryFuns + ) + where + aux :: String -> Parser TypedExpr -> UnaryParser -> Parser TypedExpr + aux desc argP p = do + f <- p + x <- argP + case f x of + Just x' -> return x' + Nothing -> fail $ "Mismatching " ++ desc ++ " signature." |