diff options
author | Einhard Leichtfuß <alguien@respiranto.de> | 2025-05-19 21:17:52 +0200 |
---|---|---|
committer | Einhard Leichtfuß <alguien@respiranto.de> | 2025-05-19 21:17:52 +0200 |
commit | 36ae80ba404aef44dcc8d606e4d3b399ee05c6af (patch) | |
tree | 861fe5e372828c720b63945d3a8d959f654fb06a | |
parent | 1eef63739d9da401bdd4c89da2cfd8d3d9b7acb6 (diff) |
Add function parser
Also, we now define and use proper symbol and keyword parsers.
-rw-r--r-- | src/Language/SimpleShell/AST/Function.hs | 4 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser.hs | 22 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser/Expr.hs | 75 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser/Function.hs | 36 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser/Name.hs | 40 | ||||
-rw-r--r-- | src/Language/SimpleShell/Parser/SimpleType.hs | 27 |
6 files changed, 161 insertions, 43 deletions
diff --git a/src/Language/SimpleShell/AST/Function.hs b/src/Language/SimpleShell/AST/Function.hs index 7594c0d..db85585 100644 --- a/src/Language/SimpleShell/AST/Function.hs +++ b/src/Language/SimpleShell/AST/Function.hs @@ -5,8 +5,8 @@ where import Language.SimpleShell.AST.Expr (Expr) -import Language.SimpleShell.AST.Name (VarName) +import Language.SimpleShell.AST.Name (FunName, VarName) -data Function = Function [VarName] Expr +data Function = Function FunName [VarName] Expr deriving (Show) diff --git a/src/Language/SimpleShell/Parser.hs b/src/Language/SimpleShell/Parser.hs index dd65ffe..d75602b 100644 --- a/src/Language/SimpleShell/Parser.hs +++ b/src/Language/SimpleShell/Parser.hs @@ -3,8 +3,10 @@ module Language.SimpleShell.Parser ( Parser , lexeme + , symbol , lookupVar , lookupFun + , declareVars , initContext , parseTest ) @@ -12,18 +14,22 @@ where import Language.SimpleShell.AST.Name (FunName, VarName) -import Language.SimpleShell.AST.SimpleType (SimpleType, FunSig) +import Language.SimpleShell.AST.SimpleType (SimpleType, FunSig, Typed) -import Control.Monad.Reader (ask, ReaderT, runReaderT) +import Control.Monad (void) +import Control.Monad.Reader (ask, local, ReaderT, runReaderT) +import Data.List (nub) import Data.Map (Map) -import qualified Data.Map as Map (empty, lookup) +import qualified Data.Map as Map (empty, lookup, fromList) import Data.Text (Text, unpack) +import Data.Tuple (swap) import Data.Void (Void) import Text.Megaparsec (Parsec) import qualified Text.Megaparsec as MP (parseTest) import Text.Megaparsec.Char (space1) import qualified Text.Megaparsec.Char.Lexer as L ( lexeme + , symbol , space , skipLineComment , skipBlockComment @@ -53,6 +59,9 @@ sc = L.space lexeme :: Parser a -> Parser a lexeme = L.lexeme sc +symbol :: Text -> Parser () +symbol = void . L.symbol sc + lookupVar :: VarName -> Parser SimpleType lookupVar varname = do @@ -68,6 +77,13 @@ lookupFun fname = do Just sig -> return sig Nothing -> fail $ "Undefined function " ++ unpack fname +declareVars :: [Typed VarName] -> Parser a -> Parser a +declareVars decls p = do + let names = map snd decls + if length (nub names) == length names -- TODO: inefficient + then local (\ctx -> ctx { ctxVars = Map.fromList $ map swap decls }) p + else fail "Duplicate variable name." + parseTest :: Show a => Parser a -> Text -> IO () parseTest p = MP.parseTest (runReaderT p initContext) diff --git a/src/Language/SimpleShell/Parser/Expr.hs b/src/Language/SimpleShell/Parser/Expr.hs index ac6c340..05120ce 100644 --- a/src/Language/SimpleShell/Parser/Expr.hs +++ b/src/Language/SimpleShell/Parser/Expr.hs @@ -3,14 +3,22 @@ module Language.SimpleShell.Parser.Expr ( exprP + , strongTermP ) where -import Language.SimpleShell.Parser (Parser, lexeme, lookupVar, lookupFun) import Language.SimpleShell.AST.Expr (Expr(..), TypedExpr) -import Language.SimpleShell.AST.Name (FunName, VarName) import Language.SimpleShell.AST.SimpleType (SimpleType(..)) +import Language.SimpleShell.Parser + ( Parser + , lexeme + , symbol + , lookupVar + , lookupFun + ) +import Language.SimpleShell.Parser.Name (funNameP, varNameP, keyword) +import Language.SimpleShell.Parser.SimpleType (forceType) import Control.Monad.Combinators.FailExpr ( Associativity(..) , makeExprParser @@ -19,11 +27,9 @@ import Control.Monad.Combinators.FailExpr import Control.Applicative ((<|>)) import Control.Monad.Combinators (manyTill) -import Data.Char (isAlpha) import Data.Foldable (asum) import Data.Text (Text) -import Text.Megaparsec (takeWhile1P) -import Text.Megaparsec.Char (string, char) +import Text.Megaparsec.Char (char) import qualified Text.Megaparsec.Char.Lexer as L (charLiteral, decimal) @@ -42,31 +48,27 @@ strongTermP :: Parser TypedExpr strongTermP = literalP <|> varP - <|> lexeme (char '(') *> exprP <* lexeme (char ')') + <|> symbol "(" *> exprP <* symbol ")" -- | Parse "strong" term with fixed type. strongTermP' :: String -> SimpleType -> Parser Expr -strongTermP' errMsg t = do - (t', e) <- strongTermP - if t == t' - then return e - else fail errMsg +strongTermP' errMsg t = forceType t strongTermP <|> fail errMsg literalP :: Parser TypedExpr literalP = (IntType,) . IntLiteral <$> lexeme L.decimal - <|> (StrType,) . StrLiteral <$> lexeme strLitP - <|> (BoolType,) . BoolLiteral <$> lexeme boolLitP + <|> (StrType,) . StrLiteral <$> strLitP + <|> (BoolType,) . BoolLiteral <$> boolLitP where - strLitP = char '"' *> manyTill L.charLiteral (char '"') + strLitP = lexeme $ char '"' *> manyTill L.charLiteral (char '"') boolLitP - = True <$ string "true" - <|> False <$ string "false" + = True <$ keyword "true" + <|> False <$ keyword "false" varP :: Parser TypedExpr -varP = lexeme $ do +varP = do _ <- char '$' x <- varNameP t <- lookupVar x @@ -74,21 +76,12 @@ varP = lexeme $ do funP :: Parser TypedExpr funP = do - fname <- lexeme funNameP + fname <- funNameP (t', ts) <- lookupFun fname args <- mapM (strongTermP' "Type mismatch with function signature.") ts return (t', FunCall fname args) --- TODO -varNameP :: Parser VarName -varNameP = takeWhile1P Nothing isAlpha - --- TODO -funNameP :: Parser FunName -funNameP = takeWhile1P Nothing isAlpha - - -- Binary operators. type BinaryFun = Expr -> Expr -> Expr @@ -127,7 +120,7 @@ binaryOperatorTable = addP :: Parser (TypedExpr -> TypedExpr -> Maybe TypedExpr) addP = do - _ <- lexeme $ char '+' + symbol "+" return $ \(t1, x1) (t2, x2) -> if t1 == t2 then @@ -140,8 +133,8 @@ binaryOperatorTable = binary :: Associativity -> Text -> BinaryFun -> BinarySig -> Operator Parser TypedExpr - binary assoc symbol op sig = Binary assoc $ do - _ <- lexeme $ string symbol + binary assoc symb op sig = Binary assoc $ do + symbol symb return $ \(t1, e1) (t2, e2) -> fmap (, op e1 e2) $ sig t1 t2 @@ -153,12 +146,12 @@ type UnaryParser = Parser (TypedExpr -> Maybe TypedExpr) unaryOperators, builtinUnaryFuns :: [UnaryParser] (unaryOperators, builtinUnaryFuns) = - ( [ unary "!" Not $ uniqueSig BoolType BoolType - , unary "-" UMinus $ uniqueSig IntType IntType + ( [ unaryOp "!" Not $ uniqueSig BoolType BoolType + , unaryOp "-" UMinus $ uniqueSig IntType IntType ] - , [ unary "length" Length $ uniqueSig StrType IntType - , unary "int" IntCast $ uniqueSig StrType IntType - , unary "str" StrCast $ uniqueSig IntType StrType + , [ unaryFun "length" Length $ uniqueSig StrType IntType + , unaryFun "int" IntCast $ uniqueSig StrType IntType + , unaryFun "str" StrCast $ uniqueSig IntType StrType ] ) where @@ -167,9 +160,15 @@ unaryOperators, builtinUnaryFuns :: [UnaryParser] | t1 == t1' = Just t2 | otherwise = Nothing - unary :: Text -> UnaryFun -> UnarySig -> UnaryParser - unary symbol op sig = do - _ <- lexeme $ string symbol + unaryOp :: Text -> UnaryFun -> UnarySig -> UnaryParser + unaryOp = unary symbol + + unaryFun :: Text -> UnaryFun -> UnarySig -> UnaryParser + unaryFun = unary keyword + + unary :: (Text -> Parser ()) -> Text -> UnaryFun -> UnarySig -> UnaryParser + unary symbPF symb op sig = do + symbPF symb return $ \(t, e) -> fmap (, op e) $ sig t unaryOpP, builtinUnaryFunP :: Parser TypedExpr diff --git a/src/Language/SimpleShell/Parser/Function.hs b/src/Language/SimpleShell/Parser/Function.hs new file mode 100644 index 0000000..29ebbaa --- /dev/null +++ b/src/Language/SimpleShell/Parser/Function.hs @@ -0,0 +1,36 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Language.SimpleShell.Parser.Function + ( functionP + ) +where + + +import Language.SimpleShell.AST.Expr (Expr) +import Language.SimpleShell.AST.Function (Function(..)) +import Language.SimpleShell.AST.Name (VarName) +import Language.SimpleShell.AST.SimpleType (FunSig, Typed) +import Language.SimpleShell.Parser (Parser, symbol, declareVars) +import Language.SimpleShell.Parser.Expr (strongTermP) +import Language.SimpleShell.Parser.Name (funNameP, varNameP, keyword) +import Language.SimpleShell.Parser.SimpleType (simpleTypeP, forceType) + +import Control.Applicative ((<|>)) +import Text.Megaparsec (sepBy) + + +functionP :: Parser (FunSig, Function) +functionP = do + keyword "function" + t' <- simpleTypeP + fname <- funNameP + params <- symbol "(" *> paramP `sepBy` symbol "," <* symbol ")" + let (ts, vars) = unzip params + body <- forceType t' (bodyP params) <|> fail "Function return type mismatch." + return ((t', ts), Function fname vars body) + +paramP :: Parser (Typed VarName) +paramP = (,) <$> simpleTypeP <*> varNameP + +bodyP :: [Typed VarName] -> Parser (Typed Expr) +bodyP params = declareVars params strongTermP diff --git a/src/Language/SimpleShell/Parser/Name.hs b/src/Language/SimpleShell/Parser/Name.hs new file mode 100644 index 0000000..635d987 --- /dev/null +++ b/src/Language/SimpleShell/Parser/Name.hs @@ -0,0 +1,40 @@ +module Language.SimpleShell.Parser.Name + ( varNameP + , funNameP + , keyword + ) +where + + +import Language.SimpleShell.AST.Name (FunName, VarName) +import Language.SimpleShell.Parser (Parser, lexeme) + +import Control.Monad (void) +import Data.Text (Text, cons) +import Data.Char (isAlpha, isAlphaNum) +import Text.Megaparsec (takeWhileP, satisfy, notFollowedBy, try) +import Text.Megaparsec.Char (string) + + +isNameStartChar :: Char -> Bool +isNameStartChar c = isAlpha c || c == '_' + +isNameChar :: Char -> Bool +isNameChar c = isAlphaNum c || c == '_' + + +nameP :: Parser Text +nameP = + lexeme $ cons <$> satisfy isNameStartChar <*> takeWhileP Nothing isNameChar + + +keyword :: Text -> Parser () +keyword kw = + void $ lexeme $ try $ string kw <* notFollowedBy (satisfy isNameChar) + + +varNameP :: Parser VarName +varNameP = nameP + +funNameP :: Parser FunName +funNameP = nameP diff --git a/src/Language/SimpleShell/Parser/SimpleType.hs b/src/Language/SimpleShell/Parser/SimpleType.hs new file mode 100644 index 0000000..b32c99a --- /dev/null +++ b/src/Language/SimpleShell/Parser/SimpleType.hs @@ -0,0 +1,27 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Language.SimpleShell.Parser.SimpleType + ( simpleTypeP + , forceType + ) +where + + +import Language.SimpleShell.AST.SimpleType (SimpleType(..), Typed) +import Language.SimpleShell.Parser (Parser) +import Language.SimpleShell.Parser.Name (keyword) + +import Control.Applicative ((<|>)) +import Control.Monad (guard) + + +simpleTypeP :: Parser SimpleType +simpleTypeP + = IntType <$ keyword "int" + <|> StrType <$ keyword "str" + <|> BoolType <$ keyword "bool" + +forceType :: SimpleType -> Parser (Typed a) -> Parser a +forceType t p = do + (t', x) <- p + guard (t == t') >> return x |