{- CSE 341, Winter 2014. Definition of a parser for Octopus for use in the Octopus interpreter. -} { module OctoParser where import Data.Char } %name octoparse %tokentype { Token } %error { parseError } %token integer { TokenInt $$ } atom { TokenAtom $$ } '(' { TokenLeftParen } ')' { TokenRightParen } '\'' { TokenQuote } %% OctoExpr : '(' OctoExprs ')' { OctoList $2 } | atom { OctoSymbol $1 } | integer { OctoInt $1 } | '\'' OctoExpr { OctoList [OctoSymbol "quote", $2] } OctoExprs : {- empty -} { [] } | OctoExpr OctoExprs { $1 : $2 } { parseError :: [Token] -> a parseError _ = error "Parse error" -- An environment is a list of (name,value) pairs. type Environment = [(String,OctoExpr)] {- Declarations of the datatype for Octopus data. The constructors used in data produced by the parser are OctoInt (Octopus integers), OctoSymbol (Octopus symbols, or atoms), and OctoList (lists). The remaining two types, OctoClosure and OctoPrimitive are not actually used by the parser, just the interpreter.-} data OctoExpr = OctoInt Int | OctoSymbol String | OctoList [OctoExpr] | OctoClosure [String] Environment OctoExpr | OctoPrimitive String deriving (Show, Eq) data Token = TokenInt Int | TokenAtom String | TokenLeftParen | TokenRightParen | TokenQuote deriving (Show, Eq) -- a lexer to take the input string and break it into a list of tokens lexer :: String -> [Token] lexer [] = [] lexer (c:cs) | isSpace c = lexer cs | isDigit c = lexNum (c:cs) | isRacketAtomStartChar c = lexAtom (c:cs) lexer ('(':cs) = TokenLeftParen : lexer cs lexer (')':cs) = TokenRightParen : lexer cs lexer ('\'':cs) = TokenQuote : lexer cs lexNum cs = TokenInt (read num) : lexer rest where (num,rest) = span isDigit cs -- lexAtom looks for a symbol. But we also need to handle the picky -- special case of an integer +3 or -5 (both legal in Racket) lexAtom cs = result : lexer rest where (a@(t:ts),rest) = span isRacketAtomChar cs result = if (t=='+' || t=='-') && all isDigit ts && not (null ts) then TokenInt (read ts * sign) else TokenAtom $ a sign = if t=='-' then -1 else 1 isRacketAtomStartChar c = isAlpha c || elem c "!#$%&|*+-/:<=>?@^_~" isRacketAtomChar c = isRacketAtomStartChar c || isDigit c parse = octoparse . lexer }