{- PLEASE INCLUDE YOUR NAME, UW NETID, AND SECTION HERE ON THE VERSION YOU TURN IN -}

{- CSE 341, Spring 2018.  Starter definition of a parser for Octopus for use
   in the Octopus interpreter. -}

{
module OctoParser where

import Data.Char
}

%name octoparse
%tokentype { Token }
%error { parseError }

%token
    integer     { TokenInt $$ }
    boolean     { TokenBool $$ }
    atom        { TokenAtom $$ }
    '('         { TokenLeftParen }
    ')'         { TokenRightParen }
    '\''        { TokenQuote }

%%

-- A single Octopus datum: a parenthesized list, an atom, an integer,
-- a boolean, or a quoted datum.  A quoted datum is expanded into the
-- two-element list whose first element is the symbol quote.
OctoValue
    : '(' OctoValues ')'    { OctoList $2 }
    | atom                  { OctoSymbol $1 }
    | integer               { OctoInt $1 }
    | boolean               { OctoBool $1 }
    | '\'' OctoValue        { OctoList [OctoSymbol "quote", $2] }

-- Zero or more data in sequence, collected in source order.
OctoValues
    : {- empty -}           { [] }
    | OctoValue OctoValues  { $1 : $2 }

{
-- Handler named in the %error directive.  It ignores the token list
-- it is given and always fails with the same message.
parseError :: [Token] -> a
parseError _ = error "Parse error"

-- An environment is a list of (name,value) pairs.  The name will
-- be an OctoSymbol (although the type declaration doesn't capture this).
type Environment = [(OctoValue,OctoValue)]

{- Declarations of the datatype for Octopus data.  The constructors used in
   data produced by the parser are OctoInt (Octopus integers), OctoBool
   (Octopus booleans), OctoSymbol (Octopus symbols, or atoms), and OctoList
   (lists).  The remaining 2 types, OctoClosure and OctoPrimitive, are not
   actually used by the parser, just the interpreter. -}
data OctoValue
    = OctoInt Int
    | OctoBool Bool
    | OctoSymbol String
    | OctoList [OctoValue]
    | OctoClosure [OctoValue] Environment OctoValue
    | OctoPrimitive String
    deriving (Show, Eq)

-- The tokens produced by the lexer and consumed by the generated parser.
data Token
    = TokenInt Int
    | TokenBool Bool
    | TokenAtom String
    | TokenLeftParen
    | TokenRightParen
    | TokenQuote
    deriving (Show, Eq)

-- A lexer to take the input string and break it into a list of tokens.
-- Whitespace is skipped.  A character that cannot start any token is
-- reported with an explicit error naming that character, rather than
-- being left to fail as an unmatched pattern.
lexer :: String -> [Token]
lexer [] = []
lexer input@(c:cs)
    | isSpace c               = lexer cs
    | isDigit c               = lexNum input
    | isRacketAtomStartChar c = lexAtom input
    | c == '('                = TokenLeftParen : lexer cs
    | c == ')'                = TokenRightParen : lexer cs
    | c == '\''               = TokenQuote : lexer cs
    | otherwise               =
        error ("Lexical error: unexpected character " ++ show c)

-- lexNum reads the leading run of digits as one integer token and
-- continues lexing after it.  The argument to read consists only of
-- characters accepted by isDigit.
lexNum :: String -> [Token]
lexNum cs = TokenInt (read digits) : lexer rest
  where
    (digits, rest) = span isDigit cs

-- lexAtom looks for a symbol.  But we also need to handle #t and #f, and
-- also the special case of an integer +3 or -5 (both legal in Racket).
-- lexer only calls this when the first character satisfies
-- isRacketAtomStartChar, so the empty case below is not reached from there.
lexAtom :: String -> [Token]
lexAtom cs =
    case span isRacketAtomChar cs of
        (t:ts, rest) -> lexAtomHelper t ts : lexer rest
        ([], _)      ->
            error "lexAtom: input does not start with an atom character"

-- lexAtomHelper classifies the text of an atom, given as its first
-- character t and the remaining characters ts:
--   * a sign followed by one or more digits is a signed integer;
--   * #t and #f are the booleans;
--   * anything else is an ordinary atom.
lexAtomHelper :: Char -> String -> Token
lexAtomHelper t ts
    | hasSign && not (null ts) && all isDigit ts = TokenInt (applySign (read ts))
    | t == '#' && ts == "t"                      = TokenBool True
    | t == '#' && ts == "f"                      = TokenBool False
    | otherwise                                  = TokenAtom (t:ts)
  where
    hasSign = t == '+' || t == '-'

    applySign :: Int -> Int
    applySign = if t == '-' then negate else id

-- Characters that may begin an atom: letters and the listed punctuation.
isRacketAtomStartChar :: Char -> Bool
isRacketAtomStartChar c = isAlpha c || elem c "!#$%&|*+-/:<=>?@^_~"

-- Characters that may appear after the first character of an atom:
-- anything that may begin one, plus digits.
isRacketAtomChar :: Char -> Bool
isRacketAtomChar c = isRacketAtomStartChar c || isDigit c

-- Lex the input string and run the generated parser over the tokens.
parse :: String -> OctoValue
parse = octoparse . lexer
}