;; ** * * * * * * * * * * * * * * * * ** ;; ** Start of the AP-F Parser... ** ;; ** * * * * * * * * * * * * * * * * ** (require (lib "lex.ss" "parser-tools") (prefix : (lib "lex-sre.ss" "parser-tools"))) ;; End-Of-File Token (define-struct EOF ()) ;; Lexer Abbreviations (define-lex-abbrevs (lower-letter (:/ "a" "z")) (upper-letter (:/ #\A #\Z)) (digit (:/ "0" "9"))) ;; Simple Lexer... Could add String constants as well (define the-lexer (lexer [(eof) (make-EOF)] [(:or #\tab #\space #\newline #\linefeed) (the-lexer input-port)] [(:or "=" "+" "-" "*" "/" "(" ")" "{" "}") lexeme] [(:: "t" "r" "u" "e") true] [(:: "f" "a" "l" "s" "e") false] [(:+ (:or lower-letter upper-letter)) (string->symbol lexeme)] [(:+ digit) (string->number lexeme)] [(:: "\"" (:+ (char-complement "\"")) "\"") lexeme] [(:: (:+ digit) #\. (:* digit)) (string->number lexeme)])) ;;** A Rule is either: ;; -- Prod-Rule, or ;; -- Sum-Rule ;; Prod-Rule is (make-prod-rule Symbol (listof Symbol-Or-String)) (define-struct prod-rule (sym goesto)) ;; Sum-Rule is (make-sum-rule Symbol (listof Symbol)) (define-struct sum-rule (sym goesto)) ;; rule-sym: Rule -> Symbol ;; Get the name of a Rule (define (rule-sym r) (cond [(prod-rule? r) (prod-rule-sym r)] [(sum-rule? r) (sum-rule-sym r)] [else (error 'bad "BAD: ~a" r)])) ;; The EmptySet == the EmptyList (define empty-set empty) ;; set-single: Any -> Set ;; Make a singleton Set (define (set-single it) (list it)) ;; set-contains?: Set Any -> Boolean ;; Does the given Set contain the given element? (define (set-contains? set it) (if (empty? set) false (or (equal? (first set) it) (set-contains? (rest set) it)))) ;; set-union: Set Set -> Set ;; Compute the Union of two Sets (define (set-union set1 set2) (cond [(empty? set1) set2] [(empty? set2) set1] [(set-contains? set2 (first set1)) (set-union (rest set1) set2)] [else (cons (first set1) (set-union (rest set1) set2))])) ;; first-set-rules: Symbol (listof Rules) (listof Rules) -> Set ;; First set of a rule without 'empty's (define (first-set-rules sym lor glor) (if (empty? lor) empty-set (let ((r (first lor))) (if (symbol=? (rule-sym r) sym) (cond [(prod-rule? r) (first-set (prod-rule-goesto r) glor)] [(sum-rule? r) (foldl (lambda (s set) (set-union (first-set-rules s glor glor) set)) empty-set (sum-rule-goesto r))]) ;; Keep looking... (first-set-rules sym (rest lor) glor))))) ;; first-set: List-Of-Token Set-of-String -> Set-of-String ;; First set without 'empty's (define (first-set lot lor) (if (empty? lot) (error 'first-set "Cannot Be Empty") (let ((f (first lot))) (cond [(string? f) (set-single f)] [(symbol? f) (cond [(or (symbol=? f 'number) (symbol=? f 'string) (symbol=? f 'symbol) (symbol=? f 'boolean)) (set-single f)] [else (first-set-rules (first lot) lor lor)])])))) ;; grammar-syms: (listof Rule) -> (listof Symbol) ;; Return just the (define (grammar-syms lor) (foldl (lambda (r s) (set-union (set-single (rule-sym r)) s)) empty-set lor)) ;; all-first-sets: (listof Rules) -> (listof (listof Symbol Set)) ;; Compute all the first-sets of all the Rules (define (all-first-sets lor) (map (lambda (r) (let ((s (rule-sym r))) (list s (first-set-rules s lor lor)))) lor)) ;; type-of: Any -> Symbol ;; Return the TypeSymbol of a Token (define (type-of a) (cond [(symbol? a) 'symbol] [(number? a) 'number] [(string? a) 'string] [(boolean? a) 'boolean] [(EOF? a) 'EOF] [else 'unknown])) ;; current-token: Any ;; The Current input Token... '() if none (define current-token '()) ;; peek-token: Lexer -> Any ;; Peek at the first token of the input, don't remove it (define (peek-token lex) (if (empty? current-token) (set! current-token (lex))) current-token) ;; next-token: Lexer -> Any ;; Get and remove the next Token from teh input (define (next-token lex) (if (empty? current-token) (lex) (let ((tmp current-token)) (set! current-token '()) tmp))) ;; make-the-lexer: InputPort -> Lexer ;; Make a Lexer for the given InputPort (define (make-the-lexer port) (lambda () (the-lexer port))) ;; parse: (listof Rule) Symbol InputPort -> Any ;; Parse an InputPort using the given Rules, starting with the given Symbol (define (parse lor start port) (let* ((firsts (all-first-sets lor)) (lexer (make-the-lexer port)) (result (parse-sym lor firsts start lexer))) (begin (parse-sym lor firsts 'EOF lexer) result))) ;; parse-sym: (listof Rule) (listof FirstSets) Symbol Lexer -> Any ;; Parse a specific Type (Symbol) (define (parse-sym lor firsts sym lexer) (cond [(or (symbol=? sym 'number) (symbol=? sym 'symbol) (symbol=? sym 'string) (symbol=? sym 'boolean) (symbol=? sym 'EOF)) (let* ((tok (next-token lexer)) (tt (type-of tok))) (if (not (symbol=? sym tt)) (error 'parser " Expected <~a> got <~a>!!" sym tt) tok))] [else (let* ((r (find-rule sym lor)) (name (rule-sym r))) (cond [(prod-rule? r) (parse-prod lor firsts name (prod-rule-goesto r) lexer)] [(sum-rule? r) (parse-choice lor firsts name (sum-rule-goesto r) (peek-token lexer) lexer)]))])) ;; parse-choice: (listof Rule) (listof FirstSets) Symbol (listof Symbol) Any Lexer -> Any ;; Parse a Union (sum type), choose a concrete type based on the given FirstSets (define (parse-choice lor firsts name choices tok lexer) (if (empty? choices) (error 'parse-choice "No Choice Found For <~a> Starting with \"~a\" <~a>" name tok (type-of tok)) (let ((fst (find-first (car choices) firsts))) (if (or (set-contains? fst (type-of tok)) (set-contains? fst tok)) (parse-sym lor firsts (first choices) lexer) (parse-choice lor firsts name (cdr choices) tok lexer))))) ;; find-rule: Symbol (listof Rule) -> Rule ;; Find the Parse rule for the given Symbol (define (find-rule sym lor) (cond [(null? lor) (error 'find-rule "Rule Not Found For <~a>" sym)] [(symbol=? (rule-sym (car lor)) sym) (car lor)] [else (find-rule sym (cdr lor))])) ;; find-first: Symbol (listof FirstSets) -> (listof String-Or-Symbol) ;; Find the FirstSets for the given Symbol (define (find-first sym lof) (cond [(null? lof) (error 'find-first "BAD")] [(symbol=? sym (caar lof)) (cadar lof)] [else (find-first sym (cdr lof))])) ;; creator-name: Symbol -> Symbol ;; What is the symbol for the creator of the given Type? (define (creator-name sym) (string->symbol (string-append "make-" (symbol->string sym)))) ;; parse-string: String Lexer -> Void ;; Parse a Terminal (String) by ignoring it... (define (parse-string str lexer) (let ((tok (next-token lexer))) (if (not (and (string? tok) (string=? tok str))) (error 'parse-string "Expected String \"~a\" Found \"~a\"" str tok)))) ;; parse-prod: (listof Rule) (listof FirstSets) Symbol (listof String-Or-Symbol) Lexer -> Any ;; Parse a Product Rule for 'name', with the body 'goesto' (define (parse-prod lor firsts name goesto lexer) (apply (eval (creator-name name)) (reverse (foldl (lambda (tt lst) (cond [(symbol? tt) (cons (parse-sym lor firsts tt lexer) lst)] [(string? tt) (begin (parse-string tt lexer) lst)] [else (error 'parse-rule "Bad Parsable <~a> For ~a" tt name)])) '() goesto)))) ;; * * * * * * * * * * * ;; * TESTING * ;; * * * * * * * * * * * ;; Structures (define-struct Leaf (n)) (define-struct True ()) (define-struct False ()) (define-struct Node (left right)) ;; Grammar (Abstr and Concrete Syntax) (define gram (list (make-sum-rule 'Tree '(Node Leaf)) (make-prod-rule 'Leaf '(boolean)) (make-prod-rule 'Node '("(" Tree Tree ")")))) ;; boolean->string: Boolean-> String (define (boolean->string b) (if b "true" "false")) ;; Tree->string: Tree -> String (define (Tree->string t) (cond [(Leaf? t) (string-append "(leaf " (boolean->string (Leaf-n t)) ")")] [else (string-append "(node " (Tree->string (Node-left t)) ", " (Tree->string (Node-right t)) ")")])) ;;** Try everything out (Tree->string (parse gram 'Tree (open-input-string "(true false)")))