(* Example of writing a lexing function by hand.
 *
 * The purpose of this file is mainly to illustrate that it's a bit
 * annoying and tedious to have to write lexers by hand.
 *)

(* The datatype of tokens.  Only [Ident] is produced by the code in this
   file; the other constructors are declared but not yet lexed here. *)
type token =
  | Int of int32
  | Ident of string
  | OP_LPAREN
  | KW_IF

(* This exception is raised when an unanticipated character is read.
   It carries the offending character. *)
exception Lex_error of char

(* [is_character c] is [true] exactly when [c] is an ASCII letter,
   i.e. in 'A'..'Z' or 'a'..'z'.

   The previous version tested the lower-case range with
   [(97 <= i) || (i <= 122)], which holds for every character code, so
   every character was classified as a letter. *)
let is_character c =
  match c with
  | 'A' .. 'Z' | 'a' .. 'z' -> true
  | _ -> false

(* [is_digit c] is [true] exactly when [c] is an ASCII decimal digit. *)
let is_digit c =
  match c with
  | '0' .. '9' -> true
  | _ -> false

(* [is_underscore c] is [true] exactly when [c] is ['_']. *)
let is_underscore c = c = '_'

(* Reads an identifier from an input channel.

   An identifier matches the regular expression:
     character (digit | '_' | character)*

   Returns the token [Ident "foo"] for the input "foo".

   Raises [Lex_error c] if the first character [c] read is not a letter.
   Raises [End_of_file] if the channel is already exhausted when the
   first character is requested (that read happens outside the [try]).

   Note: one problem with this approach is how to implement
   "lookahead" -- this example doesn't do a good job of that.  The
   character that terminates the identifier has already been read from
   the channel when we discover it does not belong to the identifier,
   and it is simply dropped. *)
let lex_ident (input : in_channel) =
  let buf = Buffer.create 16 in
  (* Accumulate identifier characters until a non-identifier character
     or the end of the input is reached. *)
  let rec loop () =
    try
      let c = input_char input in
      if is_character c || is_digit c || is_underscore c then begin
        Buffer.add_char buf c;
        loop ()
      end
      else Ident (Buffer.contents buf)
    with End_of_file ->
      (* Running out of input simply ends the identifier. *)
      Ident (Buffer.contents buf)
  in
  let first = input_char input in
  if is_character first then begin
    Buffer.add_char buf first;
    loop ()
  end
  else raise (Lex_error first)

(* Driver: lex one identifier from standard input and print it.  If the
   input starts with a character that cannot begin an identifier, print
   that character instead. *)
let () =
  try
    match lex_ident stdin with
    | Ident s -> print_string s
    | Int _ | OP_LPAREN | KW_IF -> ()
  with
  | Lex_error c -> print_char c
  | End_of_file ->
    (* Empty input: there is nothing to lex and nothing to print.
       Previously this escaped as an uncaught exception. *)
    ()