First some simple examples to get the flavor of how one uses ocamllex. The following ocamllex input specifies a scanner which whenever it encounters the string “current_directory” will replace it with the current directory:
{ }
rule translate = parse
| "current_directory" { print_string (Sys.getcwd ()); translate lexbuf }
| _ as c { print_char c; translate lexbuf }
| eof { exit 0 }
In the first rule, “current_directory” is the
Any text not matched by a
Here’s another simple example:
{
let num_lines = ref 0
let num_chars = ref 0
}
rule count = parse
| '\n' { incr num_lines; incr num_chars; count lexbuf }
| _ { incr num_chars; count lexbuf }
| eof { () }
{
let main () =
let lexbuf = Lexing.from_channel stdin in
count lexbuf;
Printf.printf "# of lines = %d, # of chars = %d\n" !num_lines !num_chars
let _ = Printexc.print main ()
}
This scanner counts the number of characters and the number of lines in its input (it produces no output other than the final report on the counts). The first header section declares two globals, “num_lines” and “num_chars”, which are accessible both inside scanner function count and in the trailer section which is the last part enclosed by braces. There are three rules, one which matches a newline (”\n”) and increments both the line count and the character count, and one which matches any character other than a newline (indicated by the “_” regular expression). At the end of file, the scanner function count returns unit.
A somewhat more complicated example:
(* scanner for a toy language *)
{
open Printf
}
let digit = ['0'-'9']
let id = ['a'-'z'] ['a'-'z' '0'-'9']*
rule toy_lang = parse
| digit+ as inum
{ printf "integer: %s (%d)\n" inum (int_of_string inum);
toy_lang lexbuf
}
| digit+ '.' digit* as fnum
{ printf "float: %s (%f)\n" fnum (float_of_string fnum);
toy_lang lexbuf
}
| "if"
| "then"
| "begin"
| "end"
| "let"
| "in"
| "function" as word
{ printf "keyword: %s\n" word;
toy_lang lexbuf
}
| id as text
{ printf "identifier: %s\n" text;
toy_lang lexbuf
}
| '+'
| '-'
| '*'
| '/' as op
{ printf "operator: %c\n" op;
toy_lang lexbuf
}
| '{' [^ '\n']* '}' { toy_lang lexbuf } (* eat up one-line comments *)
| [' ' '\t' '\n'] { toy_lang lexbuf } (* eat up whitespace *)
| _ as c
{ printf "Unrecognized character: %c\n" c;
toy_lang lexbuf
}
| eof { }
{
let main () =
let cin =
if Array.length Sys.argv > 1
then open_in Sys.argv.(1)
else stdin
in
let lexbuf = Lexing.from_channel cin in
toy_lang lexbuf
let _ = Printexc.print main ()
}
This is the beginnings of a simple scanner for a language.
It identifies different types of
The details of this example will be explained in the following sections.