1 (***********************************************************************)
3 (* MLTk, Tcl/Tk interface of Objective Caml *)
5 (* Francois Rouaix, Francois Pessaux, Jun Furuse and Pierre Weis *)
6 (* projet Cristal, INRIA Rocquencourt *)
7 (* Jacques Garrigue, Kyoto University RIMS *)
9 (* Copyright 2002 Institut National de Recherche en Informatique et *)
10 (* en Automatique and Kyoto University. All rights reserved. *)
11 (* This file is distributed under the terms of the GNU Library *)
12 (* General Public License, with the special exception on linking *)
13 (* described in file ../LICENSE. *)
15 (***********************************************************************)
17 (* $Id: lexer.mll 5029 2002-07-23 14:12:03Z doligez $ *)
25 exception Lexical_error of string
26 let current_line = ref 1
29 (* The table of keywords *)
31 let keyword_table = (Hashtbl.create 149 : (string, token) Hashtbl.t)
34 ~f:(fun (str,tok) -> Hashtbl.add keyword_table str tok)
56 (* To buffer string literals *)
58 let initial_string_buffer = String.create 256
59 let string_buff = ref initial_string_buffer
60 let string_index = ref 0
62 let reset_string_buffer () =
63 string_buff := initial_string_buffer;
67 let store_string_char c =
68 if !string_index >= String.length (!string_buff) then begin
69 let new_buff = String.create (String.length (!string_buff) * 2) in
70 String.blit ~src:(!string_buff) ~src_pos:0 ~dst:new_buff ~dst_pos:0
71 ~len:(String.length (!string_buff));
72 string_buff := new_buff
74 String.set (!string_buff) (!string_index) c;
77 let get_stored_string () =
78 let s = String.sub (!string_buff) ~pos:0 ~len:(!string_index) in
79 string_buff := initial_string_buffer;
81 (* To translate escape sequences *)
83 let char_for_backslash = function
90 let char_for_decimal_code lexbuf i =
91 Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
92 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
93 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
95 let saved_string_start = ref 0
100 '\010' { incr current_line; main lexbuf }
101 | [' ' '\013' '\009' '\026' '\012'] +
103 | ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255' ]
104 ( '_' ? ['A'-'Z' 'a'-'z' '\192'-'\214' '\216'-'\246' '\248'-'\255' (*'*) '0'-'9' ] ) *
105 { let s = Lexing.lexeme lexbuf in
107 Hashtbl.find keyword_table s
112 { reset_string_buffer();
113 (* Start of token is start of string. *)
114 saved_string_start := lexbuf.lex_start_pos;
116 lexbuf.lex_start_pos <- !saved_string_start;
117 STRING (get_stored_string()) }
129 | "%" { comment lexbuf; main lexbuf }
130 | "##line" { line lexbuf; main lexbuf }
133 { raise (Lexical_error("illegal character")) }
139 | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] +
141 | '\\' ['\\' '"' 'n' 't' 'b' 'r']
142 { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
144 | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
145 { store_string_char(char_for_decimal_code lexbuf 1);
148 { raise (Lexical_error("string not terminated")) }
151 store_string_char(Lexing.lexeme_char lexbuf 0);
154 { store_string_char(Lexing.lexeme_char lexbuf 0);
158 '\010' { incr current_line }
160 | _ { comment lexbuf }
164 let next_line = int_of_string (Lexing.lexeme lexbuf) in
165 current_line := next_line - 1
167 | _ { raise (Lexical_error("illegal ##line directive: no line number"))}
170 | [' ' '\t']* { linenum lexbuf }