1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
13 (* $Id: lexer.mll 9079 2008-10-08 13:09:39Z doligez $ *)
15 (* The lexer definition *)
23 | Illegal_character of char
24 | Illegal_escape of string
25 | Unterminated_comment
27 | Unterminated_string_in_comment
28 | Keyword_as_label of string
29 | Literal_overflow of string
32 exception Error of error * Location.t;;
34 (* The table of keywords *)
37 create_hashtable 149 [
43 "constraint", CONSTRAINT;
49 "exception", EXCEPTION;
60 "initializer", INITIALIZER;
72 (* "parser", PARSER; *)
88 "mod", INFIXOP3("mod");
89 "land", INFIXOP3("land");
90 "lor", INFIXOP3("lor");
91 "lxor", INFIXOP3("lxor");
92 "lsl", INFIXOP4("lsl");
93 "lsr", INFIXOP4("lsr");
94 "asr", INFIXOP4("asr")
97 (* To buffer string literals *)
99 let initial_string_buffer = String.create 256
100 let string_buff = ref initial_string_buffer
101 let string_index = ref 0
103 let reset_string_buffer () =
104 string_buff := initial_string_buffer;
107 let store_string_char c =
108 if !string_index >= String.length (!string_buff) then begin
109 let new_buff = String.create (String.length (!string_buff) * 2) in
110 String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
111 string_buff := new_buff
113 String.unsafe_set (!string_buff) (!string_index) c;
116 let get_stored_string () =
117 let s = String.sub (!string_buff) 0 (!string_index) in
118 string_buff := initial_string_buffer;
121 (* To store the position of the beginning of a string and comment *)
122 let string_start_loc = ref Location.none;;
123 let comment_start_loc = ref [];;
124 let in_comment () = !comment_start_loc <> [];;
126 (* To translate escape sequences *)
128 let char_for_backslash = function
135 let char_for_decimal_code lexbuf i =
136 let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
137 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
138 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
139 if (c < 0 || c > 255) then
142 else raise (Error(Illegal_escape (Lexing.lexeme lexbuf),
143 Location.curr lexbuf))
146 let char_for_hexadecimal_code lexbuf i =
147 let d1 = Char.code (Lexing.lexeme_char lexbuf i) in
148 let val1 = if d1 >= 97 then d1 - 87
149 else if d1 >= 65 then d1 - 55
152 let d2 = Char.code (Lexing.lexeme_char lexbuf (i+1)) in
153 let val2 = if d2 >= 97 then d2 - 87
154 else if d2 >= 65 then d2 - 55
157 Char.chr (val1 * 16 + val2)
159 (* Remove underscores from float literals *)
161 let remove_underscores s =
162 let l = String.length s in
163 let rec remove src dst =
165 if dst >= l then s else String.sub s 0 dst
168 '_' -> remove (src + 1) dst
169 | c -> s.[dst] <- c; remove (src + 1) (dst + 1)
172 (* Update the current location with file name and line number. *)
174 let update_loc lexbuf file line absolute chars =
175 let pos = lexbuf.lex_curr_p in
176 let new_file = match file with
177 | None -> pos.pos_fname
180 lexbuf.lex_curr_p <- { pos with
181 pos_fname = new_file;
182 pos_lnum = if absolute then line else pos.pos_lnum + line;
183 pos_bol = pos.pos_cnum - chars;
191 let report_error ppf = function
192 | Illegal_character c ->
193 fprintf ppf "Illegal character (%s)" (Char.escaped c)
194 | Illegal_escape s ->
195 fprintf ppf "Illegal backslash escape in string or character (%s)" s
196 | Unterminated_comment ->
197 fprintf ppf "Comment not terminated"
198 | Unterminated_string ->
199 fprintf ppf "String literal not terminated"
200 | Unterminated_string_in_comment ->
201 fprintf ppf "This comment contains an unterminated string literal"
202 | Keyword_as_label kwd ->
203 fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
204 | Literal_overflow ty ->
205 fprintf ppf "Integer literal exceeds the range of representable integers of type %s" ty
210 let newline = ('\010' | '\013' | "\013\010")
211 let blank = [' ' '\009' '\012']
212 let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
213 let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
215 ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
217 ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
218 let decimal_literal =
219 ['0'-'9'] ['0'-'9' '_']*
221 '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
223 '0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
225 '0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
227 decimal_literal | hex_literal | oct_literal | bin_literal
229 ['0'-'9'] ['0'-'9' '_']*
230 ('.' ['0'-'9' '_']* )?
231 (['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
235 { update_loc lexbuf None 1 false 0;
243 | "~" lowercase identchar * ':'
244 { let s = Lexing.lexeme lexbuf in
245 let name = String.sub s 1 (String.length s - 2) in
246 if Hashtbl.mem keyword_table name then
247 raise (Error(Keyword_as_label name, Location.curr lexbuf));
250 | "??" { QUESTIONQUESTION }
251 | "?" lowercase identchar * ':'
252 { let s = Lexing.lexeme lexbuf in
253 let name = String.sub s 1 (String.length s - 2) in
254 if Hashtbl.mem keyword_table name then
255 raise (Error(Keyword_as_label name, Location.curr lexbuf));
257 | lowercase identchar *
258 { let s = Lexing.lexeme lexbuf in
260 Hashtbl.find keyword_table s
263 | uppercase identchar *
264 { UIDENT(Lexing.lexeme lexbuf) } (* No capitalized keywords *)
267 INT (int_of_string(Lexing.lexeme lexbuf))
269 raise (Error(Literal_overflow "int", Location.curr lexbuf))
272 { FLOAT (remove_underscores(Lexing.lexeme lexbuf)) }
274 { let s = Lexing.lexeme lexbuf in
276 INT32 (Int32.of_string(String.sub s 0 (String.length s - 1)))
278 raise (Error(Literal_overflow "int32", Location.curr lexbuf)) }
280 { let s = Lexing.lexeme lexbuf in
282 INT64 (Int64.of_string(String.sub s 0 (String.length s - 1)))
284 raise (Error(Literal_overflow "int64", Location.curr lexbuf)) }
286 { let s = Lexing.lexeme lexbuf in
289 (Nativeint.of_string(String.sub s 0 (String.length s - 1)))
291 raise (Error(Literal_overflow "nativeint", Location.curr lexbuf)) }
293 { reset_string_buffer();
294 let string_start = lexbuf.lex_start_p in
295 string_start_loc := Location.curr lexbuf;
297 lexbuf.lex_start_p <- string_start;
298 STRING (get_stored_string()) }
300 { update_loc lexbuf None 1 false 1;
301 CHAR (Lexing.lexeme_char lexbuf 1) }
302 | "'" [^ '\\' '\'' '\010' '\013'] "'"
303 { CHAR(Lexing.lexeme_char lexbuf 1) }
304 | "'\\" ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] "'"
305 { CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }
306 | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
307 { CHAR(char_for_decimal_code lexbuf 2) }
308 | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
309 { CHAR(char_for_hexadecimal_code lexbuf 3) }
311 { let l = Lexing.lexeme lexbuf in
312 let esc = String.sub l 1 (String.length l - 1) in
313 raise (Error(Illegal_escape esc, Location.curr lexbuf))
316 { comment_start_loc := [Location.curr lexbuf];
320 { let loc = Location.curr lexbuf in
321 Location.prerr_warning loc Warnings.Comment_start;
322 comment_start_loc := [Location.curr lexbuf];
327 { let loc = Location.curr lexbuf in
328 Location.prerr_warning loc Warnings.Comment_not_end;
329 lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
330 let curpos = lexbuf.lex_curr_p in
331 lexbuf.lex_curr_p <- { curpos with pos_cnum = curpos.pos_cnum - 1 };
334 | "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
335 ("\"" ([^ '\010' '\013' '"' ] * as name) "\"")?
336 [^ '\010' '\013'] * newline
337 { update_loc lexbuf name (int_of_string num) true 0;
342 | "&&" { AMPERAMPER }
349 | "->" { MINUSGREATER }
353 | "::" { COLONCOLON }
354 | ":=" { COLONEQUAL }
355 | ":>" { COLONGREATER }
362 | "[|" { LBRACKETBAR }
363 | "[<" { LBRACKETLESS }
364 | "[>" { LBRACKETGREATER }
367 | "{<" { LBRACELESS }
370 | "|]" { BARRBRACKET }
372 | ">]" { GREATERRBRACKET }
374 | ">}" { GREATERRBRACE }
376 | "!=" { INFIXOP0 "!=" }
382 { PREFIXOP(Lexing.lexeme lexbuf) }
383 | ['~' '?'] symbolchar +
384 { PREFIXOP(Lexing.lexeme lexbuf) }
385 | ['=' '<' '>' '|' '&' '$'] symbolchar *
386 { INFIXOP0(Lexing.lexeme lexbuf) }
387 | ['@' '^'] symbolchar *
388 { INFIXOP1(Lexing.lexeme lexbuf) }
389 | ['+' '-'] symbolchar *
390 { INFIXOP2(Lexing.lexeme lexbuf) }
392 { INFIXOP4(Lexing.lexeme lexbuf) }
393 | ['*' '/' '%'] symbolchar *
394 { INFIXOP3(Lexing.lexeme lexbuf) }
397 { raise (Error(Illegal_character (Lexing.lexeme_char lexbuf 0),
398 Location.curr lexbuf))
403 { comment_start_loc := (Location.curr lexbuf) :: !comment_start_loc;
407 { match !comment_start_loc with
409 | [x] -> comment_start_loc := [];
410 | _ :: l -> comment_start_loc := l;
414 { reset_string_buffer();
415 string_start_loc := Location.curr lexbuf;
416 begin try string lexbuf
417 with Error (Unterminated_string, _) ->
418 match !comment_start_loc with
420 | loc :: _ -> comment_start_loc := [];
421 raise (Error (Unterminated_string_in_comment, loc))
423 reset_string_buffer ();
428 { update_loc lexbuf None 1 false 1;
431 | "'" [^ '\\' '\'' '\010' '\013' ] "'"
433 | "'\\" ['\\' '"' '\'' 'n' 't' 'b' 'r' ' '] "'"
435 | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
437 | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
440 { match !comment_start_loc with
442 | loc :: _ -> comment_start_loc := [];
443 raise (Error (Unterminated_comment, loc))
446 { update_loc lexbuf None 1 false 0;
455 | '\\' newline ([' ' '\t'] * as space)
456 { update_loc lexbuf None 1 false (String.length space);
459 | '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
460 { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
462 | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
463 { store_string_char(char_for_decimal_code lexbuf 1);
465 | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
466 { store_string_char(char_for_hexadecimal_code lexbuf 2);
472 (* Should be an error, but we are very lax.
473 raise (Error (Illegal_escape (Lexing.lexeme lexbuf),
474 Location.curr lexbuf))
476 let loc = Location.curr lexbuf in
477 Location.prerr_warning loc Warnings.Illegal_backslash;
478 store_string_char (Lexing.lexeme_char lexbuf 0);
479 store_string_char (Lexing.lexeme_char lexbuf 1);
484 { update_loc lexbuf None 1 false 0;
485 let s = Lexing.lexeme lexbuf in
486 for i = 0 to String.length s - 1 do
487 store_string_char s.[i];
492 { raise (Error (Unterminated_string, !string_start_loc)) }
494 { store_string_char(Lexing.lexeme_char lexbuf 0);
497 and skip_sharp_bang = parse
498 | "#!" [^ '\n']* '\n' [^ '\n']* "\n!#\n"
499 { update_loc lexbuf None 3 false 0 }
500 | "#!" [^ '\n']* '\n'
501 { update_loc lexbuf None 1 false 0 }