1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
13 (* $Id: lexer301.mll 6720 2004-11-30 18:57:04Z doligez $ *)
15 (* The lexer definition *)
62 | INFIXOP0 of (string)
63 | INFIXOP1 of (string)
64 | INFIXOP2 of (string)
65 | INFIXOP3 of (string)
66 | INFIXOP4 of (string)
93 | OPTLABEL of (string)
97 | PREFIXOP of (string)
128 | Illegal_character of char
129 | Unterminated_comment
130 | Unterminated_string
131 | Unterminated_string_in_comment
132 | Keyword_as_label of string
135 exception Error of error * int * int
137 (* The table of keywords *)
140 create_hashtable 149 [
146 "constraint", CONSTRAINT;
152 "exception", EXCEPTION;
153 "external", EXTERNAL;
157 "function", FUNCTION;
163 "initializer", INITIALIZER;
191 "mod", INFIXOP3("mod");
192 "land", INFIXOP3("land");
193 "lor", INFIXOP3("lor");
194 "lxor", INFIXOP3("lxor");
195 "lsl", INFIXOP4("lsl");
196 "lsr", INFIXOP4("lsr");
197 "asr", INFIXOP4("asr")
200 (* To buffer string literals *)
202 let initial_string_buffer = String.create 256
203 let string_buff = ref initial_string_buffer
204 let string_index = ref 0
206 let reset_string_buffer () =
207 string_buff := initial_string_buffer;
210 let store_string_char c =
211 if !string_index >= String.length (!string_buff) then begin
212 let new_buff = String.create (String.length (!string_buff) * 2) in
213 String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
214 string_buff := new_buff
216 String.unsafe_set (!string_buff) (!string_index) c;
219 let get_stored_string () =
220 let s = String.sub (!string_buff) 0 (!string_index) in
221 string_buff := initial_string_buffer;
224 (* To translate escape sequences *)
226 let char_for_backslash = function
233 let char_for_decimal_code lexbuf i =
234 let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
235 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
236 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
237 Char.chr(c land 0xFF)
239 (* To store the position of the beginning of a string and comment *)
240 let string_start_pos = ref 0;;
241 let comment_start_pos = ref [];;
242 let in_comment () = !comment_start_pos <> [];;
248 let report_error ppf = function
249 | Illegal_character c ->
250 fprintf ppf "Illegal character (%s)" (Char.escaped c)
251 | Unterminated_comment ->
252 fprintf ppf "Comment not terminated"
253 | Unterminated_string ->
254 fprintf ppf "String literal not terminated"
255 | Unterminated_string_in_comment ->
256 fprintf ppf "This comment contains an unterminated string literal"
257 | Keyword_as_label kwd ->
258 fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
263 let blank = [' ' '\010' '\013' '\009' '\012']
264 let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
265 let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
267 ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
269 ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
270 let decimal_literal = ['0'-'9']+
271 let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
272 let oct_literal = '0' ['o' 'O'] ['0'-'7']+
273 let bin_literal = '0' ['b' 'B'] ['0'-'1']+
275 ['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)?
283 | "~" lowercase identchar * ':'
284 { let s = Lexing.lexeme lexbuf in
285 let name = String.sub s 1 (String.length s - 2) in
286 if Hashtbl.mem keyword_table name then
287 raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
288 Lexing.lexeme_end lexbuf));
291 | "?" lowercase identchar * ':'
292 { let s = Lexing.lexeme lexbuf in
293 let name = String.sub s 1 (String.length s - 2) in
294 if Hashtbl.mem keyword_table name then
295 raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
296 Lexing.lexeme_end lexbuf));
298 | lowercase identchar *
299 { let s = Lexing.lexeme lexbuf in
301 Hashtbl.find keyword_table s
304 | uppercase identchar *
305 { UIDENT(Lexing.lexeme lexbuf) } (* No capitalized keywords *)
306 | decimal_literal | hex_literal | oct_literal | bin_literal
307 { INT (int_of_string(Lexing.lexeme lexbuf)) }
309 { FLOAT (Lexing.lexeme lexbuf) }
311 { reset_string_buffer();
312 let string_start = Lexing.lexeme_start lexbuf in
313 string_start_pos := string_start;
315 lexbuf.Lexing.lex_start_pos <-
316 string_start - lexbuf.Lexing.lex_abs_pos;
317 STRING (get_stored_string()) }
318 | "'" [^ '\\' '\''] "'"
319 { CHAR(Lexing.lexeme_char lexbuf 1) }
320 | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
321 { CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }
322 | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
323 { CHAR(char_for_decimal_code lexbuf 2) }
325 { comment_start_pos := [Lexing.lexeme_start lexbuf];
329 { let loc = Location.curr lexbuf
330 and warn = Warnings.Comment_start
332 Location.prerr_warning loc warn;
333 comment_start_pos := [Lexing.lexeme_start lexbuf];
338 { let loc = Location.curr lexbuf
339 and warn = Warnings.Comment_not_end
341 Location.prerr_warning loc warn;
342 lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
345 | "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n")
350 | "&&" { AMPERAMPER }
358 | "->" { MINUSGREATER }
362 | "::" { COLONCOLON }
363 | ":=" { COLONEQUAL }
364 | ":>" { COLONGREATER }
371 | "[|" { LBRACKETBAR }
372 | "[<" { LBRACKETLESS }
375 | "{<" { LBRACELESS }
378 | "|]" { BARRBRACKET }
380 | ">]" { GREATERRBRACKET }
382 | ">}" { GREATERRBRACE }
384 | "!=" { INFIXOP0 "!=" }
390 { PREFIXOP(Lexing.lexeme lexbuf) }
391 | ['~' '?'] symbolchar +
392 { PREFIXOP(Lexing.lexeme lexbuf) }
393 | ['=' '<' '>' '|' '&' '$'] symbolchar *
394 { INFIXOP0(Lexing.lexeme lexbuf) }
395 | ['@' '^'] symbolchar *
396 { INFIXOP1(Lexing.lexeme lexbuf) }
397 | ['+' '-'] symbolchar *
398 { INFIXOP2(Lexing.lexeme lexbuf) }
400 { INFIXOP4(Lexing.lexeme lexbuf) }
401 | ['*' '/' '%'] symbolchar *
402 { INFIXOP3(Lexing.lexeme lexbuf) }
405 { raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]),
406 Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) }
410 { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
414 { match !comment_start_pos with
416 | [x] -> comment_start_pos := [];
417 | _ :: l -> comment_start_pos := l;
421 { reset_string_buffer();
422 string_start_pos := Lexing.lexeme_start lexbuf;
423 begin try string lexbuf
424 with Error (Unterminated_string, _, _) ->
425 let st = List.hd !comment_start_pos in
426 raise (Error (Unterminated_string_in_comment, st, st + 2))
428 string_buff := initial_string_buffer;
432 | "'" [^ '\\' '\''] "'"
434 | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
436 | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
439 { let st = List.hd !comment_start_pos in
440 raise (Error (Unterminated_comment, st, st + 2));
448 | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
450 | '\\' ['\\' '"' 'n' 't' 'b' 'r']
451 { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
453 | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
454 { store_string_char(char_for_decimal_code lexbuf 1);
457 { raise (Error (Unterminated_string,
458 !string_start_pos, !string_start_pos+1)) }
460 { store_string_char(Lexing.lexeme_char lexbuf 0);