From: maxclausen@gmail.com (Christian Clausen)
To: cocci@systeme.lip6.fr
Subject: [Cocci] C++ namespace in cocci files
Date: Mon, 19 Nov 2012 17:38:41 +0100 [thread overview]
Message-ID: <50AA6091.8010702@gmail.com> (raw)
Hi,
Here is an update -- to the file parsing_cocci/lexer_cocci.mll -- which
adds support for c++ namespace syntax in cocci files.
-------------- next part --------------
{
open Parser_cocci_menhir
module D = Data
module Ast = Ast_cocci
module Ast0 = Ast0_cocci
module P = Parse_aux
module FC = Flag_parsing_cocci
exception Lexical of string
let tok = Lexing.lexeme
let line = ref 1
let logical_line = ref 0
(* ---------------------------------------------------------------------- *)
(* control codes *)
(* Defined in data.ml
type line_type = MINUS | OPTMINUS | UNIQUEMINUS | PLUS | CONTEXT | UNIQUE | OPT
*)
let current_line_type = ref (D.CONTEXT,!line,!logical_line)
let prev_plus = ref false
let line_start = ref 0 (* offset of the beginning of the line *)
let get_current_line_type lexbuf =
let (c,l,ll) = !current_line_type in
let lex_start = Lexing.lexeme_start lexbuf in
let preceeding_spaces =
if !line_start < 0 then 0 else lex_start - !line_start in
(*line_start := -1;*)
prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
(c,l,ll,lex_start,preceeding_spaces,[],[],[])
let current_line_started = ref false
let col_zero = ref true
let contextify (c,l,ll,lex_start,preceeding_spaces,bef,aft,pos) =
(D.CONTEXT,l,ll,lex_start,preceeding_spaces,bef,aft,pos)
let reset_line lexbuf =
line := !line + 1;
current_line_type := (D.CONTEXT,!line,!logical_line);
current_line_started := false;
col_zero := true;
line_start := Lexing.lexeme_start lexbuf + 1
let started_line = ref (-1)
let start_line seen_char =
current_line_started := true;
col_zero := false;
(if seen_char && not(!line = !started_line)
then
begin
started_line := !line;
logical_line := !logical_line + 1
end)
let pass_zero _ = col_zero := false
let lexerr s1 s2 = raise (Lexical (Printf.sprintf "%s%s" s1 s2))
let opt_reverse_token token =
if !FC.interpret_inverted
then match token with
D.MINUS -> D.PLUSPLUS (* maybe too liberal *)
| D.OPTMINUS -> lexerr "cannot invert token ?- (an optional minus line), which is needed for reversing the patch" ""
| D.UNIQUEMINUS -> D.PLUS
| D.PLUS -> D.MINUS
| D.PLUSPLUS -> D.MINUS (* may not be sufficient *)
| _ -> token
else token
let add_current_line_type x =
match (opt_reverse_token x,!current_line_type) with
(D.MINUS,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.MINUS,ln,lln)
| (D.MINUS,(D.UNIQUE,ln,lln)) ->
current_line_type := (D.UNIQUEMINUS,ln,lln)
| (D.MINUS,(D.OPT,ln,lln)) ->
current_line_type := (D.OPTMINUS,ln,lln)
| (D.PLUS,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.PLUS,ln,lln)
| (D.PLUSPLUS,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.PLUSPLUS,ln,lln)
| (D.UNIQUE,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.UNIQUE,ln,lln)
| (D.OPT,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.OPT,ln,lln)
| _ -> lexerr "invalid control character combination" ""
let check_minus_context_linetype s =
match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> lexerr "invalid in a + context: " s
| _ -> ()
let check_context_linetype s =
match !current_line_type with
(D.CONTEXT,_,_) -> ()
| _ -> lexerr "invalid in a nonempty context: " s
let check_plus_linetype s =
match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> ()
| _ -> lexerr "invalid in a non + context: " s
let check_arity_context_linetype s =
match !current_line_type with
(D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.PLUSPLUS,_,_)
| (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
| _ -> lexerr "invalid in a nonempty context: " s
let check_comment s =
if not !current_line_started
then lexerr "+ expected at the beginning of the line" s
let process_include start finish str =
(match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
(try
let _ = Str.search_forward (Str.regexp "\\.\\.\\.") str start in
lexerr "... not allowed in + include" ""
with Not_found -> ())
| _ -> ());
String.sub str (start + 1) (finish - start - 1)
(* ---------------------------------------------------------------------- *)
type pm = PATCH | MATCH | UNKNOWN
let pm = ref UNKNOWN
let patch_or_match = function
PATCH ->
if not !D.ignore_patch_or_match
then
(match !pm with
MATCH ->
lexerr "- or + not allowed in the first column for a match" ""
| PATCH -> ()
| UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
| MATCH ->
if not !D.ignore_patch_or_match
then
(match !pm with
PATCH -> lexerr "* not allowed in the first column for a patch" ""
| MATCH -> ()
| UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
| _ -> failwith "unexpected argument"
(* ---------------------------------------------------------------------- *)
(* identifiers, including metavariables *)
let metavariables = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
let all_metavariables =
(Hashtbl.create(100) : (string,(string * (D.clt -> token)) list) Hashtbl.t)
let type_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
let declarer_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
let iterator_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
let symbol_names = (Hashtbl.create(15) : (string, D.clt -> token) Hashtbl.t)
let rule_names = (Hashtbl.create(100) : (string, unit) Hashtbl.t)
let check_var s linetype =
let fail _ =
if (!Data.in_prolog || !Data.in_rule_name) &&
Str.string_match (Str.regexp "<.*>") s 0
then TPathIsoFile s
else
try (Hashtbl.find metavariables s) linetype
with Not_found ->
(try (Hashtbl.find type_names s) linetype
with Not_found ->
(try (Hashtbl.find declarer_names s) linetype
with Not_found ->
(try (Hashtbl.find iterator_names s) linetype
with Not_found ->
(try (Hashtbl.find symbol_names s) linetype
with Not_found ->
TIdent (s,linetype))))) in
if !Data.in_meta or !Data.in_rule_name
then (try Hashtbl.find rule_names s; TRuleName s with Not_found -> fail())
else fail()
let id_tokens lexbuf =
let s = tok lexbuf in
let linetype = get_current_line_type lexbuf in
let in_rule_name = !Data.in_rule_name in
let in_meta = !Data.in_meta && not !Data.saw_struct in
let in_iso = !Data.in_iso in
let in_prolog = !Data.in_prolog in
(if s = "identifer" && in_meta
then Common.pr2 "Warning: should identifer be identifier?");
match s with
"metavariable" when in_meta -> check_arity_context_linetype s; TMetavariable
| "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier
| "type" when in_meta -> check_arity_context_linetype s; TType
| "parameter" when in_meta -> check_arity_context_linetype s; TParameter
| "constant" when in_meta -> check_arity_context_linetype s; TConstant
| "generated" when in_rule_name && not (!Flag.make_hrule = None) ->
check_arity_context_linetype s; TGenerated
| "expression" when in_meta || in_rule_name ->
check_arity_context_linetype s; TExpression
| "declaration" when in_meta || in_rule_name ->
check_arity_context_linetype s; TDeclaration
| "field" when in_meta || in_rule_name ->
check_arity_context_linetype s; TField
| "initialiser" when in_meta || in_rule_name ->
check_arity_context_linetype s; TInitialiser
| "initializer" when in_meta || in_rule_name ->
check_arity_context_linetype s; TInitialiser
| "idexpression" when in_meta ->
check_arity_context_linetype s; TIdExpression
| "statement" when in_meta -> check_arity_context_linetype s; TStatement
| "function" when in_meta -> check_arity_context_linetype s; TFunction
| "local" when in_meta -> check_arity_context_linetype s; TLocal
| "list" when in_meta -> check_arity_context_linetype s; Tlist
| "fresh" when in_meta -> check_arity_context_linetype s; TFresh
| "typedef" when in_meta -> check_arity_context_linetype s; TTypedef
| "declarer" when in_meta -> check_arity_context_linetype s; TDeclarer
| "iterator" when in_meta -> check_arity_context_linetype s; TIterator
| "name" when in_meta -> check_arity_context_linetype s; TName
| "position" when in_meta -> check_arity_context_linetype s; TPosition
| "any" when in_meta -> check_arity_context_linetype s; TPosAny
| "pure" when in_meta && in_iso ->
check_arity_context_linetype s; TPure
| "context" when in_meta && in_iso ->
check_arity_context_linetype s; TContext
| "error" when in_meta -> check_arity_context_linetype s; TError
| "words" when in_meta -> check_context_linetype s; TWords
| "symbol" when in_meta -> check_arity_context_linetype s; TSymbol
| "using" when in_rule_name || in_prolog -> check_context_linetype s; TUsing
| "virtual" when in_prolog or in_rule_name or in_meta ->
(* don't want to allow virtual as a rule name *)
check_context_linetype s; TVirtual
| "disable" when in_rule_name -> check_context_linetype s; TDisable
| "extends" when in_rule_name -> check_context_linetype s; TExtends
| "depends" when in_rule_name -> check_context_linetype s; TDepends
| "on" when in_rule_name -> check_context_linetype s; TOn
| "ever" when in_rule_name -> check_context_linetype s; TEver
| "never" when in_rule_name -> check_context_linetype s; TNever
(* exists and forall for when are reparsed in parse_cocci.ml *)
| "exists" when in_rule_name -> check_context_linetype s; TExists
| "forall" when in_rule_name -> check_context_linetype s; TForall
| "script" when in_rule_name -> check_context_linetype s; TScript
| "initialize" when in_rule_name -> check_context_linetype s; TInitialize
| "finalize" when in_rule_name -> check_context_linetype s; TFinalize
| "char" -> Tchar linetype
| "short" -> Tshort linetype
| "int" -> Tint linetype
| "double" -> Tdouble linetype
| "float" -> Tfloat linetype
| "long" -> Tlong linetype
| "void" -> Tvoid linetype
| "size_t" -> Tsize_t linetype
| "ssize_t" -> Tssize_t linetype
| "ptrdiff_t" -> Tptrdiff_t linetype
(* in_meta is only for the first keyword; drop it now to allow any type
name *)
| "struct" -> Data.saw_struct := true; Tstruct linetype
| "union" -> Data.saw_struct := true; Tunion linetype
| "enum" -> Data.saw_struct := true; Tenum linetype
| "unsigned" -> Tunsigned linetype
| "signed" -> Tsigned linetype
| "auto" -> Tauto linetype
| "register" -> Tregister linetype
| "extern" -> Textern linetype
| "static" -> Tstatic linetype
| "inline" -> Tinline linetype
| "typedef" -> Ttypedef linetype
| "const" -> Tconst linetype
| "volatile" -> Tvolatile linetype
| "if" -> TIf linetype
| "else" -> TElse linetype
| "while" -> TWhile linetype
| "do" -> TDo linetype
| "for" -> TFor linetype
| "switch" -> TSwitch linetype
| "case" -> TCase linetype
| "default" -> TDefault linetype
| "return" -> TReturn linetype
| "break" -> TBreak linetype
| "continue" -> TContinue linetype
| "goto" -> TGoto linetype
| "sizeof" -> TSizeof linetype
| "Expression" when !Data.in_iso -> TIsoExpression
| "ArgExpression" when !Data.in_iso -> TIsoArgExpression
| "TestExpression" when !Data.in_iso -> TIsoTestExpression
| "ToTestExpression" when !Data.in_iso -> TIsoToTestExpression
| "Statement" when !Data.in_iso -> TIsoStatement
| "Declaration" when !Data.in_iso -> TIsoDeclaration
| "Type" when !Data.in_iso -> TIsoType
| "TopLevel" when !Data.in_iso -> TIsoTopLevel
| "_" when !Data.in_meta -> TUnderscore
| s -> check_var s linetype
let mkassign op lexbuf =
TAssign (Ast.OpAssign op, (get_current_line_type lexbuf))
let init _ =
line := 1;
logical_line := 0;
prev_plus := false;
line_start := 0;
current_line_started := false;
current_line_type := (D.CONTEXT,0,0);
col_zero := true;
pm := UNKNOWN;
Data.in_rule_name := false;
Data.in_meta := false;
Data.in_prolog := false;
Data.saw_struct := false;
Data.inheritable_positions := [];
Hashtbl.clear all_metavariables;
Hashtbl.clear Data.all_metadecls;
Hashtbl.clear metavariables;
Hashtbl.clear type_names;
Hashtbl.clear rule_names;
Hashtbl.clear iterator_names;
Hashtbl.clear declarer_names;
Hashtbl.clear symbol_names;
let get_name (_,x) = x in
Data.add_meta_meta :=
(fun name pure ->
let fn clt = TMeta(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_id_meta :=
(fun name constraints pure ->
let fn clt = TMetaId(name,constraints,Ast.NoVal,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_virt_id_meta_found :=
(fun name vl ->
let fn clt = TIdent(vl,clt) in
Hashtbl.replace metavariables name fn);
Data.add_virt_id_meta_not_found :=
(fun name pure ->
let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast.NoVal,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_fresh_id_meta :=
(fun name seed ->
let fn clt = TMetaId(name,Ast.IdNoConstraint,seed,Ast0.Impure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_type_meta :=
(fun name pure ->
let fn clt = TMetaType(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_init_meta :=
(fun name pure ->
let fn clt = TMetaInit(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_initlist_meta :=
(function name -> function lenname -> function pure ->
let fn clt = TMetaInitList(name,lenname,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_param_meta :=
(function name -> function pure ->
let fn clt = TMetaParam(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_paramlist_meta :=
(function name -> function lenname -> function pure ->
let fn clt = TMetaParamList(name,lenname,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_const_meta :=
(fun tyopt name constraints pure ->
let fn clt = TMetaConst(name,constraints,pure,tyopt,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_err_meta :=
(fun name constraints pure ->
let fn clt = TMetaErr(name,constraints,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_exp_meta :=
(fun tyopt name constraints pure ->
let fn clt = TMetaExp(name,constraints,pure,tyopt,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_idexp_meta :=
(fun tyopt name constraints pure ->
let fn clt = TMetaIdExp(name,constraints,pure,tyopt,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_local_idexp_meta :=
(fun tyopt name constraints pure ->
let fn clt = TMetaLocalIdExp(name,constraints,pure,tyopt,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_explist_meta :=
(function name -> function lenname -> function pure ->
let fn clt = TMetaExpList(name,lenname,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_decl_meta :=
(function name -> function pure ->
let fn clt = TMetaDecl(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_field_meta :=
(function name -> function pure ->
let fn clt = TMetaField(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_field_list_meta :=
(function name -> function lenname -> function pure ->
let fn clt = TMetaFieldList(name,lenname,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_stm_meta :=
(function name -> function pure ->
let fn clt = TMetaStm(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_stmlist_meta :=
(function name -> function pure ->
let fn clt = TMetaStmList(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_func_meta :=
(fun name constraints pure ->
let fn clt = TMetaFunc(name,constraints,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_local_func_meta :=
(fun name constraints pure ->
let fn clt = TMetaLocalFunc(name,constraints,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_iterator_meta :=
(fun name constraints pure ->
let fn clt = TMetaIterator(name,constraints,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_declarer_meta :=
(fun name constraints pure ->
let fn clt = TMetaDeclarer(name,constraints,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_pos_meta :=
(fun name constraints any ->
let fn ((d,ln,_,_,_,_,_,_) as clt) =
(if d = Data.PLUS
then
failwith
(Printf.sprintf "%d: positions only allowed in minus code" ln));
TMetaPos(name,constraints,any,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_type_name :=
(function name ->
let fn clt = TTypeId(name,clt) in
Hashtbl.replace type_names name fn);
Data.add_declarer_name :=
(function name ->
let fn clt = TDeclarerId(name,clt) in
Hashtbl.replace declarer_names name fn);
Data.add_iterator_name :=
(function name ->
let fn clt = TIteratorId(name,clt) in
Hashtbl.replace iterator_names name fn);
Data.add_symbol_meta :=
(function name ->
let fn clt = TSymId (name,clt) in
Hashtbl.replace symbol_names name fn);
Data.init_rule := (function _ -> Hashtbl.clear metavariables);
Data.install_bindings :=
(function parent ->
List.iter (function (name,fn) -> Hashtbl.add metavariables name fn)
(Hashtbl.find all_metavariables parent))
(* the following is needed to properly tokenize include files. Because an
include file is included after seeing a @, so current_line_started is true.
Current_line_started is not important for parsing the name of a rule, so we
don't have to reset this value to true after parsing an included file. *)
let include_init _ =
current_line_started := false
let drop_spaces s =
let len = String.length s in
let rec loop n =
if n = len
then n
else
if List.mem (String.get s n) [' ';'\t']
then loop (n+1)
else n in
let start = loop 0 in
String.sub s start (len - start)
}
(* ---------------------------------------------------------------------- *)
(* tokens *)
let letter = ['A'-'Z' 'a'-'z' '_']
let digit = ['0'-'9']
let dec = ['0'-'9']
let oct = ['0'-'7']
let hex = ['0'-'9' 'a'-'f' 'A'-'F']
let decimal = ('0' | (['1'-'9'] dec*))
let octal = ['0'] oct+
let hexa = ("0x" |"0X") hex+
let pent = dec+
let pfract = dec+
let sign = ['-' '+']
let exp = ['e''E'] sign? dec+
let real = pent exp | ((pent? '.' pfract | pent '.' pfract? ) exp?)
rule token = parse
| [' ' '\t']* ['\n' '\r' '\011' '\012']
{ let cls = !current_line_started in
if not cls
then
begin
match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
let info = get_current_line_type lexbuf in
reset_line lexbuf;
TPragma (Ast.Noindent "", info)
| _ -> reset_line lexbuf; token lexbuf
end
else (reset_line lexbuf; token lexbuf) }
| [' ' '\t' ]+ { start_line false; token lexbuf }
| [' ' '\t' ]* (("//" [^ '\n']*) as after) {
match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
let str =
if !current_line_started
then (tok lexbuf)
else after in
start_line true;
TPragma (Ast.Indent str, get_current_line_type lexbuf)
| _ -> start_line false; token lexbuf }
| "__attribute__" [' ' '\t']* "((" _* "))"
{ match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
start_line true;
TPragma (Ast.Space (tok lexbuf), get_current_line_type lexbuf)
| _ -> failwith "attributes only allowedin + code" }
| "@@" { start_line true; TArobArob }
| "@" { pass_zero();
if !Data.in_rule_name or not !current_line_started
then (start_line true; TArob)
else (check_minus_context_linetype "@";
TPArob (get_current_line_type lexbuf)) }
| "=~" { start_line true; TTildeEq (get_current_line_type lexbuf) }
| "!~" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
| "WHEN" | "when"
{ start_line true; check_minus_context_linetype (tok lexbuf);
TWhen (get_current_line_type lexbuf) }
| "..."
{ start_line true; check_minus_context_linetype (tok lexbuf);
TEllipsis (get_current_line_type lexbuf) }
(*
| "ooo"
{ start_line true; check_minus_context_linetype (tok lexbuf);
TCircles (get_current_line_type lexbuf) }
| "***"
{ start_line true; check_minus_context_linetype (tok lexbuf);
TStars (get_current_line_type lexbuf) }
*)
| "<..." { start_line true; check_context_linetype (tok lexbuf);
TOEllipsis (get_current_line_type lexbuf) }
| "...>" { start_line true; check_context_linetype (tok lexbuf);
TCEllipsis (get_current_line_type lexbuf) }
| "<+..." { start_line true; check_minus_context_linetype (tok lexbuf);
TPOEllipsis (get_current_line_type lexbuf) }
| "...+>" { start_line true; check_minus_context_linetype (tok lexbuf);
TPCEllipsis (get_current_line_type lexbuf) }
(*
| "<ooo" { start_line true; check_context_linetype (tok lexbuf);
TOCircles (get_current_line_type lexbuf) }
| "ooo>" { start_line true; check_context_linetype (tok lexbuf);
TCCircles (get_current_line_type lexbuf) }
| "<***" { start_line true; check_context_linetype (tok lexbuf);
TOStars (get_current_line_type lexbuf) }
| "***>" { start_line true; check_context_linetype (tok lexbuf);
TCStars (get_current_line_type lexbuf) }
*)
| "-" { pass_zero();
if !current_line_started
then (start_line true; TMinus (get_current_line_type lexbuf))
else (patch_or_match PATCH;
add_current_line_type D.MINUS; token lexbuf) }
| "+" { pass_zero();
if !current_line_started
then (start_line true; TPlus (get_current_line_type lexbuf))
else if !Data.in_meta
then TPlus0
else (patch_or_match PATCH;
add_current_line_type D.PLUS; token lexbuf) }
| "?" { pass_zero();
if !current_line_started
then (start_line true; TWhy (get_current_line_type lexbuf))
else if !Data.in_meta
then TWhy0
else (add_current_line_type D.OPT; token lexbuf) }
| "!" { pass_zero();
if !current_line_started
then (start_line true; TBang (get_current_line_type lexbuf))
else if !Data.in_meta
then TBang0
else (add_current_line_type D.UNIQUE; token lexbuf) }
| "(" { if !Data.in_meta or not !col_zero
then (start_line true; TOPar (get_current_line_type lexbuf))
else
(start_line true; check_context_linetype (tok lexbuf);
TOPar0 (get_current_line_type lexbuf))}
| "\\(" { start_line true;
TOPar0 (contextify(get_current_line_type lexbuf)) }
| "|" { if not (!col_zero)
then (start_line true; TOr(get_current_line_type lexbuf))
else (start_line true;
check_context_linetype (tok lexbuf);
TMid0 (get_current_line_type lexbuf))}
| "\\|" { start_line true;
TMid0 (contextify(get_current_line_type lexbuf)) }
| ")" { if not !col_zero
then (start_line true; TCPar (get_current_line_type lexbuf))
else
(start_line true; check_context_linetype (tok lexbuf);
TCPar0 (get_current_line_type lexbuf))}
| "\\)" { start_line true;
TCPar0 (contextify(get_current_line_type lexbuf)) }
| '[' { start_line true; TOCro (get_current_line_type lexbuf) }
| ']' { start_line true; TCCro (get_current_line_type lexbuf) }
| '{' { start_line true; TOBrace (get_current_line_type lexbuf) }
| '}' { start_line true; TCBrace (get_current_line_type lexbuf) }
| "->" { start_line true; TPtrOp (get_current_line_type lexbuf) }
| '.' { start_line true; TDot (get_current_line_type lexbuf) }
| ',' { start_line true; TComma (get_current_line_type lexbuf) }
| ";" { start_line true;
if !Data.in_meta
then TMPtVirg (* works better with tokens_all *)
else TPtVirg (get_current_line_type lexbuf) }
| '*' { pass_zero();
if !current_line_started
then
(start_line true; TMul (get_current_line_type lexbuf))
else
(patch_or_match MATCH;
add_current_line_type D.MINUS; token lexbuf) }
| '/' { start_line true;
TDmOp (Ast.Div,get_current_line_type lexbuf) }
| '%' { start_line true;
TDmOp (Ast.Mod,get_current_line_type lexbuf) }
| '~' { start_line true; TTilde (get_current_line_type lexbuf) }
| "++" { pass_zero();
if !current_line_started
then
(start_line true; TInc (get_current_line_type lexbuf))
else (patch_or_match PATCH;
add_current_line_type D.PLUSPLUS; token lexbuf) }
| "--" { start_line true; TDec (get_current_line_type lexbuf) }
| "=" { start_line true; TEq (get_current_line_type lexbuf) }
| "-=" { start_line true; mkassign Ast.Minus lexbuf }
| "+=" { start_line true; mkassign Ast.Plus lexbuf }
| "*=" { start_line true; mkassign Ast.Mul lexbuf }
| "/=" { start_line true; mkassign Ast.Div lexbuf }
| "%=" { start_line true; mkassign Ast.Mod lexbuf }
| "&=" { start_line true; mkassign Ast.And lexbuf }
| "|=" { start_line true; mkassign Ast.Or lexbuf }
| "^=" { start_line true; mkassign Ast.Xor lexbuf }
| "<<=" { start_line true; mkassign Ast.DecLeft lexbuf }
| ">>=" { start_line true; mkassign Ast.DecRight lexbuf }
| ":" { start_line true; TDotDot (get_current_line_type lexbuf) }
| "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
| "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
| ">=" { start_line true;
TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
| "<=" { start_line true;
if !Data.in_meta
then TSub(get_current_line_type lexbuf)
else TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
| "<" { start_line true;
TLogOp(Ast.Inf,get_current_line_type lexbuf) }
| ">" { start_line true;
TLogOp(Ast.Sup,get_current_line_type lexbuf) }
| "&&" { start_line true; TAndLog (get_current_line_type lexbuf) }
| "||" { start_line true; TOrLog (get_current_line_type lexbuf) }
| ">>" { start_line true;
TShROp(Ast.DecRight,get_current_line_type lexbuf) }
| "<<" { start_line true;
TShLOp(Ast.DecLeft,get_current_line_type lexbuf) }
| "&" { start_line true; TAnd (get_current_line_type lexbuf) }
| "^" { start_line true; TXor(get_current_line_type lexbuf) }
| "##" { start_line true; TCppConcatOp }
| (( ("#" [' ' '\t']* "undef" [' ' '\t']+)) as def)
( (letter (letter |digit)*) as ident)
{ start_line true;
let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
get_current_line_type lexbuf in
let off = String.length def in
(* -1 in the code below because the ident is not at the line start *)
TUndef
(lt,
check_var ident
(arity,line,lline,offset+off,col+off,[],[],[])) }
| (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
( (letter (letter |digit)*) as ident)
{ start_line true;
let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
get_current_line_type lexbuf in
let off = String.length def in
(* -1 in the code below because the ident is not@the line start *)
TDefine
(lt,
check_var ident
(arity,line,lline,offset+off,col+off,[],[],[])) }
| (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
( (letter (letter | digit)*) as ident)
'('
{ start_line true;
let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
get_current_line_type lexbuf in
let off = String.length def in
TDefineParam
(lt,
check_var ident
(* why pos here but not above? *)
(arity,line,lline,offset+off,col+off,strbef,straft,pos),
offset + off + (String.length ident),
col + off + (String.length ident)) }
| "#" [' ' '\t']* "include" [' ' '\t']* '\"' [^ '\"']+ '\"'
{ TIncludeL
(let str = tok lexbuf in
let start = String.index str '\"' in
let finish = String.rindex str '\"' in
start_line true;
(process_include start finish str,get_current_line_type lexbuf)) }
| "#" [' ' '\t']* "include" [' ' '\t']* '<' [^ '>']+ '>'
{ TIncludeNL
(let str = tok lexbuf in
let start = String.index str '<' in
let finish = String.rindex str '>' in
start_line true;
(process_include start finish str,get_current_line_type lexbuf)) }
| "#" [' ' '\t']* "if" [^'\n']*
| "#" [' ' '\t']* "ifdef" [^'\n']*
| "#" [' ' '\t']* "ifndef" [^'\n']*
| "#" [' ' '\t']* "else" [^'\n']*
| "#" [' ' '\t']* "elif" [^'\n']*
| "#" [' ' '\t']* "endif" [^'\n']*
| "#" [' ' '\t']* "error" [^'\n']*
| "#" [' ' '\t']* "pragma" [^'\n']*
| "#" [' ' '\t']* "line" [^'\n']*
{ start_line true; check_plus_linetype (tok lexbuf);
TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) }
| "/*"
{
match !current_line_type with
(D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
start_line true;
(* second argument to TPragma is not quite right, because
it represents only the first token of the comment, but that
should be good enough *)
TPragma (Ast.Indent("/*"^(comment check_comment lexbuf)),
get_current_line_type lexbuf)
| _ -> let _ = comment (fun _ -> ()) lexbuf in token lexbuf }
| "---" [^'\n']*
{ (if !current_line_started
then lexerr "--- must be at the beginning of the line" "");
start_line true;
TMinusFile
(let str = tok lexbuf in
(drop_spaces(String.sub str 3 (String.length str - 3)),
(get_current_line_type lexbuf))) }
| "+++" [^'\n']*
{ (if !current_line_started
then lexerr "+++ must be at the beginning of the line" "");
start_line true;
TPlusFile
(let str = tok lexbuf in
(drop_spaces(String.sub str 3 (String.length str - 3)),
(get_current_line_type lexbuf))) }
| letter (letter | digit)*
{ start_line true; id_tokens lexbuf }
(* christia: testing *)
| (letter | '$') (letter | digit | '$') *
{ start_line true; id_tokens lexbuf }
| (letter | '$') (letter | digit | '$') *
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?
("::~" (letter | '$') (letter | digit | '$') *
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?) +
{
start_line true;
if not !Flag.c_plus_plus
then Common.pr2_once "< and > not allowed in C identifiers, try -c++ option";
id_tokens lexbuf
}
| ((letter | '$') (letter | digit | '$') * )
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>')
{
start_line true;
if not !Flag.c_plus_plus
then Common.pr2_once "< and > not allowed in C identifiers, try -c++ option";
id_tokens lexbuf
}
| (((letter | '$') (letter | digit | '$') * ) as first)
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?
"::" (((letter | '$') (letter | digit | '$') * ) as second)
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?
("::" ((letter | '$') (letter | digit | '$') * )
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?) *
{
start_line true;
if not !Flag.c_plus_plus
then Common.pr2_once "~ and :: not allowed in C identifiers, try -c++ option";
id_tokens lexbuf
}
| "::" ((letter | '$') (letter | digit | '$') * )
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?
("::" ((letter | '$') (letter | digit | '$') * )
('<' (letter | '$' | '~') (letter | digit | '$' | '~') * '>') ?) *
{
start_line true;
if not !Flag.c_plus_plus
then Common.pr2_once "~ and :: not allowed in C identifiers, try -c++ option";
id_tokens lexbuf
}
(* christia: end *)
| "'" { start_line true;
TChar(char lexbuf,get_current_line_type lexbuf) }
| '\"' { start_line true;
TString(string lexbuf,(get_current_line_type lexbuf)) }
| (real as x) { start_line true;
TFloat(x,(get_current_line_type lexbuf)) }
| ((( decimal | hexa | octal)
( ['u' 'U']
| ['l' 'L']
| (['l' 'L'] ['u' 'U'])
| (['u' 'U'] ['l' 'L'])
| (['u' 'U'] ['l' 'L'] ['l' 'L'])
| (['l' 'L'] ['l' 'L'])
)?
) as x) { start_line true; TInt(x,(get_current_line_type lexbuf)) }
| "<=>" { TIso }
| "=>" { TRightIso }
| eof { EOF }
| _ { lexerr "unrecognised symbol, in token rule: " (tok lexbuf) }
and char = parse
| (_ as x) { String.make 1 x ^ restchars lexbuf }
(* todo?: as for octal, do exception beyond radix exception ? *)
| (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf }
(* this rule must be after the one with octal, lex try first longest
* and when \7 we want an octal, not an exn.
*)
| (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf }
| (("\\" (_ as v)) as x )
{
(match v with (* Machine specific ? *)
| 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
| 'f' -> () | 'a' -> ()
| '\\' -> () | '?' -> () | '\'' -> () | '\"' -> ()
| 'e' -> () (* linuxext: ? *)
| _ ->
Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
);
x ^ restchars lexbuf
}
| _
{ Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
tok lexbuf ^ restchars lexbuf
}
and restchars = parse
| "'" { "" }
| (_ as x) { String.make 1 x ^ restchars lexbuf }
(* todo?: as for octal, do exception beyond radix exception ? *)
| (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf }
(* this rule must be after the one with octal, lex try first longest
* and when \7 we want an octal, not an exn.
*)
| (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf }
| (("\\" (_ as v)) as x )
{
(match v with (* Machine specific ? *)
| 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
| 'f' -> () | 'a' -> ()
| '\\' -> () | '?' -> () | '\'' -> () | '\"' -> ()
| 'e' -> () (* linuxext: ? *)
| _ ->
Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
);
x ^ restchars lexbuf
}
| _
{ Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
tok lexbuf ^ restchars lexbuf
}
and string = parse
| '\"' { "" }
| (_ as x) { Common.string_of_char x ^ string lexbuf }
| ("\\" (oct | oct oct | oct oct oct)) as x { x ^ string lexbuf }
| ("\\x" (hex | hex hex)) as x { x ^ string lexbuf }
| ("\\" (_ as v)) as x
{
(match v with
| 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
| 'f' -> () | 'a' -> ()
| '\\' -> () | '?' -> () | '\'' -> () | '\"' -> ()
| 'e' -> ()
| '\n' -> ()
| '(' -> () | '|' -> () | ')' -> ()
| _ -> lexerr "unrecognised symbol:" (tok lexbuf)
);
x ^ string lexbuf
}
| _ { lexerr "unrecognised symbol: " (tok lexbuf) }
and comment check_comment = parse
| "*/" { let s = tok lexbuf in check_comment s; start_line true; s }
| ['\n' '\r' '\011' '\012']
{ let s = tok lexbuf in
(* even blank line should have a + *)
check_comment s;
reset_line lexbuf; s ^ comment check_comment lexbuf }
| "+" { pass_zero();
if !current_line_started
then (start_line true;
let s = tok lexbuf in s^(comment check_comment lexbuf))
else (start_line true; comment check_comment lexbuf) }
(* noteopti: *)
| [^ '*']
{ let s = tok lexbuf in
check_comment s; start_line true; s ^ comment check_comment lexbuf }
| [ '*']
{ let s = tok lexbuf in
check_comment s; start_line true; s ^ comment check_comment lexbuf }
| _
{ start_line true; let s = tok lexbuf in
Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
s ^ comment check_comment lexbuf
}
next reply other threads:[~2012-11-19 16:38 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-11-19 16:38 Christian Clausen [this message]
2012-11-21 10:51 ` [Cocci] C++ namespace in cocci files Christian Clausen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=50AA6091.8010702@gmail.com \
--to=maxclausen@gmail.com \
--cc=cocci@systeme.lip6.fr \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.