Library to parse ERB files

前端 未结 2 1374
耶瑟儿~
耶瑟儿~ 2021-02-10 10:25

I am attempting to parse, not evaluate, rails ERB files in a Hpricot/Nokogiri type manner. The files I am attempting to parse contain HTML fragments intermixed with dynamic con

2条回答
  •  独厮守ぢ
    2021-02-10 11:05

    I eventually solved this problem by using RLex (http://raa.ruby-lang.org/project/ruby-lex/), the Ruby version of lex, with the following grammar:

    %{
    
    #define NUM 257
    
    #define OPTOK 258
    #define IDENT 259
    #define OPETOK 260
    #define CLSTOK 261
    #define CLTOK 262
    #define FLOAT 263
    #define FIXNUM 264
    #define WORD 265
    #define STRING_DOUBLE_QUOTE 266
    #define STRING_SINGLE_QUOTE 267
    
    #define TAG_START 268
    #define TAG_END 269
    #define TAG_SELF_CONTAINED 270
    #define ERB_BLOCK_START 271
    #define ERB_BLOCK_END 272
    #define ERB_STRING_START 273
    #define ERB_STRING_END 274
    #define TAG_NO_TEXT_START 275
    #define TAG_NO_TEXT_END 276
    #define WHITE_SPACE 277
    %}
    
    digit   [0-9]
    blank   [ ]
    letter  [A-Za-z]
    name1   [A-Za-z_]
    name2   [A-Za-z_0-9]
    valid_tag_character [A-Za-z0-9"'=@_():/ ] 
    ignore_tags style|script
    %%
    
    {blank}+"\n"                  { return [ WHITE_SPACE, yytext ] } 
    "\n"{blank}+                  { return [ WHITE_SPACE, yytext ] } 
    {blank}+"\n"{blank}+                  { return [ WHITE_SPACE, yytext ] } 
    
    "\r"                  { return [ WHITE_SPACE, yytext ] } 
    "\n"            { return[ yytext[0], yytext[0..0] ] };
    "\t"            { return[ yytext[0], yytext[0..0] ] };
    
    ^{blank}+       { return [ WHITE_SPACE, yytext ] }
    
    {blank}+$       { return [ WHITE_SPACE, yytext ] };
    
    ""   { return [ TAG_NO_TEXT_START, yytext ] }
    ""  { return [ TAG_NO_TEXT_END, yytext ] }
    ""                   { return [ TAG_SELF_CONTAINED, yytext ] }
    ""  { return [ TAG_SELF_CONTAINED, yytext ] }
    ""    { return [ TAG_START, yytext ] }
    ""   { return [ TAG_END, yytext ] }
    
    ""  { return [ ERB_BLOCK_END, yytext ] }
    ""  { return [ ERB_STRING_END, yytext ] }
    
    
    {letter}+       { return [ WORD, yytext ] }
    
    
    \".*\"          { return [ STRING_DOUBLE_QUOTE, yytext ] }
    '.*'                    { return [ STRING_SINGLE_QUOTE, yytext ] }
    .           { return [ yytext[0], yytext[0..0] ] }
    
    %%
    

    This is not a complete grammar, but for my purposes (locating and re-emitting text) it worked. I combined that grammar with this small piece of code:

        # Drives the generated Erblex lexer over one file and forwards every
        # token to a callback handler. The handler must respond to the methods
        # called below (character, text, start_tag, end_tag, white_space, ...).
        text_handler = MakeYourOwnCallbackHandler.new

        l = Erblex.new

        # Block form of File.open so the input file is closed when lexing
        # finishes, including when a handler callback raises.
        File.open(file_name, "r") do |input|
          l.yyin = input

          loop do
            # yylex returns a [token_id, matched_text] pair.
            a, v = l.yylex
            # Token id 0 stops the loop (presumably end of input -- confirm
            # against the generated lexer).
            break if a == 0

            if a < WORD
              # Ids below WORD: the catch-all grammar rules return the matched
              # character itself as the token id.
              # NOTE(review): this comparison assumes the id is numeric; on
              # Rubies where String#[] returns a String it would need adapting.
              text_handler.character(v.to_s, a)
            else
              case a
              when WORD
                text_handler.text(v.to_s)
              when TAG_START
                text_handler.start_tag(v.to_s)
              when TAG_END
                text_handler.end_tag(v.to_s)
              when WHITE_SPACE
                # The grammar defines and returns WHITE_SPACE (with an
                # underscore); the previous WHITESPACE constant did not exist.
                text_handler.white_space(v.to_s)
              when ERB_BLOCK_START
                text_handler.erb_block_start(v.to_s)
              when ERB_BLOCK_END
                text_handler.erb_block_end(v.to_s)
              when ERB_STRING_START
                text_handler.erb_string_start(v.to_s)
              when ERB_STRING_END
                # text_handler is a local variable, so it must be called
                # without the `self.` receiver, like every other branch.
                text_handler.erb_string_end(v.to_s)
              when TAG_NO_TEXT_START
                text_handler.ignorable_tag_start(v.to_s)
              when TAG_NO_TEXT_END
                text_handler.ignorable_tag_end(v.to_s)
              when STRING_DOUBLE_QUOTE
                text_handler.string_double_quote(v.to_s)
              when STRING_SINGLE_QUOTE
                text_handler.string_single_quote(v.to_s)
              when TAG_SELF_CONTAINED
                text_handler.tag_self_contained(v.to_s)
              end
            end
          end
        end
    

提交回复
热议问题