MarkLogic log files denote multiline messages by printing each line of the message with the same timestamp, down to the millisecond. You can see that in this snippet:
2014-05-09 18:
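For illustration, a multiline entry looks something like this (these lines are invented, but they follow the ErrorLog shape of timestamp, level, and text):

2014-05-09 18:03:21.841 Notice: XDMP-EXTIME: Time limit exceeded
2014-05-09 18:03:21.841 Notice:   in /app/lib/search.xqy, at 42:10
2014-05-09 18:03:21.841 Notice:   $query = "example"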
This seemed like an interesting problem, so I wrote a plugin for it, loosely based on the multiline filter. There are some complications, though: the plugin also needs a fix for a bug in the core Logstash code, shown at the end of this post.
Extract the following into lib/logstash/filters/related.rb:
# encoding: utf-8
require "logstash/filters/base"
require "logstash/namespace"
require "set"
#
# This filter collapses multiline messages from a single source into one
# Logstash event if they are related, based on a configured pattern. Two
# consecutive events are related if the pattern's named capture returns the
# same value for both.
#
# The config looks like this:
#
#     filter {
#       related {
#         type => "type"
#         pattern => "^%{TIMESTAMP:time}"
#         capture => "TIMESTAMP:time"
#       }
#     }
#
# `pattern` is the grok pattern used to match each line.
# `capture` is the named capture within `pattern` whose value has to match
# between consecutive lines.
#
class LogStash::Filters::Related < LogStash::Filters::Base
  config_name "related"
  milestone 1

  # The grok pattern to match.
  config :pattern, :validate => :string, :required => true

  # The named capture (from `pattern`) whose value relates consecutive events.
  config :capture, :validate => :string, :required => true
  # The stream identity is how this filter determines which stream an
  # event belongs to. This is generally used for differentiating, say, events
  # coming from multiple files in the same file input, or multiple connections
  # coming from a tcp input.
  #
  # The default value here is usually what you want, but there are some cases
  # where you want to change it. One such example is if you are using a tcp
  # input with only one client connecting at any time. If that client
  # reconnects (due to error or client restart), then logstash will identify
  # the new connection as a new stream and break any multiline goodness that
  # may have occurred between the old and new connection. To solve this use
  # case, you can use "%{@source_host}.%{@type}" instead.
  config :stream_identity, :validate => :string, :default => "%{host}.%{path}.%{type}"
  # Logstash ships by default with a bunch of patterns, so you don't
  # necessarily need to define this yourself unless you are adding additional
  # patterns.
  #
  # Pattern files are plain text with format:
  #
  #     NAME PATTERN
  #
  # For example:
  #
  #     NUMBER \d+
  config :patterns_dir, :validate => :array, :default => []
  # Detect if we are running from a jarfile; pick the right path.
  @@patterns_path = Set.new
  if __FILE__ =~ /file:\/.*\.jar!.*/
    @@patterns_path += ["#{File.dirname(__FILE__)}/../../patterns/*"]
  else
    @@patterns_path += ["#{File.dirname(__FILE__)}/../../../patterns/*"]
  end
  public
  def initialize(config = {})
    super

    @threadsafe = false

    # This filter needs to keep state per stream: @pending holds the event
    # currently being accumulated, @previous holds the capture value that
    # started it, and @pending_unmatched holds a collapsed event waiting to
    # be emitted ahead of the next event.
    @types = Hash.new { |h,k| h[k] = [] }
    @pending_unmatched = Hash.new
    @pending = Hash.new
    @previous = Hash.new
  end # def initialize
  public
  def register
    require "grok-pure" # rubygem 'jls-grok'
    @grok = Grok.new

    @patterns_dir = @@patterns_path.to_a + @patterns_dir
    @patterns_dir.each do |path|
      # Can't read relative paths from jars, try to normalize away '../'
      while path =~ /file:\/.*\.jar!.*\/\.\.\//
        # replace /foo/bar/../baz => /foo/baz
        path = path.gsub(/[^\/]+\/\.\.\//, "")
      end

      if File.directory?(path)
        path = File.join(path, "*")
      end

      Dir.glob(path).each do |file|
        @logger.info("Grok loading patterns from file", :path => file)
        @grok.add_patterns_from_file(file)
      end
    end

    @grok.compile(@pattern)

    @logger.debug("Registered related plugin", :type => @type, :config => @config)
  end # def register
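
  # Per stream, a matching line either starts a pending event, is appended to
  # it (same capture value), or flushes it and starts a new one (different
  # capture value). A non-matching line flushes the pending event and is held
  # in @pending_unmatched so it can be emitted ahead of the next event.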
  public
  def filter(event)
    key = event.sprintf(@stream_identity)

    # If there's anything pending unmatched, we need to push it out
    # and then push the current event back onto the filter queue (yield).
    if @pending_unmatched[key]
      @logger.info("Related", :unmatched => key)
      clone = event.clone
      event.overwrite(@pending_unmatched[key])
      @pending_unmatched.delete(key)
      yield clone
      return
    end

    return unless filter?(event)

    if event["message"].is_a?(Array)
      match = @grok.match(event["message"].first)
    else
      match = @grok.match(event["message"])
    end

    pending = @pending[key]

    @logger.debug("Related", :pattern => @pattern, :message => event["message"],
                  :match => match, :capture => @capture)

    if !match
      # A non-matching line closes out any pending event: emit the collapsed
      # event now and hold this line until the next event comes through.
      if pending
        @pending_unmatched[key] = event.clone
        event.overwrite(pending)
        @pending.delete(key)
        collapse_event!(event)
        filter_matched(event)
      end
      return
    end

    # From here on out, we've matched.
    if pending
      if match.captures[@capture] == @previous[key]
        # Same capture value as the pending event: append and swallow this line.
        pending.append(event)
        pending.tag "related"
        event.cancel
      else
        # New capture value: emit the collapsed pending event and start over.
        @pending[key] = event.clone
        @previous[key] = match.captures[@capture]
        event.overwrite(pending)
        collapse_event!(event)
        filter_matched(event)
      end
    else
      # First matching line for this stream: hold it and wait for relatives.
      @pending[key] = event
      @previous[key] = match.captures[@capture]
      event.cancel
    end
  end # def filter
  # Flush any pending messages. This is generally used for unit testing only.
  #
  # Note: flush is disabled for now; it is preferable to use the multiline codec.
  public
  def __flush
    events = []
    @pending.each do |key, value|
      value.uncancel
      events << value
    end
    @pending.clear
    return events
  end # def __flush
  def collapse_event!(event)
    event["message"] = event["message"].join("\n") if event["message"].is_a?(Array)
    event["@timestamp"] = event["@timestamp"].first if event["@timestamp"].is_a?(Array)
    event
  end # def collapse_event!
end # class LogStash::Filters::Related
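To see it work end to end, here's a rough driver. This is only a sketch: it assumes a Logstash 1.4 checkout on the load path, and the sample lines, pattern, and `marklogic` type are all invented. Note that it only behaves correctly once the event.rb fix below is in place.

# smoke_test.rb -- run with Logstash 1.4's bundled JRuby.
require "logstash/filters/related"
require "logstash/event"

filter = LogStash::Filters::Related.new(
  "pattern" => "^%{TIMESTAMP_ISO8601:time}",
  "capture" => "TIMESTAMP_ISO8601:time"
)
filter.register

lines = [
  "2014-05-09 18:00:00.550 Info: first line of an entry",
  "2014-05-09 18:00:00.550 Info: second line of the same entry",
  "2014-05-09 18:00:01.000 Info: a new, unrelated entry",
]
events = lines.map { |l| LogStash::Event.new("message" => l, "type" => "marklogic") }
events.each { |e| filter.filter(e) }

# The first two lines share a timestamp, so when the third line arrives the
# collapsed event is emitted in its place (the third line becomes pending):
puts events[2]["message"]
# => 2014-05-09 18:00:00.550 Info: first line of an entry
#    2014-05-09 18:00:00.550 Info: second line of the same entry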
Then modify lib/logstash/event.rb, adding the line called out in the comment below:
public
def overwrite(event)
  @data = event.to_hash
  # convert timestamp if it is a String
  if @data[TIMESTAMP].is_a?(String)
    @data[TIMESTAMP] = LogStash::Time.parse_iso8601(@data[TIMESTAMP])
  end

  # Add the line below. It is needed because when we overwrite data["message"]
  # with an array, we have to regenerate the accessors; otherwise lookups keep
  # using the message from before it was overwritten.
  @accessors = LogStash::Util::Accessors.new(@data)
end
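Without that line, the Accessors cache built for the old @data hash keeps answering lookups against stale data. A contrived sketch of the failure mode (assuming the Logstash 1.4 Event API):

e = LogStash::Event.new("message" => "old line")
e["message"]   # primes the accessor cache against the old @data hash

e.overwrite(LogStash::Event.new("message" => ["line one", "line two"]))
e["message"]   # without the fix: still "old line"; with it: ["line one", "line two"]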