With Logstash, how do you combine lines based on matching same timestamp on every line?

后端 未结 1 1884
伪装坚强ぢ
伪装坚强ぢ 2021-01-22 16:59

MarkLogic log files denote a multi-line message by printing each of its lines with the same timestamp, down to the millisecond. You can see that in this snippet:

2014-05-09 18:         


        
相关标签:
1条回答
  • 2021-01-22 17:52

    This seemed like an interesting problem, so I wrote a plugin for it, based loosely on the multiline plugin, but there are some complications: there is a bug in the core Logstash code that has to be fixed before the plugin will work.

    Extract the following into lib/filters/related.rb

    # encoding: utf-8
    require "logstash/filters/base"
    require "logstash/namespace"
    require "set"
    #
    # This filter will collapse multiline messages from a single source into one Logstash event
    # if they are related, based on a defined pattern.  Two events are related if the pattern
    # match returns the same captured value for consecutive events.
    # 
    # The config looks like this:
    #
    #     filter {
    #       related {
    #         type => "type"
    #         pattern => "^%{TIMESTAMP:time}"
    #         capture => "TIMESTAMP:time"
    #       }
    #     }
    # 
    # The `pattern` is the pattern that is used to match the lines
    # The `capture` is the named capture that has to match between the lines
    #
    class LogStash::Filters::Related < LogStash::Filters::Base
    
      config_name "related"
      milestone 1

      # Grok expression that is matched against each event's message (against
      # the first entry when the message is an array).
      config :pattern, :validate => :string, :required => true

      # Name of the capture in `pattern` whose value is compared between
      # consecutive events of a stream; events with an equal value are merged.
      config :capture, :validate => :string, :required => true

      # Template expanded against every event to work out which stream the event
      # belongs to. Streams are tracked independently, which keeps apart, say,
      # several files read by one file input or several connections accepted by
      # one tcp input.
      #
      # The default is usually right. One case where it is not: a tcp input
      # with a single client at a time. When that client reconnects (error or
      # restart), the new connection counts as a new stream and the grouping
      # across the old and new connection is lost. An identity such as
      # "%{@source_host}.%{@type}" avoids that.
      # NOTE(review): that example uses @-prefixed field names while the default
      # below does not -- confirm which form the events in use actually carry.
      config :stream_identity, :validate => :string, :default => "%{host}.%{path}.%{type}"

      # Additional places to load grok patterns from; each entry is a file glob
      # or a directory. Logstash already ships with a set of patterns, so this
      # is only needed when adding your own.
      #
      # Pattern files are plain text, one definition per line:
      #
      #     NAME PATTERN
      #
      # For example:
      #
      #     NUMBER \d+
      config :patterns_dir, :validate => :array, :default => []

      # The bundled patterns sit one directory level closer when this file is
      # loaded from inside a jar, so pick the glob that fits how we were loaded.
      default_glob =
        if __FILE__ =~ /file:\/.*\.jar!.*/
          "#{File.dirname(__FILE__)}/../../patterns/*"
        else
          "#{File.dirname(__FILE__)}/../../../patterns/*"
        end
      @@patterns_path = Set.new([default_glob])
    
      public
      # Creates the filter and the per-stream bookkeeping used by #filter.
      #
      # config - plugin settings; handed to the superclass unchanged.
      def initialize(config = {})
        super

        # State is carried from one event to the next, so the filter flags
        # itself as not thread-safe.
        @threadsafe = false

        @types = Hash.new { |table, type| table[type] = [] }

        # All three tables are keyed by the expanded stream identity:
        #   @pending           - the event (group) currently held back
        #   @previous          - capture value of that held-back group
        #   @pending_unmatched - a non-matching event waiting to be re-emitted
        @pending = {}
        @previous = {}
        @pending_unmatched = {}
      end # def initialize
    
      public
      # Prepares the grok matcher used by #filter.
      #
      # Loads every pattern file found under the bundled pattern location and
      # the user supplied `patterns_dir` entries, then compiles `pattern`.
      def register
        require "grok-pure" # rubygem 'jls-grok'

        @grok = Grok.new

        @patterns_dir = @@patterns_path.to_a + @patterns_dir
        @patterns_dir.each do |path|
          # Can't read relative paths from jars, try to normalize away '../'
          while path =~ /file:\/.*\.jar!.*\/\.\.\//
            # replace /foo/bar/../baz => /foo/baz
            path = path.gsub(/[^\/]+\/\.\.\//, "")
          end

          # A bare directory means "every file inside it".
          path = File.join(path, "*") if File.directory?(path)

          Dir.glob(path).each do |file|
            @logger.info("Grok loading patterns from file", :path => file)
            @grok.add_patterns_from_file(file)
          end
        end

        @grok.compile(@pattern)

        # The message used to say "multiline" (left over from the plugin this
        # one was derived from), which mislabelled this filter in debug logs.
        @logger.debug("Registered related plugin", :type => @type, :config => @config)
      end # def register
    
      public
      # Merges consecutive events of one stream that share the same value for
      # the configured capture.
      #
      # At most one event (group) per stream is held back in @pending. An
      # incoming event is either folded into that group and cancelled, or it
      # takes the group's place while the finished group is emitted by
      # overwriting the incoming event with it.
      #
      # event - the event being filtered; it is modified in place.
      #
      # Yields a copy of the incoming event when a previously stashed
      # non-matching event has to be emitted first (the original author's note
      # says this pushes the event back on the filter queue).
      def filter(event)
        stream = event.sprintf(@stream_identity)

        # A non-matching event was stashed earlier: emit it now in place of the
        # incoming event and hand the incoming event back through the block.
        stashed = @pending_unmatched[stream]
        if stashed
          @logger.info("Related", :unmatched => stream)
          requeued = event.clone
          event.overwrite(stashed)
          @pending_unmatched.delete(stream)
          yield requeued
          return
        end

        return unless filter?(event)

        text = event["message"]
        match = @grok.match(text.is_a?(Array) ? text.first : text)
        held = @pending[stream]

        @logger.debug("Related", :pattern => @pattern, :message => event["message"],
                      :match => match, :capture => @capture)

        unless match
          # The pattern did not match. If a group is being held, emit the group
          # now and stash this event so the next call can emit it; otherwise
          # the event passes through untouched.
          if held
            @pending_unmatched[stream] = event.clone
            event.overwrite(held)
            @pending.delete(stream)
            collapse_event!(event)
            filter_matched(event)
          end
          return
        end

        # From here on the pattern matched.
        group_value = match.captures[@capture]

        if !held
          # Nothing held yet: keep this event back until the next one shows
          # whether it belongs to the same group.
          @pending[stream] = event
          @previous[stream] = group_value
          event.cancel
        elsif group_value == @previous[stream]
          # Same capture value as the held group: fold this event into it.
          held.append(event)
          held.tag "related"
          event.cancel
        else
          # A new group starts: hold a copy of this event and emit the
          # finished group in its place.
          @pending[stream] = event.clone
          @previous[stream] = group_value
          event.overwrite(held)
          collapse_event!(event)
          filter_matched(event)
        end
      end # def filter
    
      # Flush any pending messages. This is generally used for unit testing only.
      #
      # Note: flush is disabled now; it is preferable to use the multiline codec.
      public
      # Un-cancels every held-back event, empties the pending table and
      # returns those events as an array, in the order they were stored.
      # NOTE(review): presumably the leading underscores keep this from being
      # picked up as the regular `flush` hook -- confirm against the base class.
      def __flush
        released = @pending.values
        released.each { |held| held.uncancel }
        @pending.clear
        released
      end # def __flush
    
      # Flattens the fields that may have become arrays while events were
      # being merged: an array "message" is joined with newlines and an array
      # "@timestamp" is reduced to its first entry. Other values are left as
      # they are.
      #
      # event - the event to normalise; modified in place.
      #
      # Returns the same event.
      def collapse_event!(event)
        lines = event["message"]
        event["message"] = lines.join("\n") if lines.is_a?(Array)

        stamps = event["@timestamp"]
        event["@timestamp"] = stamps.first if stamps.is_a?(Array)

        event
      end
    end # class LogStash::Filters::Related
    

    Then modify the `overwrite` method in lib/event.rb, adding the line marked by the comment:

    public
    # Replaces this event's data with the data of another event.
    #
    # event - the event whose hash form (to_hash) becomes this event's data.
    def overwrite(event)
      @data = event.to_hash

      # A timestamp that arrived as a String is parsed as ISO8601.
      stamp = @data[TIMESTAMP]
      @data[TIMESTAMP] = LogStash::Time.parse_iso8601(stamp) if stamp.is_a?(String)

      # ADD THIS LINE. When data["message"] is overwritten with an array the
      # accessors have to be regenerated; otherwise they keep using the
      # message from before the overwrite.
      @accessors = LogStash::Util::Accessors.new(@data)
    end
    
    0 讨论(0)
提交回复
热议问题