Rails: Faster way to perform updates on many records

后端 未结 3 2064
执笔经年
执笔经年 2021-02-09 15:54

In our Rails 3.2.13 app (Ruby 2.0.0 + Postgres on Heroku), we are often retrieving a large amount of Order data from an API, and then we need to update or create each order in o

3条回答
  •  南旧
    南旧 (楼主)
    2021-02-09 16:36

    You can monkey-patch ActiveRecord like this:

    class ActiveRecord::Base

      # Adapted from:
      #   http://stackoverflow.com/questions/15317837/bulk-insert-records-into-active-record-table?lq=1
      #   https://gist.github.com/jackrg/76ade1724bd816292e4e
      #
      # Updates many rows with one SQL statement per batch, bypassing model
      # callbacks and validations. The statement has the shape:
      #
      #   UPDATE THIS SET <assignments> FROM <table> THIS
      #     JOIN ( VALUES (...), (...) ) VALS ( <columns> ) ON ( <pk comparison> )
      #
      # NOTE(review): the statement text is kept exactly as originally written;
      # confirm that your database accepts this UPDATE ... FROM ... JOIN form.
      #
      # Column names are interpolated into the SQL unquoted, so hash keys must
      # come from trusted code. Values are quoted through the connection.
      #
      # Within one batch every record gets a value for every column that appears
      # in any record of that batch; a column a record omits is sent as NULL.
      #
      # @param record_list [Array<Hash>] one hash per row, keyed by column name
      #   (String or Symbol); each hash must contain a non-blank value for every
      #   primary key column.
      # @param batch_size [Integer] maximum number of records per statement.
      # @return [Object, nil] nil when record_list is empty; otherwise the value
      #   returned by the last `connection.execute`, or the first negative
      #   numeric result if an adapter reports one.
      # @raise [RuntimeError] if the model has no primary key, record_list is not
      #   an Array of Hashes, or a record lacks a primary key value.
      def self.bulk_update(record_list, batch_size = 1000)
        pk = primary_key
        raise "primary_key not found" unless pk.present?

        unless record_list.is_a?(Array) && record_list.all? { |rec| rec.is_a?(Hash) }
          raise "record_list not an Array of Hashes"
        end
        return nil if record_list.empty?

        # Every record needs its primary key(s), otherwise the JOIN cannot match it.
        record_list.each do |rec|
          raise "Primary Keys '#{pk}' not found on record: #{rec}" unless hasAllPKs?(rec)
        end

        # Array() lets single and composite primary keys share one code path.
        pk_comparison = Array(pk).map { |key| "THIS.#{key} = VALS.#{key}" }.join(' AND ')

        result = nil

        # each_slice yields non-overlapping batches; the previous manual range
        # arithmetic sent the boundary record of each batch twice.
        record_list.each_slice(batch_size) do |batch|
          key_list, value_list = convert_record_list(batch)

          csv_vals = value_list.map { |row| "(#{row.join(', ')})" }.join(', ')
          column_names = key_list.join(', ')

          assignable = key_list.reject { |col| inPK?(col) }
          # Nothing but primary keys in this batch: an empty SET clause is invalid SQL.
          next if assignable.empty?

          columns_assign = assignable.map { |col| "THIS.#{col} = VALS.#{col}" }.join(', ')

          sql = "UPDATE THIS SET #{columns_assign}  FROM #{table_name} THIS  JOIN ( VALUES #{csv_vals} ) VALS ( #{column_names} ) ON ( #{pk_comparison} )"
          result = connection.execute(sql)

          # Some adapters return a row count; others return a result object that
          # cannot be compared with an Integer, so only test numeric results.
          return result if result.is_a?(Numeric) && result < 0
        end

        result
      end

      # Tells whether the given column name is (part of) the primary key.
      #
      # @param str [#to_s] column name
      # @return [Boolean]
      def self.inPK?(str)
        Array(primary_key).include?(str.to_s)
      end

      # Tests whether the hash has every primary key column as a key (String or
      # Symbol) with a non-blank value.
      #
      # @param hash [Hash] record attributes
      # @return [Boolean]
      def self.hasAllPKs?(hash)
        pk = primary_key
        return false if pk.nil?

        attrs = hash.stringify_keys
        Array(pk).all? { |key| attrs.key?(key) && attrs[key].present? }
      end

      # Converts a list of record hashes into a sorted column list plus one row
      # of SQL-quoted values per record, in that column order.
      #
      # If the table has an `updated_at` column and no record supplies it, the
      # current time is appended for every row. `created_at` is never added
      # automatically, because an UPDATE must not reset the creation time.
      #
      # @param record_list [Array<Hash>] records keyed by String or Symbol
      # @return [Array(Array<String>, Array<Array<String>>)] [key_list, value_list]
      def self.convert_record_list(record_list)
        key_list = record_list.flat_map(&:keys).map(&:to_s).uniq.sort

        value_list = record_list.map do |rec|
          key_list.map do |key|
            # fetch (rather than ||) keeps an explicit false under a string key
            # from falling through to the symbol lookup and becoming NULL.
            connection.quote(rec.fetch(key) { rec[key.to_sym] })
          end
        end

        if self.column_names.include?('updated_at') && !key_list.include?('updated_at')
          now = connection.quote(Time.now)
          key_list << 'updated_at'
          value_list.each { |row| row << now }
        end

        [key_list, value_list]
      end
    end
    

    Then, you can generate an array of hashes containing your models' attributes (including their primary keys) and do something like:

    # Each hash must include the model's primary key column(s) with a non-blank
    # value; bulk_update raises otherwise. The remaining keys are the columns to set.
    ActiveRecord::Base.transaction do
       Model.bulk_update [ {attr1: val1, attr2: val2,...},  {attr1: val1, attr2: val2,...},   ... ]
    end
    

    It will run one SQL command per batch of 1,000 records, without Rails callbacks or validations.

提交回复
热议问题