Ruby - How to retrieve sum in array group by multiple keys with condition max

后端 未结 3 1554
暗喜
暗喜 2021-01-22 15:51

The original array is

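[
    {"id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1"},
    {"id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1"},
    {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2"}
]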


        
相关标签:
3条回答
  • 2021-01-22 16:29

    This should be pretty close of what you asked

    # Groups an array of hashes by the values of +group_fields+ and collapses
    # each group into a single hash.
    #
    # @param arr [Array<Hash>] rows to aggregate
    # @param group_fields [Array] keys whose combined values identify a group
    # @param sum_fields [Array] keys whose values are converted with +to_f+
    #   and summed across the group (the result is a Float)
    # @param max_fields [Array] keys for which the largest value found in the
    #   group is kept; each key is maximised independently of the others
    # @return [Array<Hash>] one hash per group, in order of first appearance;
    #   keys that are neither summed nor maximised keep the value from the
    #   first row of the group
    def f_addition(arr, group_fields, sum_fields, max_fields)
      arr.group_by { |h| h.values_at(*group_fields) }.values.map do |group|
        sums = sum_fields.each_with_object({}) do |field, acc|
          acc[field] = group.sum(0.0) { |h| h[field].to_f }
        end

        maxes = max_fields.each_with_object({}) do |field, acc|
          # Ignore rows that lack the field so that nil is never compared
          # against a real value.
          candidates = group.map { |h| h[field] }.compact
          acc[field] = candidates.max unless candidates.empty?
        end

        # merge returns a new hash, so the caller's rows are left untouched.
        group.first.merge(sums).merge(maxes)
      end
    end
    
    arr = [
        {"id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1"},
        {"id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1"},
        {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2"}
    ]
    
    
    f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"] )
    
    # [
    #   {"id"=>2, "idx"=>112, "money"=>6.0, "money1"=>3.0, "order"=>"001", "order1"=>"1"},
    #   {"id"=>3, "idx"=>113, "money"=>3.0, "money1"=>1.0, "order"=>"002", "order1"=>"2"}
    # ]
    
    0 讨论(0)
  • 2021-01-22 16:37

    One way of doing this is to use the form of Hash#update (aka merge!) that uses a block to determine the values of keys that are present in both hashes being merged.

    Code

    # Collapses the hashes in +arr+ that share the same values for
    # +group_fields+ into a single hash per group.
    #
    # When two rows of a group both carry a key:
    # * keys listed in +sum_fields+ are added (via +to_f+) and formatted as a
    #   string with two decimals;
    # * keys listed in +max_fields+ keep the larger of the two values;
    # * every other key keeps the value seen first.
    # Keys present in only one of the rows are carried over unchanged.
    #
    # @return [Array<Hash>] one hash per group, in order of first appearance
    def f_addition(arr, group_fields, sum_fields, max_fields)
      grouped = {}

      arr.each do |row|
        group_key = row.values_at(*group_fields)

        # First row of a group is stored as-is; nothing to combine yet.
        unless grouped.key?(group_key)
          grouped[group_key] = row
          next
        end

        grouped[group_key] = grouped[group_key].merge(row) do |field, kept, incoming|
          if sum_fields.include?(field)
            format("%.2f", kept.to_f + incoming.to_f)
          elsif max_fields.include?(field)
            [kept, incoming].max
          else
            kept
          end
        end
      end

      grouped.values
    end
    

    Example

    arr = [{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
             "order"=>"001", "order1"=>"1", "pet"=>"dog" },
           { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00",
             "order"=>"001", "order1"=>"1", "sport"=>"darts" },
           { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
             "order"=>"002", "order1"=>"2" }]
    

    Notice that this array is slightly different from the one given in the question. I have added "pet"=>"dog" to the first (hash) element and "sport"=>"darts" to the second hash.

    f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"] )
      #=> [{ "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
      #      "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts"},
      #    { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
      #      "order"=>"002", "order1"=>"2" }] 
    

    Explanation

    For the example above:

    group_fields = ["order", "order1"]
    sum_fields   = ["money", "money1"]
    max_fields   = ["id", "idx"]
    
    enum = arr.each_with_object({})
      #=> #<Enumerator: [{"id"=>2, "idx"=>111,..., "pet"=>"dog"},
      #     {"id"=>1, "idx"=>112,..., "sport"=>"darts"},
      #     {"id"=>3,"idx"=>113,...,"order1"=>"2"}]:each_with_object({})> 
    

    Array#each passes each element of this enumerator into the block and assigns it to the block variables. The first element passed is:

    h, g = enum.next
      #=> [{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
      #      "order"=>"001", "order1"=>"1", "pet"=>"dog" }, {}]
    h #=>  { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
      #      "order"=>"001", "order1"=>"1", "pet"=>"dog" }
    g #=>  {}
    

    As:

    h.values_at(*group_fields)
      #=> h.values_at(*["order", "order1"])
      #=> h.values_at("order", "order1")
      #=> ["001", "1"]
    

    we compute:

    g.update(["001", "1"] => h) do |k,gv,hv| ... end
    

    which is shorthand for:

    g.update({ ["001", "1"] => h }) do |k,gv,hv| ... end
    

    The block do |k,gv,hv| ... end is only used when the two hashes being merged both contain the key k (see footnote 1). As g = {} contains no keys, the block is not used at this time:

    g.update({ ["001", "1"] => h })
      #=> {}.update({ ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00",
      #                               "money1"=>"1.00", "order"=>"001",
      #                               "order1"=>"1", "pet"=>"dog" } })
      #=> { ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
      #                     "order"=>"001", "order1"=>"1", "pet"=>"dog" } } 
    

    where the value returned by update is the new value of g.

    The next value of enum passed into the block is:

    h, g = enum.next
    h #=> { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00",
      #     "order"=>"001", "order1"=>"1", "sport"=>"darts" }
    g #=> { ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
      #      "order"=>"001", "order1"=>"1", "pet"=>"dog" } }
    

    As:

    h.values_at(*group_fields)
      #=> h.values_at("order", "order1")
      #=> ["001", "1"]
    

    we compute:

    g.update(["001", "1"] => h) do |k,gv,hv| ... end
    

    As g and { ["001", "1"] => h } both contain the key ["001", "1"], we must defer to the block to determine the value of that key in the merged hash. We have:

    k  = ["001", "1"]
    gv = { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
           "order"=>"001", "order1"=>"1", "pet"=>"dog" }
    hv = { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00",
           "order"=>"001", "order1"=>"1", "sport"=>"darts" }
    

    We therefore evaluate the block as follows (using merge rather than merge!/update):

    gv.merge(hv) do |k,gvv,hvv|
      case
      when sum_fields.include?(k) then "%.2f" % (gvv.to_f + hvv.to_f)
      when max_fields.include?(k) then [gvv, hvv].max
      else gvv
      end
    end
      #=> { "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
      #     "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts"}
    

    gv does not contain the key "sport", so the block is not used when merging "sport"=>"darts" into gv. All other keys of hv are present in gv, however, so we use the block to determine their values in the merged hash. For:

    k = "money"
    gvv = "4.00"
    hvv = "2.00"
    

    we find:

    sum_fields.include?(k)
      #=> ["money", "money1"].include?("money")
      #=> true
    

    so the case statement returns:

    "%.2f" % (gvv.to_f + hvv.to_f)
      #=> "%.2f" % ("4.00".to_f + "2.00".to_f)
      #=> "6.00"
    

    The values for other keys of hv, the hash being merged into gv, are computed similarly, to give us a new value for the merged hash g.

    Lastly,

    { ["002", "2"] => { "id"=>3, "idx"=>113, "money"=>"3.00",
                        "money1"=>"1.00", "order"=>"002", "order1"=>"2" } }
    

    is merged into g (which does not require the use of update's block) and g.values is returned by the method.

    Observation

    It would be easy to generalize this to pass pairs such as:

    [["money","money1"], ->(a,b) { "%.2f" % (a.to_f + b.to_f) }]
    [["id", "idx"], :max]
    

    This could be done as follows:

    # Collapses the hashes in +arr+ that share the same values for
    # +group_fields+ into one hash per group, combining conflicting values
    # according to caller-supplied rules.
    #
    # @param arr [Array<Hash>] rows to aggregate
    # @param group_fields [Array] keys whose combined values identify a group
    # @param mods [Array<Array>] rules of the form +[fields, op]+. +fields+ is
    #   a collection of keys; +op+ is either a Symbol naming a public Array
    #   method applied to +[old, new]+ (e.g. +:max+), or any object responding
    #   to +call+, invoked as +op.call(old, new)+. The first rule whose
    #   +fields+ include the key wins.
    # @return [Array<Hash>] one hash per group, in order of first appearance;
    #   keys not covered by any rule keep the value seen first
    # @raise [ArgumentError] if a matching rule's +op+ is neither a Symbol nor
    #   callable (previously such a rule silently produced +nil+)
    def f_addition(arr, group_fields, *mods)
      arr.each_with_object({}) do |h, g|
        g.update(h.values_at(*group_fields) => h) do |_, gv, hv|
          gv.merge(hv) do |k, gvv, hvv|
            rule = mods.find { |fields, _op| fields.include?(k) }
            next gvv unless rule

            op = rule[1]
            case op
            when Symbol
              [gvv, hvv].public_send(op)
            else
              unless op.respond_to?(:call)
                raise ArgumentError, "unsupported operation for #{k.inspect}: #{op.inspect}"
              end

              op.call(gvv, hvv)
            end
          end
        end
      end.values
    end
    
    f_addition(arr, ["order","order1"],
                    [["money","money1"], ->(a,b) { "%.2f" % (a.to_f + b.to_f) }],
                    [["id", "idx"], :max])
      # => [{ "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
      #       "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts" },
      #      { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
      #        "order"=>"002", "order1"=>"2" }]
    
    1. We will find that the calculations in the block do not depend on the block variable `k`.
       I've therefore replaced that variable with the local variable _, to so-inform the reader.
    0 讨论(0)
  • 2021-01-22 16:49

    I used this code

    # Groups the hashes in +arr+ by the values of +group_fields+ and reduces
    # each group to a single hash.
    #
    # @param arr [Array<Hash>] rows to aggregate
    # @param group_fields [Array] keys whose combined values identify a group
    # @param sum_fields [Array] keys whose values are converted with +to_f+
    #   and summed across the group (the result is a Float)
    # @param max_fields [Array] keys for which the largest value in the group
    #   is kept. String values that are valid base-10 integers are compared
    #   (and returned) as Integers, so "12" ranks above "2"; other values are
    #   compared as they are.
    # @return [Array<Hash>] one hash per group, in order of first appearance;
    #   all remaining keys keep the value from the first row of the group
    def aggregate(arr, group_fields, sum_fields, max_fields)
      # Only values of max_fields are coerced, and only for comparison.
      # Base 10 is forced so zero-padded strings such as "010" are read as 10
      # instead of being interpreted as octal.
      comparable = lambda do |value|
        next value unless value.is_a?(String)

        begin
          Integer(value, 10)
        rescue ArgumentError
          value
        end
      end

      arr.group_by { |row| row.values_at(*group_fields) }.map do |_key, rows|
        result = rows.first.clone

        max_fields.each do |field|
          result[field] = rows.map { |row| comparable.call(row[field]) }.max
        end

        sum_fields.each do |field|
          result[field] = rows.sum(0.0) { |row| row[field].to_f }
        end

        result
      end
    end
    

    One weakness is that this converts all fields to integers (the conversion is needed for comparison in cases such as "id"=>"12" versus "id"=>"2"). It should convert only max_fields, but I haven't found a solution for that yet. The conversion code is:

    hashes.map! {|h| h.merge(h) { |k, v| Integer(v) rescue v }} Or

    hashes.map! { |h| h.each_pair { | k, v | h[k] = Integer(v) rescue v }}

    So, its great if someone could solve this weak point.

    0 讨论(0)
提交回复
热议问题