Ruby - How to retrieve sum in array group by multiple keys with condition max

后端未结

关注

 3  1559

The original array is

[
    {\"id\"=>2, \"idx\"=>111, \"money\"=>\"4.00\", \"money1\"=>\"1.00\", \"order\"=>\"001\", \"order1\"=>\"1\"},
    {\


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  小蘑菇        
                
              
                            
                2021-01-22 16:29
              
            
            
                                                                       
This should be pretty close to what you asked for.

# Collapses the hashes in +arr+ into one hash per group.
#
# Hashes are grouped by the values they hold for +group_fields+. For each
# group the result starts from the group's first hash and then:
# * every key in +sum_fields+ is replaced by the Float sum of the group's
#   values for that key (each value is converted with +to_f+);
# * every key in +max_fields+ is replaced by the largest non-nil value the
#   group holds for that key (left untouched when the group has no value).
#
# @param arr [Array<Hash>] rows to aggregate
# @param group_fields [Array] keys whose values identify a group
# @param sum_fields [Array] keys whose values are summed
# @param max_fields [Array] keys whose maximum value is kept
# @return [Array<Hash>] one merged hash per group, in first-seen order
def f_addition(arr, group_fields, sum_fields, max_fields)
  arr.group_by { |h| h.values_at(*group_fields) }.values.map do |group|
    sums = sum_fields.each_with_object({}) do |field, acc|
      acc[field] = group.sum { |h| h[field].to_f }
    end
    maxes = max_fields.each_with_object({}) do |field, acc|
      # Ignore rows that lack the field so nil is never compared with a value.
      best = group.map { |h| h[field] }.compact.max
      acc[field] = best unless best.nil?
    end
    group.first.merge(sums).merge(maxes)
  end
end

arr = [
    {"id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00", "order"=>"001", "order1"=>"1"},
    {"id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00", "order"=>"001", "order1"=>"1"},
    {"id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00", "order"=>"002", "order1"=>"2"}
]


f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"] )

# [
#   {"id"=>2, "idx"=>112, "money"=>6.0, "money1"=>3.0, "order"=>"001", "order1"=>"1"},
#   {"id"=>3, "idx"=>113, "money"=>3.0, "money1"=>1.0, "order"=>"002", "order1"=>"2"}
# ]

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  悲哀的现实        
                
              
                            
                2021-01-22 16:37
              
            
            
                                                                       
One way of doing this is to use the form of Hash#update (aka merge!) that uses a block to determine the values of keys that are present in both hashes being merged.

Code

# Merges the hashes in +arr+ that share the same values for +group_fields+.
#
# When two hashes of a group both hold a key:
# * a key listed in +sum_fields+ becomes the sum of both values (via +to_f+),
#   formatted as a string with two decimals;
# * a key listed in +max_fields+ becomes the larger of the two values;
# * any other key keeps the value seen first.
# Keys present in only one of the hashes are carried over unchanged.
#
# Returns an array with one merged hash per group, in first-seen order.
def f_addition(arr, group_fields, sum_fields, max_fields)
  grouped = {}
  arr.each do |row|
    key = row.values_at(*group_fields)
    unless grouped.key?(key)
      # First row of a group is stored as-is; nothing to combine yet.
      grouped[key] = row
      next
    end
    grouped[key] = grouped[key].merge(row) do |field, kept, incoming|
      if sum_fields.include?(field)
        format("%.2f", kept.to_f + incoming.to_f)
      elsif max_fields.include?(field)
        [kept, incoming].max
      else
        kept
      end
    end
  end
  grouped.values
end


Example

arr = [{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
         "order"=>"001", "order1"=>"1", "pet"=>"dog" },
       { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00",
         "order"=>"001", "order1"=>"1", "sport"=>"darts" },
       { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
         "order"=>"002", "order1"=>"2" }]


Notice that this array is slightly different from the one given in the question. I have added "pet"=>"dog" to the first hash and "sport"=>"darts" to the second hash.

f_addition(arr, ["order","order1"], ["money","money1"], ["id", "idx"] )
  #=> [{ "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
  #      "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts"},
  #    { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
  #      "order"=>"002", "order1"=>"2" }] 


Explanation

For the example above:

group_fields = ["order", "order1"]
sum_fields   = ["money", "money1"]
max_fields   = ["id", "idx"]

enum = arr.each_with_object({})
  #=> #<Enumerator: [{"id"=>2, "idx"=>111,..., "pet"=>"dog"},
  #     {"id"=>1, "idx"=>112,..., "sport"=>"darts"},
  #     {"id"=>3,"idx"=>113,...,"order1"=>"2"}]:each_with_object({})> 


Array#each passes each element of this enumerator into the block and assigns it to the block variables. The first element passed is:

h, g = enum.next
  #=> [{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
         "order"=>"001", "order1"=>"1", "pet"=>"dog" }, {}]  
h #=>  { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
         "order"=>"001", "order1"=>"1", "pet"=>"dog" } 
g #=>  {} 


As:

h.values_at(*group_fields)
  #=> h.values_at(*["order", "order1"])
  #=> h.values_at("order", "order1")
  #=> ["001", "1"]


we compute:

g.update(["001", "1"] => h) do |k,gv,hv| ... end


which is shorthand for:

g.update({ ["001", "1"] => h }) do |k,gv,hv| ... end


The block do |k,gv,hv| ... end is only used when the two hashes being merged both contain the key k.¹ As g = {} contains no keys, the block is not used at this time:

g.update({ ["001", "1"] => h })
  #=> {}.update({ ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00",
  #                               "money1"=>"1.00", "order"=>"001",
  #                               "order1"=>"1", "pet"=>"dog" } }
  #=> { ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
  #                     "order"=>"001", "order1"=>"1", "pet"=>"dog" } } 


where the value returned by update is the new value of g.

The next value of enum passed into the block is:

h, g = enum.next
h #=> { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00",
  #     "order"=>"001", "order1"=>"1", "sport"=>"darts" },
g #=> { ["001", "1"]=>{ "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
  #      "order"=>"001", "order1"=>"1", "pet"=>"dog" } }] 


As:

h.values_at(*group_fields)
  #=> h.values_at("order", "order1")
  #=> ["001", "1"]


we compute:

g.update(["001", "1"] => h) do |k,gv,hv| ... end


As g and { ["001", "1"] => h } both contain the key ["001", "1"], we must defer to the block to determine the value of that key in the merged hash. We have:

k  = ["001", "1"]
gv = { "id"=>2, "idx"=>111, "money"=>"4.00", "money1"=>"1.00",
       "order"=>"001", "order1"=>"1", "pet"=>"dog" }
hv = { "id"=>1, "idx"=>112, "money"=>"2.00", "money1"=>"2.00",
       "order"=>"001", "order1"=>"1", "sport"=>"darts" }


We therefore evaluate the block as follows (using merge rather than merge!/update):

gv.merge(hv) do |k,gvv,hvv|
  case
  when sum_fields.include?(k) then "%.2f" % (gvv.to_f + hvv.to_f)
  when max_fields.include?(k) then [gvv, hvv].max
  else gvv
  end
end
  #=> { "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
  #     "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts"}


gv does not contain the key "sport", so the block is not used when merging "sport"=>"darts" into gv. All other keys of hv are present in gv, however, so we use the block to determine their values in the merged hash. For:

k = "money"
gvv = "4.00"
hvv = "2.00"


we find:

sum_fields.include?(k)
  #=> ["money", "money1"].include?("money")
  #=> true


so the case statement returns:

"%.2f" % (gvv.to_f + hvv.to_f)
  #=> "%.2f" % ("4.00".to_f + "2.00".to_f)
  #=> "6.00"


The values for other keys of hv, the hash being merged into gv, are computed similarly, to give us a new value for the merged hash g.

Lastly,

{ ["002", "2"] => { "id"=>3, "idx"=>113, "money"=>"3.00",
                    "money1"=>"1.00", "order"=>"002", "order1"=>"2" } }


is merged into g (which does not require the use of update's block) and g.values is returned by the method.

Observation

It would be easy to generalize this to pass pairs such as:

[["money","money1"], ->(a,b) { "%.2f" % (a.to_f + b.to_f) }]
[["id", "idx"], :max]


This could be done as follows:

# Merges the hashes in +arr+ that share the same values for +group_fields+,
# resolving key collisions with caller-supplied rules.
#
# Each element of +mods+ is a pair [fields, op]. When two hashes of a group
# both hold a key, the first pair whose +fields+ includes that key decides:
# * a Proc +op+ is called with (existing value, incoming value);
# * a Symbol +op+ is sent to the two-element array [existing, incoming];
# * any other +op+ yields nil.
# Keys matched by no pair keep the value seen first.
#
# Returns an array with one merged hash per group, in first-seen order.
def f_addition(arr, group_fields, *mods)
  merged = {}
  arr.each do |row|
    key = row.values_at(*group_fields)
    unless merged.key?(key)
      # First row of a group is stored as-is; nothing to combine yet.
      merged[key] = row
      next
    end
    merged[key] = merged[key].merge(row) do |field, kept, incoming|
      fields, rule = mods.find { |candidates, _rule| candidates.include?(field) }
      if !fields
        kept
      elsif rule.is_a?(Proc)
        rule.call(kept, incoming)
      elsif rule.is_a?(Symbol)
        [kept, incoming].send(rule)
      end
    end
  end
  merged.values
end

f_addition(arr, ["order","order1"],
                [["money","money1"], ->(a,b) { "%.2f" % (a.to_f + b.to_f) }],
                [["id", "idx"], :max])
  # => [{ "id"=>2, "idx"=>112, "money"=>"6.00", "money1"=>"3.00",
  #       "order"=>"001", "order1"=>"1", "pet"=>"dog", "sport"=>"darts" },
  #      { "id"=>3, "idx"=>113, "money"=>"3.00", "money1"=>"1.00",
  #        "order"=>"002", "order1"=>"2" }]


¹ We will find that the calculations in the block do not depend on the block variable `k`.
   I've therefore replaced that variable with the local variable `_`, to so inform the reader.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  孤城傲影        
                
              
                            
                2021-01-22 16:49
              
            
            
                                                                       
I used this code 

# Collapses the hashes in +arr+ into one hash per group.
#
# Hashes are grouped by their values for +group_fields+. Each result starts
# as a copy of the group's first hash and then:
# * every key in +max_fields+ is set to the group's largest value for that
#   key; string values that are valid base-10 integers are compared (and
#   returned) as Integers, so "12" beats "2";
# * every key in +sum_fields+ is set to the Float sum of the group's values
#   (each converted with +to_f+).
#
# Unlike a blanket conversion of every field, only the +max_fields+ values
# are coerced, and only for strings: numeric values are left as they are and
# a leading zero is not read as octal.
#
# @param arr [Array<Hash>] rows to aggregate
# @param group_fields [Array] keys whose values identify a group
# @param sum_fields [Array] keys whose values are summed
# @param max_fields [Array] keys whose maximum value is kept
# @return [Array<Hash>] one aggregated hash per group, in first-seen order
# @raise [ArgumentError] if a group's values for a max field cannot be
#   compared with each other (e.g. an Integer and a non-numeric String)
def aggregate(arr, group_fields, sum_fields, max_fields)
  # Integer(..., 10, exception: false) returns nil for non-numeric strings,
  # in which case the original value is kept for comparison.
  comparable = lambda do |value|
    value.is_a?(String) ? (Integer(value, 10, exception: false) || value) : value
  end

  arr.group_by { |row| row.values_at(*group_fields) }.map do |_key, rows|
    result = rows.first.clone
    max_fields.each do |field|
      result[field] = rows.map { |row| comparable.call(row[field]) }.max
    end
    sum_fields.each do |field|
      result[field] = rows.sum { |row| row[field].to_f }
    end
    result
  end
end


One drawback of a blanket conversion is that it turns every field into an integer (the conversion is needed so that values such as "id"=>"12" and "id"=>"2" compare numerically). Ideally only max_fields should be converted, but I have not found a solution for that yet. The blanket conversion looks like this:

hashes.map! {|h| h.merge(h) { |k, v| Integer(v) rescue v }}
Or

hashes.map! { |h| h.each_pair { | k, v | h[k] = Integer(v) rescue v }}

So it would be great if someone could solve this weak point.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复