Creating confusion matrix from multiple .csv files

前端未结

关注

 2  1613

I have a lot of .csv files with the following format.


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  [愿得一人]        
                
              
                            
                2021-01-28 07:45
              
            
            
                                                                       
Add a helper function get_predict(filename) that maps a file name to its predicted label:

def get_predict(filename):
    """Return the predicted label encoded in ``filename``.

    A name containing the substring ``'Alex'`` maps to ``'Alexander'``;
    any other name maps to its first character.
    """
    return 'Alexander' if 'Alex' in filename else filename[0]




Read n files and compute the confusion matrix using pandas crosstab:

import csv
import os

import pandas as pd

def get_category(filepath):
    """Classify one two-column CSV file into a category letter.

    The first column of every row is parsed as an integer. Each time a
    value is smaller than the value on the previous row, the ratio
    ``value / previous_value`` is recorded. The mean of the recorded
    ratios is then mapped to a letter:

    * ``"A"`` -- mean in ``(0.8, 0.9]``
    * ``"B"`` -- mean equal to 0.7 (within 1e-10)
    * ``"C"`` -- mean in ``(0.5, 0.7)``
    * ``"E"`` -- mean equal to 0.5 (within 1e-10)
    * ``"D"`` -- anything else

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        One of the strings ``"A"``, ``"B"``, ``"C"``, ``"D"``, ``"E"``.

    Raises:
        ValueError: If a non-blank row does not have exactly two columns
            or its first column is not an integer.
        ZeroDivisionError: If the file contains no decreasing step, so
            there is no ratio to average.
    """
    def category(val):
        # Checks are ordered; the first matching range wins.
        if 0.8 < val <= 0.9:
            return "A"
        if abs(val - 0.7) < 1e-10:
            return "B"
        if 0.5 < val < 0.7:
            return "C"
        if abs(val - 0.5) < 1e-10:
            return "E"
        return "D"

    results = []
    previous_value = 0
    # newline="" lets the csv module do its own line-ending handling.
    with open(filepath, "r", newline="") as csvfile:
        for row in csv.reader(csvfile):
            if not row:
                # csv.reader yields [] for blank lines; ignore them
                # instead of failing on the two-column unpacking below.
                continue
            col1, col2 = row
            value = int(col1)
            if value < previous_value:
                results.append(value / previous_value)
            previous_value = value

    if not results:
        # Same exception type the bare division would raise, but with a
        # message that says which file caused it and why.
        raise ZeroDivisionError(
            "no decreasing step found in {!r}; cannot compute a mean "
            "ratio".format(filepath)
        )
    return category(sum(results) / len(results))

# Collect one (predicted, actual) label pair for every entry in the directory.
path = 'test/confusion'
matrix = {'actual': [], 'predict': []}
for filename in os.listdir(path):
    full_path = os.path.join(path, filename)
    # The predicted label is the first character of the file name.
    matrix['predict'].append(filename[0])
    # The actual label is derived from the file's contents.
    matrix['actual'].append(get_category(full_path))

# Cross-tabulate actual labels (rows) against predicted labels (columns).
actual = pd.Series(matrix['actual'], name='Actual')
predicted = pd.Series(matrix['predict'], name='Predicted')
df = pd.crosstab(actual, predicted)
print(df)



  Output (reading "A.csv", "B.csv" and "C.csv" with the given example data, three times each):

Predicted  A  B  C
Actual            
A          3  0  0
B          0  3  0
C          0  0  3



Tested with Python:3.4.2 - pandas:0.19.2
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  [愿得一人]        
                
              
                            
                2021-01-28 07:48
              
            
            
                                                                       
Scikit-Learn is the best option in your case, as it provides a confusion_matrix function. Here is an approach you can easily extend.

from sklearn.metrics import confusion_matrix

# Load the true labels: one integer per line of A1.csv.
with open('A1.csv', 'r') as readFile:
    true_values = list(map(int, readFile))
# Load the predicted labels the same way from B1.csv.
with open('B1.csv', 'r') as readFile:
    predictions = list(map(int, readFile))

# Build the confusion matrix from the two label sequences.
confusionMatrix = confusion_matrix(true_values, predictions)

print(confusionMatrix)


This is the output you would expect. 

[[0 2]
 [0 2]]


For more hints, check out the following link:

How to write a confusion matrix in Python?
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复