skip some rows in read.csv in R

后端未结

关注

 3  1552

I have a csv file which I read using the following function:

csvData <- read.csv(file=\"pf.csv\", colClasses=c(NA, NA,\"NULL\",NA,\"NULL\",NA,\"NULL\",\"


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  爱一瞬间的悲伤        
                
              
                            
                2020-12-03 11:51
              
            
            
                                                                       
For me the sqldf package's read.csv.sql looked great at first blush.  But when I tried to use it, it failed to deal with "NULL" strings. (Others have found this out as well.)  Unfortunately, it doesn't support all of read.csv's features.
So I had to write my own.  I am surprised that there isn't a good package for this.

fetchLines=function(inputFile,match,fixed=T,n=100,maxlines=100000){ #inputFile='simple.csv'; match='APPLE';
  message('reading:',inputFile)
  n=min(n,maxlines)
  con  <- base::file(inputFile, open = "r",encoding = "UTF-8-BOM")
  data=c(readLines(con, n = 1, warn = FALSE))
  while (length(oneLine <- readLines(con, n = n, warn = FALSE)) > 0) {
    grab=grep(match,oneLine,value=T,fixed=fixed)
    if(length(grab)>0){
      data=c(data,grab)
      if(length(data)>maxlines){
        warning("bailing out too many");
        return(data);
      }
      cat('.')
    }
  } 
  close(con)
  gc()
  cat("\n")
  data;
}

#To avoid: argument 'object' must deparse to a single character string
fdata=textConnection( fetchLines("datafile.csv",'\\bP58\\b',fixed=F,maxlines = 100000))
df<-read.csv(fdata,header=T,sep=",",na.strings = c('NULL',''),fileEncoding = "UTF-8-BOM",stringsAsFactors = F)


R textConnection: "argument 'object' must deparse to a single character string"
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  半阙折子戏        
                
              
                            
                2020-12-03 11:55
              
            
            
                                                                       
It is better to read everything and subset afterwards, as suggested in the comment:

# Keep only the rows whose `ticker` value is neither 'ADCT' nor 'ABT'.
csvData [!csvData$ticker %in% c('ADCT','ABT'),]


EDIT

You can use fread from the data.table package for a more efficient way to read your file.

# fread() is provided by the data.table package (there is no package named
# "read.table"; read.table() is a base R function).
library(data.table)
fread(file = "pf.csv")

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  忘掉有多难        
                
              
                            
                2020-12-03 12:01
              
            
            
                                                                       
This is possible with the sqldf package, using read.csv.sql.

Let's say the contents of sample.csv look like this:

id,name,age
1,"a",23
2,"b",24
3,"c",23


Now to read only rows where age=23:

# library() stops with an error if sqldf is not installed; require() would
# only return FALSE and let the next call fail with a less helpful message.
library(sqldf)

# Filter rows while reading: only records with age = 23 are loaded.
df <- read.csv.sql("sample.csv", sql = "select * from file where age=23")

df
  id name age
1  1  "a"  23
2  3  "c"  23


It is possible to select necessary columns:

# Load just the id and name columns of the rows where age is 23.
df <- read.csv.sql(
  "sample.csv",
  sql = "select id, name from file where age=23"
)
df
  id name
1  1  "a"
2  3  "c"

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复