filter a content file to table

前端未结

关注

 2  861

This is the input I have generated; it shows the versions of the courses for both Jany and Marco at different times.


                      
              相关标签:


      
      
        
          2条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  情歌与酒        
                
              
                            
                2021-01-24 18:10
              
            
            
                                                                       
With GNU awk for true multi-dimensional arrays and sorted_in:

$ cat tst.awk
# Pivot blank-line-separated records into one comma-separated table per name.
# Requires GNU awk: uses arrays of arrays and PROCINFO["sorted_in"].
BEGIN {
    RS = ""                  # paragraph mode: records are separated by blank lines
    FS = "[[:space:]:]+"     # fields are split on runs of whitespace and/or colons
}

{
    # Fields 7-8 form the name (e.g. "Marco 1"); fields 2-3 form the time (e.g. "10:00").
    who  = $7" "$8
    when = $2":"$3

    # From field 11 onwards the record is read in steps of three fields:
    # $pos is the row label and $(pos+1) is the value stored for it.
    pos = 11
    while (pos <= NF) {
        courses[$pos]                          # remember every row label seen
        sched[who][when][$pos] = $(pos+1)
        pos += 3
    }
}

END {
    # Visit array indices in ascending string order so the output is stable.
    PROCINFO["sorted_in"] = "@ind_str_asc"

    for (who in sched) {
        # Header row: the name followed by each time recorded for it.
        printf "%s", who
        for (when in sched[who])
            printf ",%s", when
        print ""

        # One row per label, one column per time (empty when nothing was stored).
        for (label in courses) {
            printf "%s", label
            for (when in sched[who])
                printf ",%s", sched[who][when][label]
            print ""
        }

        # A blank line separates consecutive tables.
        print ""
    }
}


.

$ gawk -f tst.awk file
Marco 1,10:00,14:00
applicaton,halfhour,onehours
theory,geo,programmation

Marco 2,10:00,14:00
applicaton,nothing,nothing
theory,history,philosophy

jany 1,10:00,14:00
applicaton,onehour,twohours
theory,nothing,nothing

jany 2,10:00,14:00
applicaton,twohour,twohours
theory,math,music


It doesn't exactly produce your posted expected output but I think that's because your posted expected output is wrong (e.g. check the output for jany 1 application 14:00 compared to your input - the input is twohours like my script produces but you say the expected output is halfhour).
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  逝去的感伤        
                
              
                            
                2021-01-24 18:26
              
            
            
                                                                       
Try this:

# Turn blank-line-separated, three-line records into one small CSV table per
# course: a header row (course title followed by the times), a "theory" row
# and an "application" row.
BEGIN {
    # records are separated by empty lines
    RS = ""
    # fields are separated by newlines; each record is expected to have 3 lines
    FS = "\n"
}
{
    # remove the fixed wording from the first line of the record, leaving
    # "<time> <course title>"
    sub("the course of ", "", $1)
    sub(" is :", "", $1)
    sub("on ", "", $1)

    # Split at the first space: everything before it is the time, everything
    # after it is the course title.  Plain index()/substr() is used instead of
    # sub(course, "", time) so that the title is never interpreted as a regex
    # and the separating space does not end up glued to the time.
    sep = index($1, " ")
    if (sep > 0) {
        time = substr($1, 1, sep - 1)
        course = substr($1, sep + 1)
    } else {
        # no space at all: treat the whole line as the title, with no time
        time = ""
        course = $1
    }

    # get theory info from the second line of the record
    sub("course:theory:", "", $2)
    # get application info from the third line (the prefix is matched with
    # the spelling "applicaton")
    sub("course:applicaton:", "", $3)

    # first time this course is seen: start each output row with its label
    if (! (course in header)) {
        header[course] = course
        theory[course] = "theory"
        app[course] = "application"
    }

    # append this record's values as a new column
    header[course] = header[course] "," time
    theory[course] = theory[course] "," $2
    app[course] = app[course] "," $3
}
END {
    # print the three rows built for each course, followed by a blank line;
    # the iteration order of "for (key in ...)" is not specified by awk
    for (key in header) {
        print header[key]
        print theory[key]
        print app[key]
        print ""
    }
}


I hope the comments are self-explanatory; if you have questions about the script, be sure to ask them.
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复