filter a content file to table

前端 未结 2 859
野性不改
野性不改 2021-01-24 17:36

This is the input I have generated; it shows the versions of the courses for both Jany and Marco at different times.

         


        
相关标签:
2条回答
  • 2021-01-24 18:10

    With GNU awk for true multi-dimensional arrays and sorted_in:

    $ cat tst.awk
    # Pivot paragraph-style records into one CSV-like table per name.
    # Requires GNU awk: uses arrays of arrays and PROCINFO["sorted_in"].
    BEGIN {
        RS = ""                 # paragraph mode: blank lines separate records
        FS = "[[:space:]:]+"    # any run of whitespace and/or colons splits fields
    }

    {
        # Fields 7 and 8 form the table key; fields 2 and 3 form the column key.
        who  = $7 " " $8
        when = $2 ":" $3

        # From field 11 on, every third field is a row label and the field
        # right after it is that row's value for this (who, when) pair.
        for (f = 11; f <= NF; f += 3) {
            kinds[$f]                       # remember every row label seen
            sched[who][when][$f] = $(f + 1)
        }
    }

    END {
        # Traverse every array in ascending string order of its indices.
        PROCINFO["sorted_in"] = "@ind_str_asc"

        for (who in sched) {
            # Header row: the name followed by each time it has data for.
            head = who
            for (when in sched[who])
                head = head "," when
            print head

            # One row per label; a label never recorded for a time yields
            # an empty cell.
            for (kind in kinds) {
                row = kind
                for (when in sched[who])
                    row = row "," sched[who][when][kind]
                print row
            }

            # Blank line between tables.
            print ""
        }
    }
    

    .

    $ gawk -f tst.awk file
    Marco 1,10:00,14:00
    applicaton,halfhour,onehours
    theory,geo,programmation
    
    Marco 2,10:00,14:00
    applicaton,nothing,nothing
    theory,history,philosophy
    
    jany 1,10:00,14:00
    applicaton,onehour,twohours
    theory,nothing,nothing
    
    jany 2,10:00,14:00
    applicaton,twohour,twohours
    theory,math,music
    

    It doesn't exactly produce your posted expected output but I think that's because your posted expected output is wrong (e.g. check the output for jany 1 application 14:00 compared to your input - the input is twohours like my script produces but you say the expected output is halfhour).

    0 讨论(0)
  • 2021-01-24 18:26

    Try this:

    # Turn blank-line-separated records into one small table per key.
    #
    # Each record is read as three lines:
    #   line 1: "<time> <key>" once the fixed wording removed below is stripped
    #           (presumably "on <time> the course of <key> is :" - confirm
    #           against the real input)
    #   line 2: "course:theory:<value>"
    #   line 3: "course:applicaton:<value>"   (spelling as matched below)
    #
    # For every distinct key three comma-separated lines are printed: the key
    # followed by its times, then the "theory" values, then the "application"
    # values, followed by an empty line.
    BEGIN {
        # set records separated by empty lines
        RS=""
        # set fields separated by newline, each record has 3 fields
        FS="\n"
    }
    {
        # remove undesired parts of every first line of a record
        sub("the course of ", "", $1)
        sub(" is :", "", $1)
        sub("on ", "", $1)
        # what is left is "<time> <key>": split it at the first space.
        # Using index/substr instead of deleting the key with sub() keeps the
        # separating space out of the time and avoids treating the key as a
        # regular expression.
        sep = index($1, " ")
        time = substr($1, 1, sep - 1)
        course = substr($1, sep + 1)
        # get theory info from second line per record
        sub("course:theory:", "", $2)
        # get application info from third line
        sub("course:applicaton:", "", $3)
        # first time this key is seen
        if (! (course in header)) {
            # save header information (first words of each line in output)
            header[course] = course
            theory[course] = "theory"
            app[course] = "application"
        }
        # append the relevant info to the output strings
        header[course] = header[course] "," time
        theory[course] = theory[course] "," $2
        app[course] = app[course] "," $3
    }
    END {
        # now for each key found (awk does not define the traversal order of
        # "for (key in array)", so the tables may come out in any order)
        for (key in header) {
            # print the strings constructed
            print header[key]
            print theory[key]
            print app[key]
            print ""
        }
    }
    

    I hope the comments are self-explanatory; if you have questions about the script, be sure to ask them.

    0 讨论(0)
提交回复
热议问题