Does this multiple-pipes code in C make sense?

后端 未结 5 1529
暖寄归人
暖寄归人 2020-12-18 13:39

I've created a question about this a few days ago. My solution is something along the lines of what was suggested in the accepted answer. However, a friend of mine came up with th

相关标签:
5条回答
  • 2020-12-18 14:08

    This is my "final" code with ephemient's suggestions:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <wait.h>
    #include <string.h>
    #include <readline/readline.h>
    #include <readline/history.h>
    
    #define NUMPIPES 5
    #define NUMARGS 10
    
    /*
     * Minimal interactive shell.
     *
     * Reads a line with readline(), splits it on '|' into pipeline stages and
     * each stage on ' ' into arguments, then runs the stages connected by
     * pipes. Typing "exit" (any letter case) or sending EOF leaves the shell.
     *
     * Only the children get their stdin/stdout redirected; descriptors 0 and 1
     * of the shell itself are never modified.
     *
     * Limits: at most NUMPIPES stages per line and NUMARGS-1 arguments per
     * stage (one cmdArgs slot is reserved for the terminating NULL).
     *
     * Returns 0 when the user exits.
     */
    int main(int argc, char *argv[]) {
        char *bBuffer, *sPtr, *aPtr = NULL, *pipeComms[NUMPIPES], *cmdArgs[NUMARGS];
        int newPipe[2], oldPipe[2], pCount, aCount, i, status;
        int hasPrev, hasNext, havePipe, failed;
        pid_t pid;

        (void)argc;
        (void)argv;

        using_history();

        while(1) {
            bBuffer = readline("\e[1;31mShell \e[1;32m# \e[0m");

            // readline() returns NULL on EOF (e.g. Ctrl-D on an empty line)
            if(bBuffer == NULL) {
                return 0;
            }

            if(!strcasecmp(bBuffer, "exit")) {
                free(bBuffer);
                return 0;
            }

            if(strlen(bBuffer) > 0) {
                add_history(bBuffer);
            }

            // split the line into pipeline stages; pCount is the stage count
            sPtr = bBuffer;
            pCount = 0;
            failed = 0;

            while((aPtr = strsep(&sPtr, "|")) != NULL) {
                if(strlen(aPtr) == 0) {
                    continue;
                }

                if(pCount == NUMPIPES) {
                    failed = 1;
                    break;
                }

                pipeComms[pCount++] = aPtr;
            }

            if(failed) {
                fprintf(stderr, "too many commands in pipeline (max %d)\n", NUMPIPES);
                free(bBuffer);
                continue;
            }

            for(i = 0; i < pCount; i++) {
                hasPrev = (i > 0);
                hasNext = (i < pCount-1);
                havePipe = 0;
                failed = 0;

                // split this stage into the argv[] handed to execvp()
                aCount = 0;

                while((aPtr = strsep(&pipeComms[i], " ")) != NULL) {
                    if(strlen(aPtr) == 0) {
                        continue;
                    }

                    // keep the last slot free for the terminating NULL
                    if(aCount == NUMARGS-1) {
                        failed = 1;
                        break;
                    }

                    cmdArgs[aCount++] = aPtr;
                }

                cmdArgs[aCount] = NULL;

                if(failed) {
                    fprintf(stderr, "too many arguments (max %d)\n", NUMARGS-1);
                } else if(aCount == 0) {
                    // a stage made only of spaces, e.g. "ls |  | wc"
                    fprintf(stderr, "empty command in pipeline\n");
                    failed = 1;
                } else if(hasNext && pipe(newPipe) == -1) {
                    perror("pipe");
                    failed = 1;
                } else {
                    havePipe = hasNext;
                    pid = fork();

                    if(pid == -1) {
                        perror("fork");
                        failed = 1;
                    }
                }

                if(!failed && pid == 0) {
                    // child: stdin comes from the previous stage, if any
                    if(hasPrev) {
                        close(oldPipe[1]);
                        dup2(oldPipe[0], 0);
                        close(oldPipe[0]);
                    }

                    // child: stdout goes to the next stage, if any
                    if(hasNext) {
                        close(newPipe[0]);
                        dup2(newPipe[1], 1);
                        close(newPipe[1]);
                    }

                    execvp(cmdArgs[0], cmdArgs);

                    // only reached when execvp() failed; _exit() avoids
                    // flushing stdio buffers inherited from the parent
                    perror(cmdArgs[0]);
                    _exit(1);
                }

                // parent: the pipe feeding this stage is no longer needed here
                if(hasPrev) {
                    close(oldPipe[0]);
                    close(oldPipe[1]);
                }

                if(failed) {
                    // abandon the rest of the pipeline
                    if(havePipe) {
                        close(newPipe[0]);
                        close(newPipe[1]);
                    }
                    break;
                }

                // the pipe just created feeds the next stage
                if(hasNext) {
                    oldPipe[0] = newPipe[0];
                    oldPipe[1] = newPipe[1];
                }
            }

            // reap every child started for this line so none stays a zombie
            while(wait(&status) > 0) {
            }

            free(bBuffer);
        }

        return 0;
    }
    

    Is it ok now?

    0 讨论(0)
  • 2020-12-18 14:13

    The key problem is that you create a bunch of pipes and don't make sure that all the ends are closed properly. If you create a pipe, you get two file descriptors; if you fork, then you have four file descriptors. If you dup() or dup2() one end of the pipe to a standard descriptor, you need to close both ends of the pipe - at least one of the closes must be after the dup() or dup2() operation.


    Consider the file descriptors available to the first command (assuming there are at least two - something that should be handled in general (no pipe() or I/O redirection needed with just one command), but I recognize that the error handling is eliminated to keep the code suitable for SO):

        // File descriptors as seen by the first command of the pipeline.
        // The "Likely" numbers are the lowest free descriptors at each call.
        std=dup(1);    // Likely: std = 3
        pipe(fd);      // Likely: fd[0] = 4, fd[1] = 5
        aux = fd[0];
        dup2(fd[1], 1);  // descriptor 1 now refers to the write end of the pipe
        close(fd[1]);  // Closes 5
    
        if (fork() == 0) {
             // Need to close: fd[0] aka aux = 4
             // Need to close: std = 3
             close(fd[0]);
             close(std);
             execlp(argv[i], argv[i], NULL);
             // only reached if execlp() failed
             exit(1);
        }
    

    Note that because fd[0] is not closed in the child, the child will never get EOF on its standard input; this is usually problematic. The non-closure of std is less critical.


    Revisiting amended code (as of 2009-06-03T20:52-07:00)...

    Assume that process starts with file descriptors 0, 1, 2 (standard input, output, error) open only. Also assume we have exactly 3 commands to process. As before, this code writes out the loop with annotations.

    // Hand-unrolled trace of the loop for exactly three commands.
    // The trailing numbers are the file descriptor values at each step.
    std0 = dup(0); // backup stdin - 3
    std1 = dup(1); // backup stdout - 4
    
    // Iteration 1 (i == 1)
    // We have another command
    pipe(fd);   // fd[0] = 5; fd[1] = 6
    aux = fd[0]; // aux = 5
    dup2(fd[1], 1);
    close(fd[1]);       // 6 closed
    // Not last command
    if (fork() == 0) {
        // Not last command
        close(std1);    // 4 closed
        close(fd[0]);   // 5 closed
        // Minor problemette: 3 still open
        execlp(argv[i], argv[i], NULL);
        }
    // Parent has open 3, 4, 5 - no problem
    
    // Iteration 2 (i == 2)
    // There was a previous command
    dup2(aux, 0);      // stdin now on read end of pipe
    close(aux);        // 5 closed
    // We have another command
    pipe(fd);          // fd[0] = 5; fd[1] = 6
    aux = fd[0];
    dup2(fd[1], 1);
    close(fd[1]);      // 6 closed
    // Not last command
    if (fork() == 0) {
        // Not last command
        close(std1);   // 4 closed
        close(fd[0]);  // 5 closed
        // As before, 3 is still open - not a major problem
        execlp(argv[i], argv[i], NULL);
        }
    // Parent has open 3, 4, 5 - no problem
    
    // Iteration 3 (i == 3)
    // We have a previous command
    dup2(aux, 0);      // stdin is now read end of pipe
    close(aux);        // 5 closed
    // No more commands
    
    // Last command - restore stdout...
    dup2(std1, 1);     // stdout is back where it started
    close(std1);       // 4 closed
    
    if (fork() == 0) {
        // Last command
        // 3 still open
        execlp(argv[i], argv[i], NULL);
    }
    // Parent has closed 4 when it should not have done so!!!
    // End of loop
    // restore stdin to be able to keep using the shell
    dup2(std0, 0);
    // 3 still open - as desired
    

    So, all the children have the original standard input connected as file descriptor 3. This is not ideal, though it is not dreadfully traumatic; I'm hard pressed to find a circumstance where this would matter.

    Closing file descriptor 4 in the parent is a mistake - the next iteration of 'read a command and process it' won't work because std1 is not initialized inside the loop.

    Generally, this is close to correct - but not quite correct.

    0 讨论(0)
  • 2020-12-18 14:22

    Looks reasonable, though it really needs to fix leaking std and aux to the children and after the loop, and the parent's original stdin is lost forever.

    This would probably be better with color...

    ./a.out foo bar baz <stdin >stdout
    std = dup(stdout)     ||     |+==========================std
                          ||     ||                          ||
    pipe(fd)              ||     ||    pipe1[0] -- pipe1[1]  ||
                          ||     ||       ||          ||     ||
    aux = fd[0]           ||     ||      aux          ||     ||
                          ||     XX       ||          ||     ||
                          ||      /-------++----------+|     ||
    dup2(fd[1], 1)        ||     //       ||          ||     ||
                          ||     ||       ||          ||     ||
    close(fd[1])          ||     ||       ||          XX     ||
                          ||     ||       ||                 ||
    fork+exec(foo)        ||     ||       ||                 ||
                          XX     ||       ||                 ||
                           /-----++-------+|                 ||
    dup2(aux, 0)          //     ||       ||                 ||
                          ||     ||       ||                 ||
    close(aux)            ||     ||       XX                 ||
                          ||     ||                          ||
    pipe(fd)              ||     ||    pipe2[0] -- pipe2[1]  ||
                          ||     ||       ||          ||     ||
    aux = fd[0]           ||     ||      aux          ||     ||
                          ||     XX       ||          ||     ||
                          ||      /-------++----------+|     ||
    dup2(fd[1], 1)        ||     //       ||          ||     ||
                          ||     ||       ||          ||     ||
    close(fd[1])          ||     ||       ||          XX     ||
                          ||     ||       ||                 ||
    fork+exec(bar)        ||     ||       ||                 ||
                          XX     ||       ||                 ||
                           /-----++-------+|                 ||
    dup2(aux, 0)          //     ||       ||                 ||
                          ||     ||       ||                 ||
    close(aux)            ||     ||       XX                 ||
                          ||     ||                          ||
    pipe(fd)              ||     ||    pipe3[0] -- pipe3[1]  ||
                          ||     ||       ||          ||     ||
    aux = fd[0]           ||     ||      aux          ||     ||
                          ||     XX       ||          ||     ||
                          ||      /-------++----------+|     ||
    dup2(fd[1], 1)        ||     //       ||          ||     ||
                          ||     ||       ||          ||     ||
    close(fd[1])          ||     ||       ||          XX     ||
                          ||     XX       ||                 ||
                          ||      /-------++-----------------+|
    dup2(std, 1)          ||     //       ||                 ||
                          ||     ||       ||                 ||
    fork+exec(baz)        ||     ||       ||                 ||
    
    • foo gets stdin=stdin, stdout=pipe1[1]
    • bar gets stdin=pipe1[0], stdout=pipe2[1]
    • baz gets stdin=pipe2[0], stdout=stdout

    My suggestion is different in that it avoids mangling the parent's stdin and stdout, only manipulating them within the child, and never leaks any FDs. It's a bit harder to diagram, though.

    for cmd in cmds
        if there is a next cmd
            pipe(new_fds)
        fork
        if child
            if there is a previous cmd
                dup2(old_fds[0], 0)
                close(old_fds[0])
                close(old_fds[1])
            if there is a next cmd
                close(new_fds[0])
                dup2(new_fds[1], 1)
                close(new_fds[1])
            exec cmd || die
        else
            if there is a previous cmd
                close(old_fds[0])
                close(old_fds[1])
            if there is a next cmd
                old_fds = new_fds
    
    parent
        cmds = [foo, bar, baz]
        fds = {0: stdin, 1: stdout}
    
    cmd = cmds[0] {
        there is a next cmd {
            pipe(new_fds)
                new_fds = {3, 4}
                fds = {0: stdin, 1: stdout, 3: pipe1[0], 4: pipe1[1]}
        }
    
        fork             => child
                            there is a next cmd {
                                close(new_fds[0])
                                    fds = {0: stdin, 1: stdout, 4: pipe1[1]}
                                dup2(new_fds[1], 1)
                                    fds = {0: stdin, 1: pipe1[1], 4: pipe1[1]}
                                close(new_fds[1])
                                    fds = {0: stdin, 1: pipe1[1]}
                            }
                            exec(cmd)
    
        there is a next cmd {
            old_fds = new_fds
                old_fds = {3, 4}
        }
    }
    
    cmd = cmds[1] {
        there is a next cmd {
            pipe(new_fds)
                new_fds = {5, 6}
                fds = {0: stdin, 1: stdout, 3: pipe1[0], 4: pipe1[1],
                                            5: pipe2[0], 6: pipe2[1]}
        }
    
        fork             => child
                            there is a previous cmd {
                                dup2(old_fds[0], 0)
                                    fds = {0: pipe1[0], 1: stdout,
                                           3: pipe1[0], 4: pipe1[1],
                                           5: pipe2[0], 6: pipe2[1]}
                                close(old_fds[0])
                                    fds = {0: pipe1[0], 1: stdout,
                                                        4: pipe1[1],
                                           5: pipe2[0], 6: pipe2[1]}
                                close(old_fds[1])
                                    fds = {0: pipe1[0], 1: stdout,
                                           5: pipe2[0], 6: pipe2[1]}
                            }
                            there is a next cmd {
                                close(new_fds[0])
                                    fds = {0: pipe1[0], 1: stdout, 6: pipe2[1]}
                                dup2(new_fds[1], 1)
                                    fds = {0: pipe1[0], 1: pipe2[1], 6: pipe2[1]}
                                close(new_fds[1])
                                    fds = {0: pipe1[0], 1: pipe2[1]}
                            }
                            exec(cmd)
    
        there is a previous cmd {
            close(old_fds[0])
                fds = {0: stdin, 1: stdout,              4: pipe1[1],
                                            5: pipe2[0], 6: pipe2[1]}
            close(old_fds[1])
                fds = {0: stdin, 1: stdout, 5: pipe2[0], 6: pipe2[1]}
        }
    
        there is a next cmd {
            old_fds = new_fds
                old_fds = {5, 6}
        }
    }
    
    cmd = cmds[2] {
        fork             => child
                            there is a previous cmd {
                                dup2(old_fds[0], 0)
                                    fds = {0: pipe2[0], 1: stdout,
                                           5: pipe2[0], 6: pipe2[1]}
                                close(old_fds[0])
                                    fds = {0: pipe2[0], 1: stdout,
                                                        6: pipe2[1]}
                                close(old_fds[1])
                                    fds = {0: pipe2[0], 1: stdout}
                            }
                            exec(cmd)
    
        there is a previous cmd {
            close(old_fds[0])
                fds = {0: stdin, 1: stdout,              6: pipe2[1]}
            close(old_fds[1])
                fds = {0: stdin, 1: stdout}
        }
    }
    

    Edit

    Your updated code does fix the previous FD leaks… but adds one: you're now leaking std0 to the children. As Jon says, this is probably not dangerous to most programs... but you still should write a better behaved shell than this.

    Even if it's temporary, I would strongly recommend against mangling your own shell's standard in/out/err (0/1/2), only doing so within the child right before exec. Why? Suppose you add some printf debugging in the middle, or you need to bail out due to an error condition. You'll be in trouble if you don't clean up your messed-up standard file descriptors first. Please, for the sake of having things operate as expected even in unexpected scenarios, don't muck with them until you need to.


    Edit

    As I mentioned in other comments, splitting it up into smaller parts makes it much easier to understand. This small helper should be easily understandable and bug-free:

    /* Fork a child and exec a command in it, optionally redirecting its
     * stdin and stdout.
     *
     * cmd, argv: passed to execvp
     * fd_in, fd_out: when not -1, replaces stdin and stdout in the child
     * return: in the parent, the pid of the fork+exec child, or -1 when
     *         fork() failed. The child never returns: it either execs or,
     *         if execvp fails, terminates with exit status -1.
     *
     * The caller's own descriptors are not touched; the caller still has to
     * close its copies of fd_in and fd_out.
     */
    int fork_and_exec_with_fds(char *cmd, char **argv, int fd_in, int fd_out) {
        pid_t child = fork();

        /* parent (child > 0) or failed fork (child == -1): report to caller */
        if (child != 0) {
            return child;
        }

        /* skip the dup2 when fd_in already is stdin, otherwise the close
         * below would close stdin itself */
        if (fd_in != -1 && fd_in != 0) {
            dup2(fd_in, 0);
            close(fd_in);
        }

        /* same reasoning for stdout */
        if (fd_out != -1 && fd_out != 1) {
            dup2(fd_out, 1);
            close(fd_out);
        }

        execvp(cmd, argv);

        /* only reached when execvp failed; _exit skips the atexit handlers
         * and stdio flushing that belong to the parent's state */
        _exit(-1);
    }
    

    As should this:

    /* Start num commands connected as a pipeline.
     *
     * num: number of commands
     * cmds, argvs: cmds[i] and argvs[i] are handed to
     *              fork_and_exec_with_fds for command i
     * pids: output array of num entries; pids[i] receives the value returned
     *       for command i, or -1 for commands that were not started because
     *       pipe() failed
     *
     * The first command keeps the caller's stdin and the last one keeps the
     * caller's stdout. This function does not wait for the children.
     *
     * NOTE(review): the read end of each new pipe is still open in this
     * process when the writer is forked, so the writer appears to inherit it;
     * closing it there would need an extra argument to the helper.
     */
    void run_pipeline(int num, char *cmds[], char **argvs[], int pids[]) {
        /* initially, don't change stdin */
        int fd_in = -1, fd_out;
        int i;

        for (i = 0; i < num; i++) {
            /* -1 means "no pipe": stays that way for the last command */
            int fd_pipe[2] = { -1, -1 };

            /* if there is a next command, set up a pipe for stdout */
            if (i + 1 < num && pipe(fd_pipe) == -1) {
                /* cannot connect the remaining commands: give up on them */
                if (fd_in != -1) {
                    close(fd_in);
                }
                for (; i < num; i++) {
                    pids[i] = -1;
                }
                return;
            }
            fd_out = fd_pipe[1];

            /* run child with given stdin/stdout */
            pids[i] = fork_and_exec_with_fds(cmds[i], argvs[i], fd_in, fd_out);

            /* nobody else needs to use these fds anymore */
            if (fd_in != -1) {
                close(fd_in);
            }
            if (fd_out != -1) {
                close(fd_out);
            }

            /* set up stdin for next command (-1 after the last one) */
            fd_in = fd_pipe[0];
        }
    }
    

    You can see Bash's execute_cmd.c#execute_disk_command being called from execute_cmd.c#execute_pipeline, xsh's process.c#process_run being called from jobs.c#job_run, and even every single one of BusyBox's various small and minimal shells splits them up.

    0 讨论(0)
  • 2020-12-18 14:31

    Both in this question and in another (as linked in the first post), ephemient suggested a solution to the problem that doesn't mess with the parent's file descriptors, as demonstrated by a possible solution in this question.

    I didn't get his solution; I tried and tried to understand it, but I can't seem to get it. I also tried to code it without understanding it, but it didn't work, probably because I failed to understand it correctly and wasn't able to code it the way it should have been coded.

    Anyway, I tried to come up with my own solution using some of the things I understood from the pseudo code and came up with this:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <wait.h>
    #include <string.h>
    #include <readline/readline.h>
    #include <readline/history.h>
    
    #define NUMPIPES 5
    #define NUMARGS 10
    
    /*
     * Minimal interactive shell using two alternating pipes.
     *
     * Reads a line with readline(), splits it on '|' into pipeline stages and
     * each stage on ' ' into arguments, then runs the stages connected by
     * pipes. Typing "exit" (any letter case) or sending EOF leaves the shell.
     *
     * Stages 1, 3, 5, ... (even i) read from bPipe and write to aPipe;
     * stages 2, 4, 6, ... (odd i) read from aPipe and write to bPipe, so the
     * pipe one stage writes to is always the pipe the next stage reads from.
     *
     * Limits: at most NUMPIPES stages per line and NUMARGS-1 arguments per
     * stage (one cmdArgs slot is reserved for the terminating NULL).
     *
     * Returns 0 when the user exits.
     */
    int main(int argc, char *argv[]) {
        char *bBuffer, *sPtr, *aPtr = NULL, *pipeComms[NUMPIPES], *cmdArgs[NUMARGS];
        int aPipe[2], bPipe[2], pCount, aCount, i, status;
        int *inPipe, *outPipe;
        int hasPrev, hasNext, havePipe, failed;
        pid_t pid;

        (void)argc;
        (void)argv;

        using_history();

        while(1) {
            bBuffer = readline("\e[1;31mShell \e[1;32m# \e[0m");

            // readline() returns NULL on EOF (e.g. Ctrl-D on an empty line)
            if(bBuffer == NULL) {
                return 0;
            }

            if(!strcasecmp(bBuffer, "exit")) {
                free(bBuffer);
                return 0;
            }

            if(strlen(bBuffer) > 0) {
                add_history(bBuffer);
            }

            // split the line into pipeline stages; pCount is the stage count
            sPtr = bBuffer;
            pCount = 0;
            failed = 0;

            while((aPtr = strsep(&sPtr, "|")) != NULL) {
                if(strlen(aPtr) == 0) {
                    continue;
                }

                if(pCount == NUMPIPES) {
                    failed = 1;
                    break;
                }

                pipeComms[pCount++] = aPtr;
            }

            if(failed) {
                fprintf(stderr, "too many commands in pipeline (max %d)\n", NUMPIPES);
                free(bBuffer);
                continue;
            }

            for(i = 0; i < pCount; i++) {
                hasPrev = (i > 0);
                hasNext = (i < pCount-1);
                havePipe = 0;
                failed = 0;

                // Is this the first, third, fifth, etc... command?
                if(i%2 == 0) {
                    inPipe = bPipe;
                    outPipe = aPipe;
                } else {
                    // second, fourth, sixth, etc... command
                    inPipe = aPipe;
                    outPipe = bPipe;
                }

                // split this stage into the argv[] handed to execvp()
                aCount = 0;

                while((aPtr = strsep(&pipeComms[i], " ")) != NULL) {
                    if(strlen(aPtr) == 0) {
                        continue;
                    }

                    // keep the last slot free for the terminating NULL
                    if(aCount == NUMARGS-1) {
                        failed = 1;
                        break;
                    }

                    cmdArgs[aCount++] = aPtr;
                }

                cmdArgs[aCount] = NULL;

                if(failed) {
                    fprintf(stderr, "too many arguments (max %d)\n", NUMARGS-1);
                } else if(aCount == 0) {
                    // a stage made only of spaces, e.g. "ls |  | wc"
                    fprintf(stderr, "empty command in pipeline\n");
                    failed = 1;
                } else if(hasNext && pipe(outPipe) == -1) {
                    perror("pipe");
                    failed = 1;
                } else {
                    havePipe = hasNext;
                    pid = fork();

                    if(pid == -1) {
                        perror("fork");
                        failed = 1;
                    }
                }

                if(!failed && pid == 0) {
                    // Do we have a previous command? Read from its pipe.
                    if(hasPrev) {
                        close(inPipe[1]);
                        dup2(inPipe[0], STDIN_FILENO);
                        close(inPipe[0]);
                    }

                    // Do we have a next command? Write into its pipe.
                    if(hasNext) {
                        close(outPipe[0]);
                        dup2(outPipe[1], STDOUT_FILENO);
                        close(outPipe[1]);
                    }

                    execvp(cmdArgs[0], cmdArgs);

                    // only reached when execvp() failed; _exit() avoids
                    // flushing stdio buffers inherited from the parent
                    perror(cmdArgs[0]);
                    _exit(1);
                }

                // parent: the pipe feeding this stage is no longer needed here
                if(hasPrev) {
                    close(inPipe[0]);
                    close(inPipe[1]);
                }

                if(failed) {
                    // abandon the rest of the pipeline
                    if(havePipe) {
                        close(outPipe[0]);
                        close(outPipe[1]);
                    }
                    break;
                }
            }

            // reap every child started for this line so none stays a zombie
            while(wait(&status) > 0) {
            }

            free(bBuffer);
        }

        return 0;
    }
    

    This may not be the best or cleanest solution, but it was something I could come up with and, most importantly, something I can understand. What good is it to have something working that I don't understand, if I'm then evaluated by my teacher and can't explain to him what the code is doing?

    Anyway, what do you think about this one?

    0 讨论(0)
  • 2020-12-18 14:32

    It will give results, some that are not expected. It is far from a nice solution: It messes with the parent process' standard descriptors, does not recover the standard input, descriptors leak to children, etc.

    If you think recursively, it may be easier to understand. Below is a correct solution, without error checking. Consider a linked-list type command, with its next pointer and an argv array.

    /*
     * Starts cmd and every command linked after it through ->next, wiring
     * the stdout of each one to the stdin of its successor with a pipe.
     *
     * cmd:   first command to start; cmd->argv is handed to execvp
     * input: descriptor to use as cmd's stdin, or -1 to leave stdin alone
     *
     * No return values are checked and the children are not waited for.
     */
    void run_pipeline(command *cmd, int input) {
      for (;;) {
        int pfds[2] = { -1, -1 };
        int has_next = (cmd->next != NULL);

        /* only a command with a successor needs an output pipe */
        if (has_next) {
          pipe(pfds);
        }

        if (fork() != 0) { /* parent */
          /* drop our copies of the ends that now belong to the child */
          if (input != -1) {
            close(input);
          }
          if (pfds[1] != -1) {
            close(pfds[1]);
          }
          if (!has_next) {
            return;
          }
          /* the read end of this pipe is the next command's stdin */
          cmd = cmd->next;
          input = pfds[0];
          continue;
        }

        /* child */
        if (input != -1) {
          dup2(input, STDIN_FILENO);
          close(input);
        }
        if (pfds[1] != -1) {
          dup2(pfds[1], STDOUT_FILENO);
          close(pfds[1]);
        }
        if (pfds[0] != -1) {
          close(pfds[0]);
        }
        execvp(cmd->argv[0], cmd->argv);
        /* only reached if execvp failed */
        exit(1);
      }
    }
    

    Call it with the first command in the linked-list, and input = -1. It does the rest.

    0 讨论(0)
提交回复
热议问题