-1

I apologize for the length, but I suspect the bug might be in the error handling and there's a bit of necessary boilerplate for networking.

I start the following implementation of a single-file forking tcp server.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <netdb.h>
#include <string.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <errno.h>
#include <pty.h>

#define PORT 9600
#define BUF_SIZE 128
#define SERVER_BACKLOG 10

void handle_conn(int conn_fd) {
  // Create a TTY / PTY pair and hook it up to the connection.
  int master_fd;
  char slave_name[100];
  pid_t pid = forkpty(&master_fd, slave_name, NULL, NULL);
  if (pid == -1) {
    fprintf(stderr, "Error on first fork: %d %s",  errno, strerror(errno));
    return;
  } else if (pid == 0) { // Child process attached to slave pty
    execl("/bin/bash", "bash", (char*) NULL);
  }

  // Set both fds to be non-blocking
  int flags = fcntl(master_fd, F_GETFL, 0);
  fcntl(master_fd, F_SETFL, flags | O_NONBLOCK);
  flags = fcntl(conn_fd, F_GETFL, 0);
  fcntl(conn_fd, F_SETFL, flags | O_NONBLOCK);

  // Parent continues here, shuffling data back and forth from the socket to
  // the master fd.
  int n;
  char c;
  while(1) {
    fd_set fds;
    FD_ZERO(&fds);
    FD_SET(conn_fd, &fds);
    FD_SET(master_fd, &fds);
    int numFds = select((conn_fd > master_fd ? conn_fd : master_fd) + 1, &fds, NULL, NULL, NULL);

    if (numFds == -1) {
      fprintf(stderr, "Error on select: %d %s",  errno, strerror(errno));
      return;
    }
    if (numFds == 0) {
      continue;
    }
    if (FD_ISSET(conn_fd, &fds)) {
      // Read from socket and write to tty.
      while ((n = read(conn_fd, &c, 1)) > 0) {
        write(master_fd, &c, 1);
      }

      if (n == 0) {
        // Reached EOF
        break;
      }
      // Presumed n is -1
      if (errno != EAGAIN || errno != EWOULDBLOCK) {
        fprintf(stderr, "Error reading socket %d %s\n", errno, strerror(errno));
        break;
      }
    }

    if (FD_ISSET(master_fd, &fds)) {
      // Read from tty and write to socket as long as things are still
      // readable.
      while ((n = read(master_fd, &c, 1)) > 0) {
        write(conn_fd, &c, 1);
      }

      if (n == 0) {
        // Reached EOF
        break;
      }

      // Presumed n is -1
      if (errno != EAGAIN && errno != EWOULDBLOCK) {
        fprintf(stderr, "Error reading master FD %d %s\n", errno, strerror(errno));
        break;
      }
    }
  }
  printf("Closing connection [%d]\n", conn_fd);
  int err = close(conn_fd);
  if (err != 0) {
    fprintf(stderr, "Error while closing connection [%d]\n", conn_fd);
  }

  err = close(master_fd);
  if (err != 0) {
    fprintf(stderr, "Error while closing master fd [%d]\n", conn_fd);
  }
}

int main(int argc, char *argv[]) {
  int server_fd = socket(PF_INET, SOCK_STREAM, 0);
  if (server_fd < 0) {
    fprintf(stderr, "Cannot create socket\n");
    return 1;
  }

  struct sockaddr_in server_addr;
  server_addr.sin_family = AF_INET;
  server_addr.sin_port = htons(PORT);
  int ok = inet_pton(AF_INET, "127.0.0.1", &server_addr.sin_addr);
  if (!ok) {
    fprintf(stderr, "Cannot parse IP address\n");
    return 1;
  }

  // Allow reuse of port.
  int optval = 1;
  setsockopt(server_fd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval));

  int err = bind(server_fd, (struct sockaddr *) &server_addr, sizeof(server_addr));
  if (err != 0) {
    fprintf(stderr, "Cannot bind server: Error %d %s\n", errno, strerror(errno));
    return 1;
  }

  err = listen(server_fd, SERVER_BACKLOG);
  if (err != 0) {
    fprintf(stderr, "Cannot listen\n");
    return 1;
  }

  printf("Server listening on port %d\n", PORT);

  int client_fd;
  struct sockaddr_in client_addr;
  socklen_t client_addr_len;
  while (1) {
    client_fd = accept(server_fd, (struct sockaddr *) &client_addr, &client_addr_len);
    if (client_fd < 0) {
      fprintf(stderr, "Cannot accept: Error %d %s\n", errno, strerror(errno));
      return 1;
    }

    printf("New client [%d]\n", client_fd);

    // Using fork instead of threads here to avoid pitfalls involved with
    // mixing forking and threads, since this use case case of a tty host is
    // not expected to involve a great many requests.
    int pid = fork();
    if (pid == -1) {
      fprintf(stderr, "Error on forking to handle new connection: %d %s",  errno, strerror(errno));
      // Server does not need to die here.
      continue;
    } else if (pid == 0) { // Child will handle the connection and then return.
      handle_conn(client_fd);
      return 0;
    }
    // Parent continues to accept
  }
}

Then on another terminal, I run the following netcat client.

stty -icanon -echo; nc localhost 9600

At this point, I can type commands like ls or cat on the terminal with nc.

However, when I type exit, the server prints Error reading master FD 5 Input/output error\nClosing connection [4], and no more errors, so I assume that the server has successfully closed the connection, but the client continues to hang until I press Control-C on the client.

How can I modify the server code to force the client to disconnect?

merlin2011
  • 63,368
  • 37
  • 161
  • 279
  • Hello. I can't relate your error message to your client code. Can you please clarify? – Iharob Al Asimi Nov 10 '20 at 07:22
  • @IharobAlAsimi The first line is `fprintf(stderr, "Error reading socket %d %s\n", errno, strerror(errno));`. The latter line is `printf("Closing connection [%d]\n", conn_fd);`. – merlin2011 Nov 10 '20 at 07:25
  • Also, the actual "error" I care about is that the client is hanging instead of properly disconnecting. – merlin2011 Nov 10 '20 at 07:26
  • Oh, so that's the output in your server code. Don't you feel like your client loop has a very strange logic? I mean, it seems like you want to loop while something or until something very specific happens. – Iharob Al Asimi Nov 10 '20 at 07:26
  • Sorry I think I confused you with the extra code. The client is just `nc`. – merlin2011 Nov 10 '20 at 07:27
  • @IharobAlAsimi I've removed the confusing extra example code. All messages are printed by the server. – merlin2011 Nov 10 '20 at 07:28
  • Thank you, yes indeed. I just read that it was nc. – Iharob Al Asimi Nov 10 '20 at 07:29
  • 1
    Can you try to call [`shutdown()`](https://man7.org/linux/man-pages/man2/shutdown.2.html)? – Iharob Al Asimi Nov 10 '20 at 07:36
  • @IharobAlAsimi Just tried putting `shutdown(conn_fd, SHUT_RDWR);` before `close`. Same behavior. – merlin2011 Nov 10 '20 at 07:41
  • `close()` is sufficient. If your client doesn't respond to that there is something wrong with it. `shutdown()` is not necessary @IharobAlAsimi. If you think otherwise please state why. – user207421 Nov 10 '20 at 08:55
  • @MarquisofLorne, The client is `/bin/nc.openbsd`. Moreover, I have other examples of server code (see previous edits of this question) where the client correctly disconnects on close. Those examples do not involve `ttys`. That is why I suspect it's a server issue. – merlin2011 Nov 10 '20 at 08:57
  • You didn't get an error on closing the FD, so it worked, so the client should have recognized that. – user207421 Nov 10 '20 at 08:59
  • @MarquisofLorne My fix below suggests that perhaps the client does not get the memo until both the parent's version of the FD and the child's version are closed. Thus there was still a bug in my server code even though the close succeeded. – merlin2011 Nov 10 '20 at 09:29

1 Answers1

0

I have discovered the error! It turns out that I had forgotten to close the socket in the parent process. Once I did that, the close() worked as expected in the child.

A simple modification of the above code, as follows, fixes the problem.

    ...
    // Parent continues to accept
    close(client_fd);
merlin2011
  • 63,368
  • 37
  • 161
  • 279
  • If that was the only error `shutdown()` would have fixed it. As it didn't, clearly that wasn't the only error. As you haven't posted all the relevant code it is impossible for anyone to comment further. – user207421 Nov 10 '20 at 09:37
  • @MarquisofLorne I have posted all the code I'm running. What else would you like me to post? The source code for `nc`? – merlin2011 Nov 10 '20 at 18:57