|  | /* | 
|  | * Copyright (c) 2011 The Regents of the University of California | 
|  | * David Zhu <yuzhu@cs.berkeley.edu> | 
|  | * See LICENSE for details. | 
|  | * | 
|  | * Socket layer on top of TCP abstraction. Similar to the BSD implementation. | 
|  | * | 
|  | */ | 
|  | #include <ros/common.h> | 
|  | #include <socket.h> | 
|  | #include <vfs.h> | 
|  | #include <time.h> | 
|  | #include <kref.h> | 
|  | #include <syscall.h> | 
|  | #include <sys/uio.h> | 
|  | #include <ros/errno.h> | 
|  | #include <net.h> | 
|  | #include <net/udp.h> | 
|  | #include <net/tcp.h> | 
|  | #include <net/pbuf.h> | 
|  | #include <net/tcp_impl.h> | 
|  | #include <umem.h> | 
|  | #include <kthread.h> | 
|  | #include <bitmask.h> | 
|  | #include <debug.h> | 
|  | /* | 
|  | *TODO: Figure out which socket.h is used where | 
|  | *There are several socket.h in kern, and a couple more in glibc. Perhaps the glibc ones | 
|  | *should grab from here.. | 
|  | */ | 
|  |  | 
/* Slab caches for the networking objects used by the socket layer. */
struct kmem_cache *sock_kcache;           /* struct socket; created in socket_init() */
struct kmem_cache *mbuf_kcache;           /* not created by socket_init() in this file */
struct kmem_cache *udp_pcb_kcache;        /* struct udp_pcb; created in socket_init() */
struct kmem_cache *tcp_pcb_kcache;        /* struct tcp_pcb; created in socket_init() */
struct kmem_cache *tcp_pcb_listen_kcache; /* struct tcp_pcb_listen; created in socket_init() */
struct kmem_cache *tcp_segment_kcache;    /* struct tcp_seg; created in socket_init() */
|  |  | 
|  | // file ops needed to support read/write on socket fd | 
|  | static struct file_operations socket_op = { | 
|  | 0, | 
|  | 0,//soo_read, | 
|  | 0,//soo_write, | 
|  | 0, | 
|  | 0, | 
|  | 0, | 
|  | 0, | 
|  | 0, | 
|  | 0, | 
|  | 0,//soo_poll, | 
|  | 0, | 
|  | 0, | 
|  | 0, // sendpage might apply here | 
|  | 0, | 
|  | }; | 
|  | static struct socket* getsocket(struct proc *p, int fd){ | 
|  | /* look up fd -> file */ | 
|  | struct file *so_file = get_file_from_fd(&(p->open_files), fd); | 
|  |  | 
|  | /* get socket and verify its type */ | 
|  | if (so_file == NULL){ | 
|  | printd("getsocket() fd -> null file: fd %d\n", fd); | 
|  | return NULL; | 
|  | } | 
|  | if (so_file->f_op != &socket_op) { | 
|  | set_errno(ENOTSOCK); | 
|  | printd("fd %d maps to non-socket file\n"); | 
|  | return NULL; | 
|  | } else | 
|  | return (struct socket*) so_file->f_privdata; | 
|  | } | 
|  |  | 
|  | struct socket* alloc_sock(int socket_family, int socket_type, int protocol){ | 
|  | struct socket *newsock = kmem_cache_alloc(sock_kcache, 0); | 
|  | assert(newsock); | 
|  |  | 
|  | newsock->so_family = socket_family; | 
|  | newsock->so_type = socket_type; | 
|  | newsock->so_protocol = protocol; | 
|  | newsock->so_state = SS_ISDISCONNECTED; | 
|  | STAILQ_INIT(&(newsock->acceptq)); | 
|  | pbuf_head_init(&newsock->recv_buff); | 
|  | pbuf_head_init(&newsock->send_buff); | 
|  | sem_init_irqsave(&newsock->sem, 0); | 
|  | sem_init_irqsave(&newsock->accept_sem, 0); | 
|  | spinlock_init(&newsock->waiter_lock); | 
|  | LIST_INIT(&newsock->waiters); | 
|  | return newsock; | 
|  |  | 
|  | } | 
|  | // TODO: refactor vfs so we can allocate fd and do the basic initialization | 
|  | struct file *alloc_socket_file(struct socket* sock) { | 
|  | struct file *file = alloc_file(); | 
|  | if (file == NULL) return 0; | 
|  |  | 
|  | // Linux fakes a dentry and an inode for socks, see socket.c : sock_alloc_file | 
|  | file->f_dentry = NULL; // This might break things? | 
|  | file->f_vfsmnt = 0; | 
|  | file->f_flags = 0; | 
|  |  | 
|  | file->f_mode = S_IRUSR | S_IWUSR; // both read and write for socket files | 
|  |  | 
|  | file->f_pos = 0; | 
|  | file->f_uid = 0; | 
|  | file->f_gid = 0; | 
|  | file->f_error = 0; | 
|  |  | 
|  | file->f_op = &socket_op; | 
|  | file->f_privdata = sock; | 
|  | file->f_mapping = 0; | 
|  | return file; | 
|  | } | 
|  |  | 
|  | void socket_init(){ | 
|  |  | 
|  | /* allocate buf for socket */ | 
|  | sock_kcache = kmem_cache_create("socket", sizeof(struct socket), | 
|  | __alignof__(struct socket), 0, 0, 0); | 
|  | udp_pcb_kcache = kmem_cache_create("udppcb", sizeof(struct udp_pcb), | 
|  | __alignof__(struct udp_pcb), 0, 0, 0); | 
|  | tcp_pcb_kcache = kmem_cache_create("tcppcb", sizeof(struct tcp_pcb), | 
|  | __alignof__(struct tcp_pcb), 0, 0, 0); | 
|  | tcp_pcb_listen_kcache = kmem_cache_create("tcppcblisten", sizeof(struct tcp_pcb_listen), | 
|  | __alignof__(struct tcp_pcb_listen), 0, 0, 0); | 
|  | tcp_segment_kcache = kmem_cache_create("tcpsegment", sizeof(struct tcp_seg), | 
|  | __alignof__(struct tcp_seg), 0, 0, 0); | 
|  | pbuf_init(); | 
|  |  | 
|  | } | 
|  | intreg_t sys_accept(struct proc *p, int sockfd, struct sockaddr *addr, socklen_t *addrlen) { | 
|  | printk ("sysaccept called\n"); | 
|  | struct socket* sock = getsocket(p, sockfd); | 
|  | struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; | 
|  | uint16_t r_port; | 
|  | struct socket *accepted = NULL; | 
|  | int8_t irq_state = 0; | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | if (sock->so_type == SOCK_DGRAM){ | 
|  | return -1; // indicates false for connect | 
|  | } else if (sock->so_type == SOCK_STREAM) { | 
|  | /* XXX these do the same thing, what is it you actually wanted to do? | 
|  | * (Originally the first was sleep_on, and the second __down_sem */ | 
|  | if (STAILQ_EMPTY(&(sock->acceptq))) { | 
|  | // block on the acceptq | 
|  | sem_down_irqsave(&sock->accept_sem, &irq_state); | 
|  | } else { | 
|  | sem_down_irqsave(&sock->accept_sem, &irq_state); | 
|  | } | 
|  | spin_lock_irqsave(&sock->waiter_lock); | 
|  | accepted = STAILQ_FIRST(&(sock->acceptq)); | 
|  | STAILQ_REMOVE_HEAD((&(sock->acceptq)), next); | 
|  | spin_unlock_irqsave(&sock->waiter_lock); | 
|  | if (accepted == NULL) return -1; | 
|  | struct file *file = alloc_socket_file(accepted); | 
|  | if (file == NULL) return -1; | 
|  | int fd = insert_file(&p->open_files, file, 0); | 
|  | if (fd < 0) { | 
|  | warn("File insertion for socket open failed"); | 
|  | return -1; | 
|  | } | 
|  | kref_put(&file->f_kref); | 
|  | } | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static error_t accept_callback(void *arg, struct tcp_pcb *newpcb, error_t err) { | 
|  | struct socket *sockold = (struct socket *) arg; | 
|  | struct socket *sock = alloc_sock(sockold->so_family, sockold->so_type, sockold->so_protocol); | 
|  | int8_t irq_state = 0; | 
|  |  | 
|  | sock->so_pcb = newpcb; | 
|  | newpcb->pcbsock = sock; | 
|  | spin_lock_irqsave(&sockold->waiter_lock); | 
|  | STAILQ_INSERT_TAIL(&sockold->acceptq, sock, next); | 
|  | // wake up any kthread who is potentially waiting | 
|  | spin_unlock_irqsave(&sockold->waiter_lock); | 
|  | sem_up_irqsave(&sock->accept_sem, &irq_state); | 
|  | return 0; | 
|  | } | 
|  | intreg_t sys_listen(struct proc *p, int sockfd, int backlog) { | 
|  | struct socket* sock = getsocket(p, sockfd); | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | if (sock->so_type == SOCK_DGRAM){ | 
|  | return -1; // indicates false for connect | 
|  | } else if (sock->so_type == SOCK_STREAM) { | 
|  | // check if the socket is in WAIT state | 
|  | struct tcp_pcb *tpcb = (struct tcp_pcb*)sock->so_pcb; | 
|  | struct tcp_pcb* lpcb = tcp_listen_with_backlog(tpcb, backlog); | 
|  | if (lpcb == NULL) { | 
|  | return -1; | 
|  | } | 
|  | sock->so_pcb = lpcb; | 
|  |  | 
|  | // register callback for new connection | 
|  | tcp_arg(lpcb, sock); | 
|  | tcp_accept(lpcb, accept_callback); | 
|  |  | 
|  | return 0; | 
|  |  | 
|  |  | 
|  | // XXX: add backlog later | 
|  | } | 
|  | return -1; | 
|  | } | 
|  | intreg_t sys_connect(struct proc *p, int sock_fd, const struct sockaddr* addr, int addrlen) { | 
|  | printk("sys_connect called \n"); | 
|  | struct socket* sock = getsocket(p, sock_fd); | 
|  | struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; | 
|  | uint16_t r_port; | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | if (sock->so_type == SOCK_DGRAM){ | 
|  | return -1; // indicates false for connect | 
|  | } else if (sock->so_type == SOCK_STREAM) { | 
|  | error_t err = tcp_connect((struct tcp_pcb*)sock->so_pcb, & (in_addr->sin_addr), in_addr->sin_port, NULL); | 
|  | return err; | 
|  | } | 
|  |  | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | intreg_t sys_send(struct proc *p, int sockfd, const void *buf, size_t len, | 
|  | int flags) { | 
|  | printk("sys_send called \n"); | 
|  | struct socket* sock = getsocket(p, sockfd); | 
|  | const struct sockaddr_in *in_addr = (const struct sockaddr_in *)buf; | 
|  | uint16_t r_port; | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | return len; | 
|  |  | 
|  | } | 
|  | intreg_t sys_recv(struct proc *p, int sockfd, void *buf, size_t len, int flags) { | 
|  | printk("sys_recv called \n"); | 
|  | // return actual length filled | 
|  | return len; | 
|  | } | 
|  |  | 
|  | intreg_t sys_bind(struct proc* p_proc, int fd, const struct sockaddr *addr, socklen_t addrlen) { | 
|  | struct socket* sock = getsocket(p_proc, fd); | 
|  | const struct sockaddr_in *in_addr = (const struct sockaddr_in *)addr; | 
|  | uint16_t r_port; | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | if (sock->so_type == SOCK_DGRAM){ | 
|  | return udp_bind((struct udp_pcb*)sock->so_pcb, & (in_addr->sin_addr), in_addr->sin_port); | 
|  | } else if (sock->so_type == SOCK_STREAM) { | 
|  | return tcp_bind((struct tcp_pcb*)sock->so_pcb, & (in_addr->sin_addr), in_addr->sin_port); | 
|  | } else { | 
|  | printk("SOCK type not supported in bind operation \n"); | 
|  | return -1; | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | intreg_t sys_socket(struct proc *p, int socket_family, int socket_type, int protocol){ | 
|  | //check validity of params | 
|  | if (socket_family != AF_INET && socket_type != SOCK_DGRAM) | 
|  | return 0; | 
|  | struct socket *sock = alloc_sock(socket_family, socket_type, protocol); | 
|  | if (socket_type == SOCK_DGRAM){ | 
|  | /* udp socket */ | 
|  | sock->so_pcb = udp_new(); | 
|  | /* back link */ | 
|  | ((struct udp_pcb*) (sock->so_pcb))->pcbsock = sock; | 
|  | } else if (socket_type == SOCK_STREAM) { | 
|  | /* tcp socket */ | 
|  | sock->so_pcb = tcp_new(); | 
|  | ((struct tcp_pcb*) (sock->so_pcb))->pcbsock = sock; | 
|  | } | 
|  | struct file *file = alloc_socket_file(sock); | 
|  |  | 
|  | if (file == NULL) return -1; | 
|  | int fd = insert_file(&p->open_files, file, 0); | 
|  | if (fd < 0) { | 
|  | warn("File insertion for socket open failed"); | 
|  | return -1; | 
|  | } | 
|  | kref_put(&file->f_kref); | 
|  | printk("Socket open, res = %d\n", fd); | 
|  | return fd; | 
|  | } | 
|  |  | 
|  | intreg_t send_iov(struct socket* sock, struct iovec* iov, int flags){ | 
|  | // COPY_COUNT: for each iov, copy into mbuf, and send | 
|  | // should not copy here, copy in the protocol.. | 
|  | // should be esomething like this sock->so_proto->pr_send(sock, iov, flags); | 
|  | // make it datagram specific for now... | 
|  | send_datagram(sock, iov, flags); | 
|  | // finally time to check for validity of UA, in the protocol send | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | /*TODO: iov support currently broken */ | 
|  | int send_datagram(struct socket* sock, struct iovec* iov, int flags){ | 
|  | // is this a connection oriented protocol? | 
|  | struct pbuf *prev = NULL; | 
|  | struct pbuf *curr = NULL; | 
|  | if (sock->so_type == SOCK_STREAM){ | 
|  | set_errno(ENOTCONN); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | // possible sock locks needed | 
|  | if ((sock->so_state & SS_ISCONNECTED) == 0){ | 
|  | set_errno(EINVAL); | 
|  | return -1; | 
|  | } | 
|  | // pbuf_ref needs to map in the user ref | 
|  | for (int i = 0; i< sizeof(iov) / sizeof (struct iovec); i++){ | 
|  | prev = curr; | 
|  | curr = pbuf_alloc(PBUF_TRANSPORT, iov[i].iov_len, PBUF_REF); | 
|  | if (prev!=NULL) pbuf_chain(prev, curr); | 
|  | } | 
|  | // struct pbuf* pb = pbuf_alloc(PBUF_TRANSPORT, PBUF_REF); | 
|  | udp_send(sock->so_pcb, prev); | 
|  | return 0; | 
|  |  | 
|  | } | 
|  |  | 
|  | /* sys_sendto can send SOCK_DGRAM and eventually SOCK_STREAM | 
|  | * SOCK_DGRAM uses PBUF_REF since UDP does not need to wait for ack | 
|  | * SOCK_STREAM uses PBUF_ | 
|  | * | 
|  | */ | 
|  | intreg_t sys_sendto(struct proc *p_proc, int fd, const void *buffer, size_t length, | 
|  | int flags, const struct sockaddr *dest_addr, socklen_t dest_len){ | 
|  | // look up the socket | 
|  | struct socket* sock = getsocket(p_proc, fd); | 
|  | int error; | 
|  | struct sockaddr_in *in_addr; | 
|  | uint16_t r_port; | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | if (sock->so_type == SOCK_DGRAM){ | 
|  | in_addr = (struct sockaddr_in *)dest_addr; | 
|  | struct pbuf* buf = pbuf_alloc(PBUF_TRANSPORT, length, PBUF_REF); | 
|  | if (buf != NULL) | 
|  | buf->payload = (void*)buffer; | 
|  | else | 
|  | warn("pbuf alloc failed \n"); | 
|  | // potentially unsafe cast to udp_pcb | 
|  | return udp_sendto((struct udp_pcb*) sock->so_pcb, buf, &in_addr->sin_addr, in_addr->sin_port); | 
|  | } | 
|  |  | 
|  | return -1; | 
|  | //TODO: support for sendmsg and iovectors? Let's get the basics working first! | 
|  | #if 0 | 
|  | // use iovector to handle sendmsg calls too, and potentially scatter-gather | 
|  | struct msghdr msg; | 
|  | struct iovec iov; | 
|  | struct uio auio; | 
|  |  | 
|  | // checking for permission only when you are sending it | 
|  | // potential bug TOCTOU, especially with async calls | 
|  |  | 
|  | msg.msg_name = dest_addr; | 
|  | msg.msg_namelen = dest_len; | 
|  | msg.msg_iov = &iov; | 
|  | msg.msg_iovlen = 1; | 
|  | msg.msg_control = 0; | 
|  |  | 
|  | iov.iov_base = buffer; | 
|  | iov.iov_len = length; | 
|  |  | 
|  |  | 
|  | // this is why we need another function to populate auio | 
|  |  | 
|  | auio.uio_iov = iov; | 
|  | auio.uio_iovcnt = 1; | 
|  | auio.uio_offset = 0; | 
|  | auio.uio_resid = 0; | 
|  | auio.uio_rw = UIO_WRITE; | 
|  | auio.uio_proc = p; | 
|  |  | 
|  | // consider changing to send_uaio, since we care about progress. | 
|  | error = send_iov(soc, iov, flags); | 
|  | #endif | 
|  | } | 
|  |  | 
|  | /* UDP and TCP has different waiting semantics | 
|  | * UDP requires any packet to be available. | 
|  | * TCP requires accumulation of certain size? | 
|  | */ | 
|  | intreg_t sys_recvfrom(struct proc *p, int socket, void *restrict buffer, size_t length, int flags, struct sockaddr *restrict address, socklen_t *restrict address_len){ | 
|  | struct socket* sock = getsocket(p, socket); | 
|  | int copied = 0; | 
|  | int returnval = 0; | 
|  | int8_t irq_state = 0; | 
|  | if (sock == NULL) { | 
|  | set_errno(EBADF); | 
|  | return -1; | 
|  | } | 
|  | if (sock->so_type == SOCK_DGRAM){ | 
|  | struct pbuf_head *ph = &(sock->recv_buff); | 
|  | struct pbuf* buf = NULL; | 
|  | buf = detach_pbuf(ph); | 
|  | if (!buf){ | 
|  | // about to sleep | 
|  | sem_down_irqsave(&sock->sem, &irq_state); | 
|  | buf = detach_pbuf(ph); | 
|  | // Someone woke me up, there should be data.. | 
|  | assert(buf); | 
|  | } else { | 
|  | sem_down_irqsave(&sock->sem, &irq_state); | 
|  | } | 
|  | copied = buf->len - sizeof(struct udp_hdr); | 
|  | if (copied > length) | 
|  | copied = length; | 
|  | pbuf_header(buf, -UDP_HDR_SZ); | 
|  | // copy it to user space | 
|  | returnval = memcpy_to_user_errno(p, buffer, buf->payload, copied); | 
|  | } | 
|  | if (returnval < 0) | 
|  | return -1; | 
|  | else | 
|  | return copied; | 
|  | } | 
|  |  | 
/*
 * Placeholder for scanning the input fd sets and filling the output sets
 * with the descriptors that are ready.  Not implemented: every argument is
 * ignored and 0 is returned.
 */
static int selscan(int maxfdp1,
                   fd_set *readset_in, fd_set *writeset_in, fd_set *exceptset_in,
                   fd_set *readset_out, fd_set *writeset_out, fd_set *exceptset_out)
{
	const int ready_count = 0;

	return ready_count;
}
|  |  | 
|  | /* TODO: Start respecting the time out value */ | 
|  | /* TODO: start respecting writefds and exceptfds */ | 
|  | intreg_t sys_select(struct proc *p, int nfds, fd_set *readfds, fd_set *writefds, | 
|  | fd_set *exceptfds, struct timeval *timeout){ | 
|  | /* Create a semaphore */ | 
|  | struct semaphore_entry read_sem; | 
|  | int8_t irq_state = 0; | 
|  |  | 
|  | sem_init_irqsave(&(read_sem.sem), 0); | 
|  |  | 
|  | /* insert into the sem list of a fd / socket */ | 
|  | int low_fd = 0; | 
|  | for (int i = low_fd; i< nfds; i++) { | 
|  | if(FD_ISSET(i, readfds)){ | 
|  | struct socket* sock = getsocket(p, i); | 
|  | /* if the fd is not open or if the file descriptor is not a socket | 
|  | * go to the next in the fd set | 
|  | */ | 
|  | if (sock == NULL) continue; | 
|  | /* for each file that is open, insert this semaphore to be woken up when there | 
|  | * is data available to be read | 
|  | */ | 
|  | spin_lock(&sock->waiter_lock); | 
|  | LIST_INSERT_HEAD(&sock->waiters, &read_sem, link); | 
|  | spin_unlock(&sock->waiter_lock); | 
|  | } | 
|  | } | 
|  | /* At this point wait on the semaphore */ | 
|  | sem_down_irqsave(&read_sem.sem, &irq_state); | 
|  | /* someone woke me up, so walk through the list of descriptors and find one that is ready */ | 
|  | /* remove itself from all the lists that it is waiting on */ | 
|  | for (int i = low_fd; i<nfds; i++) { | 
|  | if (FD_ISSET(i, readfds)){ | 
|  | struct socket* sock = getsocket(p,i); | 
|  | if (sock == NULL) continue; | 
|  | spin_lock(&sock->waiter_lock); | 
|  | LIST_REMOVE(&read_sem, link); | 
|  | spin_unlock(&sock->waiter_lock); | 
|  | } | 
|  | } | 
|  | fd_set readout, writeout, exceptout; | 
|  | FD_ZERO(&readout); | 
|  | FD_ZERO(&writeout); | 
|  | FD_ZERO(&exceptout); | 
|  | for (int i = low_fd; i< nfds; i ++){ | 
|  | if (readfds && FD_ISSET(i, readfds)){ | 
|  | struct socket* sock = getsocket(p, i); | 
|  | if ((sock->recv_buff).qlen > 0){ | 
|  | FD_SET(i, &readout); | 
|  | } | 
|  | /* if the socket is ready, then we can return it */ | 
|  | } | 
|  | } | 
|  | if (readfds) | 
|  | memcpy(readfds, &readout, sizeof(*readfds)); | 
|  | if (writefds) | 
|  | memcpy(writefds, &writeout, sizeof(*writefds)); | 
|  | if (exceptfds) | 
|  | memcpy(readfds, &readout, sizeof(*readfds)); | 
|  |  | 
|  | /* Sleep on that semaphore */ | 
|  | /* Somehow get these file descriptors to wake me up when there is new data */ | 
|  | return 0; | 
|  | } |