/* * NET3: Implementation of BSD Unix domain sockets. * * Authors: Alan Cox, <alan@cymru.net> * * Currently this contains all but the file descriptor passing code. * Before that goes in the odd bugs in the iovec handlers need * fixing, and this bit testing. BSD fd passing is not a trivial part * of the exercise it turns out. Anyone like writing garbage collectors. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Fixes: * Linus Torvalds : Assorted bug cures. * Niibe Yutaka : async I/O support. * Carsten Paeth : PF_UNIX check, address fixes. * Alan Cox : Limit size of allocated blocks. * Alan Cox : Fixed the stupid socketpair bug. * Alan Cox : BSD compatibility fine tuning. * Alan Cox : Fixed a bug in connect when interrupted. * Alan Cox : Sorted out a proper draft version of * file descriptor passing hacked up from * Mike Shaver's work. * Marty Leisner : Fixes to fd passing * Nick Nevin : recvmsg bugfix. * Alan Cox : Started proper garbage collector * Heiko EiBfeldt : Missing verify_area check * * Known differences from reference BSD that was tested: * * [TO FIX] * ECONNREFUSED is not returned from one end of a connected() socket to the * other the moment one end closes. * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark * and a fake inode identifier (nor the BSD first socket fstat twice bug). * [NOT TO FIX] * accept() returns a path name even if the connecting socket has closed * in the meantime (BSD loses the path and gives up). * accept() returns 0 length path for an unbound connector. BSD returns 16 * and a null first byte in the path (but not for gethost/peername - BSD bug ??) * socketpair(...SOCK_RAW..) doesn't panic the kernel. * BSD af_unix apparently has connect forgetting to block properly. */ #include <linux/config.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/stat.h> #include <linux/socket.h> #include <linux/un.h> #include <linux/fcntl.h> #include <linux/termios.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in.h> #include <linux/fs.h> #include <linux/malloc.h> #include <asm/segment.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <net/sock.h> #include <net/tcp.h> #include <net/af_unix.h> #include <linux/proc_fs.h> unix_socket *unix_socket_list=NULL; #define min(a,b) (((a)<(b))?(a):(b)) /* * Make sure the unix name is null-terminated. */ static inline void unix_mkname(struct sockaddr_un * sunaddr, unsigned long len) { if (len >= sizeof(*sunaddr)) len = sizeof(*sunaddr)-1; ((char *)sunaddr)[len]=0; } /* * Note: Sockets may not be removed _during_ an interrupt or net_bh * handler using this technique. They can be added although we do not * use this facility. */ static void unix_remove_socket(unix_socket *sk) { unix_socket **s; cli(); s=&unix_socket_list; while(*s!=NULL) { if(*s==sk) { *s=sk->next; sti(); return; } s=&((*s)->next); } sti(); } static void unix_insert_socket(unix_socket *sk) { cli(); sk->next=unix_socket_list; unix_socket_list=sk; sti(); } static unix_socket *unix_find_socket(struct inode *i) { unix_socket *s; cli(); s=unix_socket_list; while(s) { if(s->protinfo.af_unix.inode==i) { sti(); return(s); } s=s->next; } sti(); return(NULL); } /* * Delete a unix socket. We have to allow for deferring this on a timer. */ static void unix_destroy_timer(unsigned long data) { unix_socket *sk=(unix_socket *)data; if(sk->protinfo.af_unix.locks==0 && sk->wmem_alloc==0) { if(sk->protinfo.af_unix.name) kfree(sk->protinfo.af_unix.name); sk_free(sk); return; } /* * Retry; */ sk->timer.expires=jiffies+10*HZ; /* No real hurry try it every 10 seconds or so */ add_timer(&sk->timer); } static void unix_delayed_delete(unix_socket *sk) { sk->timer.data=(unsigned long)sk; sk->timer.expires=jiffies+HZ; /* Normally 1 second after will clean up. After that we try every 10 */ sk->timer.function=unix_destroy_timer; add_timer(&sk->timer); } static void unix_destroy_socket(unix_socket *sk) { struct sk_buff *skb; unix_remove_socket(sk); while((skb=skb_dequeue(&sk->receive_queue))!=NULL) { if(sk->state==TCP_LISTEN) { unix_socket *osk=skb->sk; osk->state=TCP_CLOSE; kfree_skb(skb, FREE_WRITE); /* Now surplus - free the skb first before the socket */ osk->state_change(osk); /* So the connect wakes and cleans up (if any) */ /* osk will be destroyed when it gets to close or the timer fires */ } else { /* passed fds are erased in the kfree_skb hook */ kfree_skb(skb,FREE_WRITE); } } if(sk->protinfo.af_unix.inode!=NULL) { iput(sk->protinfo.af_unix.inode); sk->protinfo.af_unix.inode=NULL; } if(--sk->protinfo.af_unix.locks==0 && sk->wmem_alloc==0) { if(sk->protinfo.af_unix.name) kfree(sk->protinfo.af_unix.name); sk_free(sk); } else { sk->dead=1; unix_delayed_delete(sk); /* Try every so often until buffers are all freed */ } } /* * Fixme: We need async I/O on AF_UNIX doing next. */ static int unix_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -EINVAL; } /* * Yes socket options work with the new unix domain socketry!!!!!!! */ static int unix_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen) { unix_socket *sk=sock->data; if(level!=SOL_SOCKET) return -EOPNOTSUPP; return sock_setsockopt(sk,level,optname,optval,optlen); } static int unix_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { unix_socket *sk=sock->data; if(level!=SOL_SOCKET) return -EOPNOTSUPP; return sock_getsockopt(sk,level,optname,optval,optlen); } static int unix_listen(struct socket *sock, int backlog) { unix_socket *sk=sock->data; if(sk->type!=SOCK_STREAM) return -EOPNOTSUPP; /* Only stream sockets accept */ if(sk->protinfo.af_unix.name==NULL) return -EINVAL; /* No listens on an unbound socket */ sk->max_ack_backlog=backlog; sk->state=TCP_LISTEN; return 0; } static void def_callback1(struct sock *sk) { if(!sk->dead) wake_up_interruptible(sk->sleep); } static void def_callback2(struct sock *sk, int len) { if(!sk->dead) { wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket, 1); } } static void def_callback3(struct sock *sk) { if(!sk->dead) { wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket, 2); } } static int unix_create(struct socket *sock, int protocol) { unix_socket *sk; if(protocol && protocol != PF_UNIX) return -EPROTONOSUPPORT; sk=(unix_socket *)sk_alloc(GFP_KERNEL); if(sk==NULL) return -ENOMEM; switch(sock->type) { case SOCK_STREAM: break; /* * Believe it or not BSD has AF_UNIX, SOCK_RAW though * nothing uses it. */ case SOCK_RAW: sock->type=SOCK_DGRAM; case SOCK_DGRAM: break; default: sk_free(sk); return -ESOCKTNOSUPPORT; } sk->type=sock->type; init_timer(&sk->timer); skb_queue_head_init(&sk->write_queue); skb_queue_head_init(&sk->receive_queue); skb_queue_head_init(&sk->back_log); sk->protinfo.af_unix.family=AF_UNIX; sk->protinfo.af_unix.inode=NULL; sk->protinfo.af_unix.locks=1; /* Us */ sk->protinfo.af_unix.readsem=MUTEX; /* single task reading lock */ sk->rcvbuf=SK_RMEM_MAX; sk->sndbuf=SK_WMEM_MAX; sk->allocation=GFP_KERNEL; sk->state=TCP_CLOSE; sk->priority=SOPRI_NORMAL; sk->state_change=def_callback1; sk->data_ready=def_callback2; sk->write_space=def_callback3; sk->error_report=def_callback1; sk->mtu=4096; sk->socket=sock; sock->data=(void *)sk; sk->sleep=sock->wait; unix_insert_socket(sk); return 0; } static int unix_dup(struct socket *newsock, struct socket *oldsock) { return unix_create(newsock,0); } static int unix_release(struct socket *sock, struct socket *peer) { unix_socket *sk=sock->data; unix_socket *skpair; /* May not have data attached */ if(sk==NULL) return 0; sk->state_change(sk); sk->dead=1; skpair=(unix_socket *)sk->protinfo.af_unix.other; /* Person we send to (default) */ if(sk->type==SOCK_STREAM && skpair!=NULL && skpair->state!=TCP_LISTEN) { skpair->shutdown=SHUTDOWN_MASK; /* No more writes */ skpair->state_change(skpair); /* Wake any blocked writes */ } if(skpair!=NULL) skpair->protinfo.af_unix.locks--; /* It may now die */ sk->protinfo.af_unix.other=NULL; /* No pair */ unix_destroy_socket(sk); /* Try to flush out this socket. Throw out buffers at least */ unix_gc(); /* Garbage collect fds */ /* * FIXME: BSD difference: In BSD all sockets connected to use get ECONNRESET and we die on the spot. In * Linux we behave like files and pipes do and wait for the last dereference. */ sock->data = NULL; sk->socket = NULL; return 0; } static unix_socket *unix_find_other(char *path, int *error) { int old_fs; int err; struct inode *inode; unix_socket *u; old_fs=get_fs(); set_fs(get_ds()); err = open_namei(path, 2, S_IFSOCK, &inode, NULL); set_fs(old_fs); if(err<0) { *error=err; return NULL; } u=unix_find_socket(inode); iput(inode); if(u==NULL) { *error=-ECONNREFUSED; return NULL; } return u; } static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; unix_socket *sk=sock->data; int old_fs; int err; if(sk->protinfo.af_unix.name) return -EINVAL; /* Already bound */ if(addr_len>sizeof(struct sockaddr_un) || addr_len<3 || sunaddr->sun_family!=AF_UNIX) return -EINVAL; unix_mkname(sunaddr, addr_len); /* * Put ourselves in the filesystem */ if(sk->protinfo.af_unix.inode!=NULL) return -EINVAL; sk->protinfo.af_unix.name=kmalloc(addr_len+1, GFP_KERNEL); if(sk->protinfo.af_unix.name==NULL) return -ENOMEM; memcpy(sk->protinfo.af_unix.name, sunaddr->sun_path, addr_len+1); old_fs=get_fs(); set_fs(get_ds()); err=do_mknod(sk->protinfo.af_unix.name,S_IFSOCK|S_IRWXUGO,0); if(err==0) err=open_namei(sk->protinfo.af_unix.name, 2, S_IFSOCK, &sk->protinfo.af_unix.inode, NULL); set_fs(old_fs); if(err<0) { kfree_s(sk->protinfo.af_unix.name,addr_len+1); sk->protinfo.af_unix.name=NULL; if(err==-EEXIST) return -EADDRINUSE; else return err; } return 0; } static int unix_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { unix_socket *sk=sock->data; struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; unix_socket *other; struct sk_buff *skb; int err; if(sk->type==SOCK_STREAM && sk->protinfo.af_unix.other) { if(sock->state==SS_CONNECTING && sk->state==TCP_ESTABLISHED) { sock->state=SS_CONNECTED; return 0; } if(sock->state==SS_CONNECTING && sk->state == TCP_CLOSE) { sock->state=SS_UNCONNECTED; return -ECONNREFUSED; } if(sock->state!=SS_CONNECTING) return -EISCONN; if(flags&O_NONBLOCK) return -EALREADY; /* * Drop through the connect up logic to the wait. */ } if(addr_len < sizeof(sunaddr->sun_family)+1 || sunaddr->sun_family!=AF_UNIX) return -EINVAL; unix_mkname(sunaddr, addr_len); if(sk->type==SOCK_DGRAM) { if(sk->protinfo.af_unix.other) { sk->protinfo.af_unix.other->protinfo.af_unix.locks--; sk->protinfo.af_unix.other=NULL; sock->state=SS_UNCONNECTED; } other=unix_find_other(sunaddr->sun_path, &err); if(other==NULL) return err; if(other->type!=sk->type) return -EPROTOTYPE; other->protinfo.af_unix.locks++; sk->protinfo.af_unix.other=other; sock->state=SS_CONNECTED; sk->state=TCP_ESTABLISHED; return 0; /* Done */ } if(sock->state==SS_UNCONNECTED) { /* * Now ready to connect */ skb=sock_alloc_send_skb(sk, 0, 0, 0, &err); /* Marker object */ if(skb==NULL) return err; skb->sk=sk; /* So they know it is us */ skb->free=1; skb->h.filp=NULL; sk->state=TCP_CLOSE; unix_mkname(sunaddr, addr_len); other=unix_find_other(sunaddr->sun_path, &err); if(other==NULL) { kfree_skb(skb, FREE_WRITE); return err; } if(other->type!=sk->type) { kfree_skb(skb, FREE_WRITE); return -EPROTOTYPE; } other->protinfo.af_unix.locks++; /* Lock the other socket so it doesn't run off for a moment */ other->ack_backlog++; sk->protinfo.af_unix.other=other; skb_queue_tail(&other->receive_queue,skb); sk->state=TCP_SYN_SENT; sock->state=SS_CONNECTING; sti(); other->data_ready(other,0); /* Wake up ! */ } /* Wait for an accept */ cli(); while(sk->state==TCP_SYN_SENT) { if(flags&O_NONBLOCK) { sti(); return -EINPROGRESS; } interruptible_sleep_on(sk->sleep); if(current->signal & ~current->blocked) { sti(); return -ERESTARTSYS; } } /* * Has the other end closed on us ? */ if(sk->state==TCP_CLOSE) { sk->protinfo.af_unix.other->protinfo.af_unix.locks--; sk->protinfo.af_unix.other=NULL; sock->state=SS_UNCONNECTED; sti(); return -ECONNREFUSED; } /* * Amazingly it has worked */ sock->state=SS_CONNECTED; sti(); return 0; } static int unix_socketpair(struct socket *a, struct socket *b) { unix_socket *ska,*skb; ska=a->data; skb=b->data; /* Join our sockets back to back */ ska->protinfo.af_unix.locks++; skb->protinfo.af_unix.locks++; ska->protinfo.af_unix.other=skb; skb->protinfo.af_unix.other=ska; ska->state=TCP_ESTABLISHED; skb->state=TCP_ESTABLISHED; return 0; } static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { unix_socket *sk=sock->data; unix_socket *newsk, *tsk; struct sk_buff *skb; if(sk->type!=SOCK_STREAM) { return -EOPNOTSUPP; } if(sk->state!=TCP_LISTEN) { return -EINVAL; } newsk=newsock->data; if(sk->protinfo.af_unix.name!=NULL) { newsk->protinfo.af_unix.name=kmalloc(strlen(sk->protinfo.af_unix.name)+1, GFP_KERNEL); if(newsk->protinfo.af_unix.name==NULL) return -ENOMEM; strcpy(newsk->protinfo.af_unix.name, sk->protinfo.af_unix.name); } do { cli(); skb=skb_dequeue(&sk->receive_queue); if(skb==NULL) { if(flags&O_NONBLOCK) { sti(); return -EAGAIN; } interruptible_sleep_on(sk->sleep); if(current->signal & ~current->blocked) { sti(); return -ERESTARTSYS; } sti(); } } while(skb==NULL); tsk=skb->sk; kfree_skb(skb, FREE_WRITE); /* The buffer is just used as a tag */ sk->ack_backlog--; newsk->protinfo.af_unix.other=tsk; tsk->protinfo.af_unix.other=newsk; tsk->state=TCP_ESTABLISHED; newsk->state=TCP_ESTABLISHED; newsk->protinfo.af_unix.locks++; /* Swap lock over */ sk->protinfo.af_unix.locks--; /* Locked to child socket not master */ tsk->protinfo.af_unix.locks++; /* Back lock */ sti(); tsk->state_change(tsk); /* Wake up any sleeping connect */ sock_wake_async(tsk->socket, 0); return 0; } static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { unix_socket *sk=sock->data; struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; if(peer) { if(sk->protinfo.af_unix.other==NULL) return -ENOTCONN; sk=sk->protinfo.af_unix.other; } sunaddr->sun_family=AF_UNIX; if(sk->protinfo.af_unix.name==NULL) { *sunaddr->sun_path=0; *uaddr_len=sizeof(sunaddr->sun_family)+1; return 0; /* Not bound */ } *uaddr_len=sizeof(sunaddr->sun_family)+strlen(sk->protinfo.af_unix.name)+1; strcpy(sunaddr->sun_path,sk->protinfo.af_unix.name); /* 108 byte limited */ return 0; } /* * Support routines for struct cmsghdr handling */ static struct cmsghdr *unix_copyrights(void *userp, int len) { struct cmsghdr *cm; if(len>256|| len <=0) return NULL; cm=kmalloc(len, GFP_KERNEL); memcpy_fromfs(cm, userp, len); return cm; } /* * Return a header block */ static void unix_returnrights(void *userp, int len, struct cmsghdr *cm) { memcpy_tofs(userp, cm, len); kfree(cm); } /* * Copy file descriptors into system space. * Return number copied or negative error code */ static int unix_fd_copy(struct sock *sk, struct cmsghdr *cmsg, struct file **fp) { int num=cmsg->cmsg_len-sizeof(struct cmsghdr); int i; int *fdp=(int *)cmsg->cmsg_data; num/=4; /* Odd bytes are forgotten in BSD not errored */ if(num>=UNIX_MAX_FD) return -EINVAL; /* * Verify the descriptors. */ for(i=0; i< num; i++) { int fd; fd = fdp[i]; #if 0 printk("testing fd %d\n", fd); #endif if(fd < 0|| fd >=NR_OPEN) return -EBADF; if(current->files->fd[fd]==NULL) return -EBADF; } /* add another reference to these files */ for(i=0; i< num; i++) { fp[i]=current->files->fd[fdp[i]]; fp[i]->f_count++; unix_inflight(fp[i]); } return num; } /* * Free the descriptors in the array */ static void unix_fd_free(struct sock *sk, struct file **fp, int num) { int i; for(i=0;i<num;i++) { close_fp(fp[i]); unix_notinflight(fp[i]); } } /* * Count the free descriptors available to a process. * Interpretation issue: Is the limit the highest descriptor (buggy * allowing passed fd's higher up to cause a limit to be exceeded) - * but how the old code did it - or like this... */ static int unix_files_free(void) { int i; int n=0; for (i=0;i<NR_OPEN;i++) { if(current->files->fd[i]) n++; } i=NR_OPEN; if(i>current->rlim[RLIMIT_NOFILE].rlim_cur) i=current->rlim[RLIMIT_NOFILE].rlim_cur; if(n>=i) return 0; return i-n; } /* * Perform the AF_UNIX file descriptor pass out functionality. This * is nasty and messy as is the whole design of BSD file passing. */ static void unix_detach_fds(struct sk_buff *skb, struct cmsghdr *cmsg) { int i; /* count of space in parent for fds */ int cmnum; struct file **fp; struct file **ufp; int *cmfptr=NULL; /* =NULL To keep gcc happy */ /* number of fds actually passed */ int fdnum; int ffree; int ufn=0; if(cmsg==NULL) cmnum=0; else { cmnum=cmsg->cmsg_len-sizeof(struct cmsghdr); cmnum/=sizeof(int); cmfptr=(int *)&cmsg->cmsg_data; } memcpy(&fdnum,skb->h.filp,sizeof(int)); fp=(struct file **)(skb->h.filp+sizeof(int)); if(cmnum>fdnum) cmnum=fdnum; ffree=unix_files_free(); if(cmnum>ffree) cmnum=ffree; ufp=¤t->files->fd[0]; /* * Copy those that fit */ for(i=0;i<cmnum;i++) { /* * Insert the fd */ while(ufp[ufn]!=NULL) ufn++; ufp[ufn]=fp[i]; *cmfptr++=ufn; FD_CLR(ufn,¤t->files->close_on_exec); unix_notinflight(fp[i]); } /* * Dump those that don't */ for(;i<fdnum;i++) { close_fp(fp[i]); unix_notinflight(fp[i]); } kfree(skb->h.filp); skb->h.filp=NULL; /* no need to use destructor */ skb->destructor = NULL; } static void unix_destruct_fds(struct sk_buff *skb) { unix_detach_fds(skb,NULL); } /* * Attach the file descriptor array to an sk_buff */ static void unix_attach_fds(int fpnum,struct file **fp,struct sk_buff *skb) { skb->h.filp=kmalloc(sizeof(int)+fpnum*sizeof(struct file *), GFP_KERNEL); /* number of descriptors starts block */ memcpy(skb->h.filp,&fpnum,sizeof(int)); /* actual descriptors */ memcpy(skb->h.filp+sizeof(int),fp,fpnum*sizeof(struct file *)); skb->destructor = unix_destruct_fds; } /* * Send AF_UNIX data. */ static int unix_sendmsg(struct socket *sock, struct msghdr *msg, int len, int nonblock, int flags) { unix_socket *sk=sock->data; unix_socket *other; struct sockaddr_un *sunaddr=msg->msg_name; int err,size; struct sk_buff *skb; int limit=0; int sent=0; struct file *fp[UNIX_MAX_FD]; /* number of fds waiting to be passed, 0 means either * no fds to pass or they've already been passed */ int fpnum=0; if(sk->err) return sock_error(sk); if(flags&MSG_OOB) return -EOPNOTSUPP; if(flags) /* For now */ { return -EINVAL; } if(sunaddr!=NULL) { if(sock->type==SOCK_STREAM) { if(sk->state==TCP_ESTABLISHED) return -EISCONN; else return -EOPNOTSUPP; } } if(sunaddr==NULL) { if(sk->protinfo.af_unix.other==NULL) return -ENOTCONN; } /* * A control message has been attached. */ if(msg->msg_control) { struct cmsghdr *cm=unix_copyrights(msg->msg_control, msg->msg_controllen); if(cm==NULL || msg->msg_controllen<sizeof(struct cmsghdr) || cm->cmsg_type!=SCM_RIGHTS || cm->cmsg_level!=SOL_SOCKET || msg->msg_controllen!=cm->cmsg_len) { kfree(cm); return -EINVAL; } fpnum=unix_fd_copy(sk,cm,fp); kfree(cm); if(fpnum<0) { return fpnum; } } while(sent < len) { /* * Optimisation for the fact that under 0.01% of X messages typically * need breaking up. */ size=len-sent; if(size>(sk->sndbuf-sizeof(struct sk_buff))/2) /* Keep two messages in the pipe so it schedules better */ { if(sock->type==SOCK_DGRAM) { unix_fd_free(sk,fp,fpnum); return -EMSGSIZE; } size=(sk->sndbuf-sizeof(struct sk_buff))/2; } /* * Keep to page sized kmalloc()'s as various people * have suggested. Big mallocs stress the vm too * much. */ if(size > 4000 && sock->type!=SOCK_DGRAM) limit = 4000; /* Fall back to 4K if we can't grab a big buffer this instant */ else limit = 0; /* Otherwise just grab and wait */ /* * Grab a buffer */ skb=sock_alloc_send_skb(sk,size,limit,nonblock, &err); if(skb==NULL) { unix_fd_free(sk,fp,fpnum); if(sent) { sk->err=-err; return sent; } return err; } size=skb_tailroom(skb); /* If we dropped back on a limit then our skb is smaller */ skb->sk=sk; skb->free=1; if(fpnum) { unix_attach_fds(fpnum,fp,skb); fpnum=0; } else skb->h.filp=NULL; memcpy_fromiovec(skb_put(skb,size),msg->msg_iov, size); cli(); if(sunaddr==NULL) { other=sk->protinfo.af_unix.other; if(sock->type==SOCK_DGRAM && other->dead) { other->protinfo.af_unix.locks--; sk->protinfo.af_unix.other=NULL; sock->state=SS_UNCONNECTED; sti(); kfree_skb(skb, FREE_WRITE); if(!sent) return -ECONNRESET; else return sent; } } else { unix_mkname(sunaddr, msg->msg_namelen); other=unix_find_other(sunaddr->sun_path, &err); if(other==NULL) { sti(); kfree_skb(skb, FREE_WRITE); if(sent) return sent; else return err; } } skb_queue_tail(&other->receive_queue, skb); sti(); /* if we sent an fd, only do it once */ other->data_ready(other,size); sent+=size; } return sent; } /* * Sleep until data has arrive. But check for races.. */ static void unix_data_wait(unix_socket * sk) { cli(); if (!skb_peek(&sk->receive_queue)) { sk->socket->flags |= SO_WAITDATA; interruptible_sleep_on(sk->sleep); sk->socket->flags &= ~SO_WAITDATA; } sti(); } static int unix_recvmsg(struct socket *sock, struct msghdr *msg, int size, int noblock, int flags, int *addr_len) { unix_socket *sk=sock->data; struct sockaddr_un *sunaddr=msg->msg_name; struct sk_buff *skb; int copied=0; unsigned char *sp; int len; int num; struct iovec *iov=msg->msg_iov; struct cmsghdr *cm=NULL; int ct=msg->msg_iovlen; if(flags&MSG_OOB) return -EOPNOTSUPP; if(addr_len) *addr_len=0; if(sk->err) return sock_error(sk); if(msg->msg_control) { cm=unix_copyrights(msg->msg_control, msg->msg_controllen); if(msg->msg_controllen<sizeof(struct cmsghdr) #if 0 /* investigate this further -- Stevens example doesn't seem to care */ || cm->cmsg_type!=SCM_RIGHTS || cm->cmsg_level!=SOL_SOCKET || msg->msg_controllen!=cm->cmsg_len #endif ) { kfree(cm); /* printk("recvmsg: Bad msg_control\n");*/ return -EINVAL; } } down(&sk->protinfo.af_unix.readsem); /* Lock the socket */ while(ct--) { int done=0; sp=iov->iov_base; len=iov->iov_len; iov++; while(done<len) { if (copied && (flags & MSG_PEEK)) goto out; if (copied == size) goto out; skb=skb_dequeue(&sk->receive_queue); if(skb==NULL) { up(&sk->protinfo.af_unix.readsem); if(sk->shutdown & RCV_SHUTDOWN) return copied; if(copied) return copied; if(noblock) return -EAGAIN; if(current->signal & ~current->blocked) return -ERESTARTSYS; unix_data_wait(sk); down(&sk->protinfo.af_unix.readsem); continue; } if(msg->msg_name!=NULL) { sunaddr->sun_family=AF_UNIX; if(skb->sk->protinfo.af_unix.name) { memcpy(sunaddr->sun_path, skb->sk->protinfo.af_unix.name, 108); if(addr_len) *addr_len=strlen(sunaddr->sun_path)+sizeof(short); } else if(addr_len) *addr_len=sizeof(short); } num=min(skb->len,len-done); memcpy_tofs(sp, skb->data, num); if (skb->h.filp!=NULL) unix_detach_fds(skb,cm); copied+=num; done+=num; sp+=num; if (!(flags & MSG_PEEK)) skb_pull(skb, num); /* put the skb back if we didn't use it up.. */ if (skb->len) { skb_queue_head(&sk->receive_queue, skb); continue; } kfree_skb(skb, FREE_WRITE); if(sock->type==SOCK_DGRAM || cm) goto out; } } out: up(&sk->protinfo.af_unix.readsem); if(cm) unix_returnrights(msg->msg_control,msg->msg_controllen,cm); return copied; } static int unix_shutdown(struct socket *sock, int mode) { unix_socket *sk=(unix_socket *)sock->data; unix_socket *other=sk->protinfo.af_unix.other; if(mode&SEND_SHUTDOWN) { sk->shutdown|=SEND_SHUTDOWN; sk->state_change(sk); if(other) { other->shutdown|=RCV_SHUTDOWN; other->state_change(other); } } other=sk->protinfo.af_unix.other; if(mode&RCV_SHUTDOWN) { sk->shutdown|=RCV_SHUTDOWN; sk->state_change(sk); if(other) { other->shutdown|=SEND_SHUTDOWN; other->state_change(other); } } return 0; } static int unix_select(struct socket *sock, int sel_type, select_table *wait) { return datagram_select(sock->data,sel_type,wait); } static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { unix_socket *sk=sock->data; int err; long amount=0; switch(cmd) { case TIOCOUTQ: err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); if(err) return err; amount=sk->sndbuf-sk->wmem_alloc; if(amount<0) amount=0; put_fs_long(amount,(unsigned long *)arg); return 0; case TIOCINQ: { struct sk_buff *skb; if(sk->state==TCP_LISTEN) return -EINVAL; /* These two are safe on a single CPU system as only user tasks fiddle here */ if((skb=skb_peek(&sk->receive_queue))!=NULL) amount=skb->len; err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long)); if(err) return err; put_fs_long(amount,(unsigned long *)arg); return 0; } default: return -EINVAL; } /*NOTREACHED*/ return(0); } #ifdef CONFIG_PROC_FS static int unix_get_info(char *buffer, char **start, off_t offset, int length, int dummy) { off_t pos=0; off_t begin=0; int len=0; unix_socket *s=unix_socket_list; len+= sprintf(buffer,"Num RefCount Protocol Flags Type St " "Inode Path\n"); while(s!=NULL) { len+=sprintf(buffer+len,"%p: %08X %08X %08lX %04X %02X %5ld", s, s->protinfo.af_unix.locks, 0, s->socket->flags, s->socket->type, s->socket->state, s->socket->inode ? s->socket->inode->i_ino : 0); if(s->protinfo.af_unix.name!=NULL) len+=sprintf(buffer+len, " %s\n", s->protinfo.af_unix.name); else buffer[len++]='\n'; pos=begin+len; if(pos<offset) { len=0; begin=pos; } if(pos>offset+length) break; s=s->next; } *start=buffer+(offset-begin); len-=(offset-begin); if(len>length) len=length; return len; } #endif struct proto_ops unix_proto_ops = { AF_UNIX, unix_create, unix_dup, unix_release, unix_bind, unix_connect, unix_socketpair, unix_accept, unix_getname, unix_select, unix_ioctl, unix_listen, unix_shutdown, unix_setsockopt, unix_getsockopt, unix_fcntl, unix_sendmsg, unix_recvmsg }; void unix_proto_init(struct net_proto *pro) { printk(KERN_INFO "NET3: Unix domain sockets 0.12 for Linux NET3.035.\n"); sock_register(unix_proto_ops.family, &unix_proto_ops); #ifdef CONFIG_PROC_FS proc_net_register(&(struct proc_dir_entry) { PROC_NET_UNIX, 4, "unix", S_IFREG | S_IRUGO, 1, 0, 0, 0, &proc_net_inode_operations, unix_get_info }); #endif } /* * Local variables: * compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c" * End: */ </body>