/*
 * NET3:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@cymru.net>
 *
 *		Currently this contains all but the file descriptor passing code.
 *		Before that goes in, the odd bugs in the iovec handlers need
 *		fixing, and this bit needs testing. BSD fd passing is not a trivial
 *		part of the exercise, it turns out. Anyone like writing garbage collectors?
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/malloc.h>
#include <asm/segment.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>

/* Head of the list of AF_UNIX sockets; entries are chained through their ->next field */
unix_socket *unix_socket_list=NULL;

/* Smaller of two values. Either argument may be evaluated twice, so avoid side effects. */
#define min(a,b)	(((a)<(b))?(a):(b))

/*
 *	Force a terminating NUL into a socket address. The byte at offset
 *	len (counted from the start of the structure) is zeroed; when len
 *	would fall outside the structure the last byte is zeroed instead.
 */

static inline void unix_mkname(struct sockaddr_un * sunaddr, unsigned long len)
{
	char *raw = (char *)sunaddr;
	unsigned long last = sizeof(*sunaddr) - 1;

	raw[(len < sizeof(*sunaddr)) ? len : last] = 0;
}

 
/*
 *	Unlink sk from unix_socket_list, if it is on it. The walk runs
 *	with interrupts disabled.
 *
 *	Note: Sockets may not be removed _during_ an interrupt or net_bh
 *	handler using this technique. They can be added although we do not
 *	use this facility.
 */
 
static void unix_remove_socket(unix_socket *sk)
{
	unix_socket **link;

	cli();
	for(link=&unix_socket_list; *link!=NULL; link=&((*link)->next))
	{
		if(*link!=sk)
			continue;
		*link=sk->next;		/* Splice the entry out */
		break;
	}
	sti();
}

/*
 *	Push sk onto the front of unix_socket_list with interrupts disabled.
 */
static void unix_insert_socket(unix_socket *sk)
{
	unix_socket *old_head;

	cli();
	old_head=unix_socket_list;
	sk->next=old_head;
	unix_socket_list=sk;
	sti();
}

/*
 *	Return the first socket on unix_socket_list whose bound inode is i,
 *	or NULL when there is none. The walk runs with interrupts disabled.
 */
static unix_socket *unix_find_socket(struct inode *i)
{
	unix_socket *match;

	cli();
	for(match=unix_socket_list; match!=NULL; match=match->next)
	{
		if(match->protinfo.af_unix.inode==i)
			break;
	}
	sti();
	return(match);
}

/*
 *	Deferred deletion of a unix socket. This is the timer handler armed
 *	by unix_delayed_delete(); data is the socket pointer. The socket is
 *	freed once it holds no locks and has no write memory outstanding,
 *	otherwise the timer is re-armed.
 */

static void unix_destroy_timer(unsigned long data)
{
	unix_socket *sk=(unix_socket *)data;

	if(sk->protinfo.af_unix.locks!=0 || sk->wmem_alloc!=0)
	{
		/* Still referenced. No real hurry, look again in 10 seconds or so */
		sk->timer.expires=jiffies+10*HZ;
		add_timer(&sk->timer);
		return;
	}

	if(sk->protinfo.af_unix.name)
		kfree(sk->protinfo.af_unix.name);
	sk_free(sk);
}
	 
	 
/*
 *	Arm the socket's timer so that unix_destroy_timer() gets a first
 *	look at it one second from now.
 */
static void unix_delayed_delete(unix_socket *sk)
{
	sk->timer.function=unix_destroy_timer;
	sk->timer.data=(unsigned long)sk;
	sk->timer.expires=jiffies+HZ;	/* First attempt after 1 second; the handler retries every 10 */
	add_timer(&sk->timer);
}
	
/*
 *	Tear a socket down: take it off the global list, drain its receive
 *	queue, drop its inode and then either free it at once or leave it
 *	to the deletion timer while locks or write buffers remain.
 */
static void unix_destroy_socket(unix_socket *sk)
{
	struct sk_buff *skb;

	unix_remove_socket(sk);

	for(;;)
	{
		unix_socket *osk;

		skb=skb_dequeue(&sk->receive_queue);
		if(skb==NULL)
			break;

		if(sk->state!=TCP_LISTEN)
		{
			/* Ordinary data; passed fds are erased in the kfree_skb hook */
			kfree_skb(skb,FREE_WRITE);
			continue;
		}

		/*
		 *	On a listener each queued buffer is a pending connect
		 *	and skb->sk is the connecting socket.
		 */
		osk=skb->sk;
		osk->state=TCP_CLOSE;
		kfree_skb(skb, FREE_WRITE);	/* Now surplus - free the skb first before the socket */
		osk->state_change(osk);		/* So the connect wakes and cleans up (if any) */
		/* osk will be destroyed when it gets to close or the timer fires */
	}

	if(sk->protinfo.af_unix.inode!=NULL)
	{
		iput(sk->protinfo.af_unix.inode);
		sk->protinfo.af_unix.inode=NULL;
	}

	sk->protinfo.af_unix.locks--;		/* Drop our own reference */
	if(sk->protinfo.af_unix.locks!=0 || sk->wmem_alloc!=0)
	{
		sk->dead=1;
		unix_delayed_delete(sk);	/* Try every so often until buffers are all freed */
		return;
	}

	if(sk->protinfo.af_unix.name)
		kfree(sk->protinfo.af_unix.name);
	sk_free(sk);
}


 
/*
 *	fcntl() entry point for AF_UNIX sockets. Nothing is implemented:
 *	every command is rejected with -EINVAL and all arguments are ignored.
 *
 *	Fixme: We need async I/O on AF_UNIX doing next.
 */
 
static int unix_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EINVAL;
}

/*
 *	Set a socket option. Only SOL_SOCKET level options exist for
 *	AF_UNIX; they are handed to sock_setsockopt(). Any other level
 *	yields -EOPNOTSUPP.
 */

static int unix_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
{
	unix_socket *sk=sock->data;

	if(level==SOL_SOCKET)
		return sock_setsockopt(sk,level,optname,optval,optlen);
	return -EOPNOTSUPP;
}


 
/*
 *	Read a socket option. Only SOL_SOCKET level options exist for
 *	AF_UNIX; they are handed to sock_getsockopt(). Any other level
 *	yields -EOPNOTSUPP.
 */
static int unix_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen)
{
	unix_socket *sk=sock->data;

	if(level==SOL_SOCKET)
		return sock_getsockopt(sk,level,optname,optval,optlen);
	return -EOPNOTSUPP;
}

 
/*
 *	Put a bound stream socket into the listening state and record the
 *	requested backlog.
 *
 *	Returns 0, -EOPNOTSUPP for a non-stream socket or -EINVAL when the
 *	socket has no name yet.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	unix_socket *sk=sock->data;
	int err=0;

	if(sk->type!=SOCK_STREAM)
		err=-EOPNOTSUPP;	/* Only stream sockets accept */
	else if(sk->protinfo.af_unix.name==NULL)
		err=-EINVAL;		/* No listens on an unbound socket */
	else
	{
		sk->max_ack_backlog=backlog;
		sk->state=TCP_LISTEN;
	}
	return err;
}

/*
 *	Default state_change/error_report callback: wake whoever sleeps
 *	on the socket, unless the socket is already dead.
 */
static void def_callback1(struct sock *sk)
{
	if(sk->dead)
		return;
	wake_up_interruptible(sk->sleep);
}

/*
 *	Default data_ready callback: wake sleepers and raise async
 *	notification (kind 1) on a live socket. len is unused.
 */
static void def_callback2(struct sock *sk, int len)
{
	if(sk->dead)
		return;
	wake_up_interruptible(sk->sleep);
	sock_wake_async(sk->socket, 1);
}

/*
 *	Default write_space callback: wake sleepers and raise async
 *	notification (kind 2) on a live socket.
 */
static void def_callback3(struct sock *sk)
{
	if(sk->dead)
		return;
	wake_up_interruptible(sk->sleep);
	sock_wake_async(sk->socket, 2);
}

 
static int unix_create(struct socket *sock, int protocol)
{
	unix_socket *sk;
	if(protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;
	sk=(unix_socket *)sk_alloc(GFP_KERNEL);
	if(sk==NULL)
		return -ENOMEM;
	switch(sock->type)
	{
		case SOCK_STREAM:
			break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
		case SOCK_RAW:
			sock->type=SOCK_DGRAM;
		case SOCK_DGRAM:
			break;
		default:
			sk_free(sk);
			return -ESOCKTNOSUPPORT;
	}
	sk->type=sock->type;
	init_timer(&sk->timer);
	skb_queue_head_init(&sk->write_queue);
	skb_queue_head_init(&sk->receive_queue);
	skb_queue_head_init(&sk->back_log);
	sk->protinfo.af_unix.family=AF_UNIX;
	sk->protinfo.af_unix.inode=NULL;
	sk->protinfo.af_unix.locks=1;		/* Us */
	sk->protinfo.af_unix.readsem=MUTEX;	/* single task reading lock */
	sk->rcvbuf=SK_RMEM_MAX;
	sk->sndbuf=SK_WMEM_MAX;
	sk->allocation=GFP_KERNEL;
	sk->state=TCP_CLOSE;
	sk->priority=SOPRI_NORMAL;
	sk->state_change=def_callback1;
	sk->data_ready=def_callback2;
	sk->write_space=def_callback3;
	sk->error_report=def_callback1;
	sk->mtu=4096;
	sk->socket=sock;
	sock->data=(void *)sk;
	sk->sleep=sock->wait;
	unix_insert_socket(sk);
	return 0;
}

/*
 *	Set up newsock as a fresh AF_UNIX socket by calling unix_create()
 *	with protocol 0. Nothing is read from oldsock here.
 *	Returns whatever unix_create() returns.
 */
static int unix_dup(struct socket *newsock, struct socket *oldsock)
{
	return unix_create(newsock,0);
}


 
/*
 *	Release a socket: wake anyone sleeping on it, mark it dead, shut
 *	down and unlock the peer (if any), then destroy the socket and run
 *	the fd garbage collector.
 *
 *	sock	socket being closed; sock->data may be NULL.
 *	peer	not used here.
 *
 *	Always returns 0.
 */
static int unix_release(struct socket *sock, struct socket *peer)
{
	unix_socket *sk=sock->data;
	unix_socket *skpair;
	
	/* May not have data attached */
	
	if(sk==NULL)
		return 0;
		
	sk->state_change(sk);
	sk->dead=1;
	skpair=(unix_socket *)sk->protinfo.af_unix.other;	/* Person we send to (default) */
	if(sk->type==SOCK_STREAM && skpair!=NULL && skpair->state!=TCP_LISTEN)
	{
		skpair->shutdown=SHUTDOWN_MASK;		/* No more writes */
		skpair->state_change(skpair);		/* Wake any blocked writes */
	}
	if(skpair!=NULL)
		skpair->protinfo.af_unix.locks--;	/* It may now die */
	sk->protinfo.af_unix.other=NULL;		/* No pair */

	/*
	 *	Break the sock <-> socket links BEFORE destroying.
	 *	unix_destroy_socket() may sk_free() the sock on the spot, so
	 *	sk must not be written to once it has been called.
	 *
	 *	FIXME: BSD difference: In BSD all sockets connected to use get ECONNRESET and we die on the spot. In
	 *	Linux we behave like files and pipes do and wait for the last dereference.
	 */
	 
	sock->data = NULL;
	sk->socket = NULL;

	unix_destroy_socket(sk);			/* Try to flush out this socket. Throw out buffers at least */
	unix_gc();					/* Garbage collect fds */	
	
	return 0;
}


 
/*
 *	Look up the socket bound to a filesystem path.
 *
 *	path	kernel-space pathname (the lookup runs with fs set to the
 *		kernel data segment).
 *	error	receives a negative errno when NULL is returned: either
 *		the open_namei() failure or -ECONNREFUSED when the inode
 *		has no socket on our list.
 *
 *	Returns the socket or NULL. The inode reference is dropped again
 *	before returning.
 */
static unix_socket *unix_find_other(char *path, int *error)
{
	struct inode *inode;
	unix_socket *found;
	int saved_fs;
	int rc;

	saved_fs=get_fs();
	set_fs(get_ds());
	rc=open_namei(path, 2, S_IFSOCK, &inode, NULL);
	set_fs(saved_fs);

	if(rc<0)
	{
		*error=rc;
		return NULL;
	}

	found=unix_find_socket(inode);
	iput(inode);
	if(found==NULL)
		*error=-ECONNREFUSED;
	return found;
}


 
/*
 *	Bind a socket to a pathname: remember the name and create the
 *	socket node in the filesystem.
 *
 *	uaddr		a struct sockaddr_un (kernel copy)
 *	addr_len	its length; must be 3..sizeof(struct sockaddr_un)
 *
 *	Returns 0, -EINVAL (already bound, bad length or family),
 *	-ENOMEM, -EADDRINUSE when the path exists, or the error from
 *	do_mknod()/open_namei().
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	unix_socket *sk=sock->data;
	int old_fs;
	int err;
	
	if(sk->protinfo.af_unix.name)
		return -EINVAL;		/* Already bound */
	
	if(addr_len>sizeof(struct sockaddr_un) || addr_len<3 || sunaddr->sun_family!=AF_UNIX)
		return -EINVAL;
	unix_mkname(sunaddr, addr_len);
	/*
	 *	Put ourselves in the filesystem
	 */
	if(sk->protinfo.af_unix.inode!=NULL)
		return -EINVAL;
	
	sk->protinfo.af_unix.name=kmalloc(addr_len+1, GFP_KERNEL);
	if(sk->protinfo.af_unix.name==NULL)
		return -ENOMEM;
	/*
	 *	unix_mkname() has NUL terminated the address within its first
	 *	addr_len+1 bytes, so the path is a string that fits the buffer.
	 *	Copy just the string: copying addr_len+1 bytes starting at
	 *	sun_path would read beyond the end of the supplied address.
	 */
	strcpy(sk->protinfo.af_unix.name, sunaddr->sun_path);
	
	/* The name is a kernel pointer, so the fs calls must look in kernel space */
	old_fs=get_fs();
	set_fs(get_ds());
	
	err=do_mknod(sk->protinfo.af_unix.name,S_IFSOCK|S_IRWXUGO,0);
	if(err==0)
		err=open_namei(sk->protinfo.af_unix.name, 2, S_IFSOCK, &sk->protinfo.af_unix.inode, NULL);
	
	set_fs(old_fs);
	
	if(err<0)
	{
		kfree_s(sk->protinfo.af_unix.name,addr_len+1);
		sk->protinfo.af_unix.name=NULL;
		if(err==-EEXIST)
			return -EADDRINUSE;
		else
			return err;
	}
	
	return 0;
	
}


 
/*
 *	Connect a socket to the socket bound at the given pathname.
 *
 *	Datagram sockets simply switch their default destination. Stream
 *	sockets queue a zero length marker buffer on the target's receive
 *	queue and wait for unix_accept() to move them to TCP_ESTABLISHED.
 *
 *	uaddr		a struct sockaddr_un (kernel copy)
 *	addr_len	its length
 *	flags		O_NONBLOCK is honoured
 *
 *	Returns 0 or a negative errno: -EINVAL, -EISCONN, -EALREADY,
 *	-EINPROGRESS, -ECONNREFUSED, -EPROTOTYPE, -ERESTARTSYS, or whatever
 *	unix_find_other()/sock_alloc_send_skb() reported.
 */
static int unix_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags)
{
	unix_socket *sk=sock->data;
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	unix_socket *other;
	struct sk_buff *skb;
	int err;

	/*
	 *	A stream socket that already has a peer: this is a repeat
	 *	call, so report how the earlier attempt has turned out.
	 */
	if(sk->type==SOCK_STREAM && sk->protinfo.af_unix.other)
	{
		if(sock->state==SS_CONNECTING && sk->state==TCP_ESTABLISHED)
		{
			sock->state=SS_CONNECTED;
			return 0;
		}
		if(sock->state==SS_CONNECTING && sk->state == TCP_CLOSE)
		{
			sock->state=SS_UNCONNECTED;
			return -ECONNREFUSED;
		}
		if(sock->state!=SS_CONNECTING)
			return -EISCONN;
		if(flags&O_NONBLOCK)
			return -EALREADY;
		/*
		 *	Drop through the connect up logic to the wait.
		 */
	}
	
	if(addr_len < sizeof(sunaddr->sun_family)+1 || sunaddr->sun_family!=AF_UNIX)
		return -EINVAL;
		
	unix_mkname(sunaddr, addr_len);
		
	/*
	 *	Datagram: drop the lock on any previous destination, then
	 *	look up and lock the new one. No handshake is involved.
	 */
	if(sk->type==SOCK_DGRAM)
	{
		if(sk->protinfo.af_unix.other)
		{
			sk->protinfo.af_unix.other->protinfo.af_unix.locks--;
			sk->protinfo.af_unix.other=NULL;
			sock->state=SS_UNCONNECTED;
		}
		other=unix_find_other(sunaddr->sun_path, &err);
		if(other==NULL)
			return err;
		if(other->type!=sk->type)
			return -EPROTOTYPE;
		other->protinfo.af_unix.locks++;
		sk->protinfo.af_unix.other=other;
		sock->state=SS_CONNECTED;
		sk->state=TCP_ESTABLISHED;
		return 0;			/* Done */
	}
	

	if(sock->state==SS_UNCONNECTED)
	{
		/*
		 *	Now ready to connect
		 */
	 
		skb=sock_alloc_send_skb(sk, 0, 0, 0, &err); /* Marker object */
		if(skb==NULL)
			return err;
		skb->sk=sk;				/* So they know it is us */
		skb->free=1;
		skb->h.filp=NULL;
		sk->state=TCP_CLOSE;
		unix_mkname(sunaddr, addr_len);
		other=unix_find_other(sunaddr->sun_path, &err);
		if(other==NULL)
		{
			kfree_skb(skb, FREE_WRITE);
			return err;
		}
		if(other->type!=sk->type)
		{
			kfree_skb(skb, FREE_WRITE);
			return -EPROTOTYPE;
		}
		/*
		 *	NOTE(review): nothing here checks that other is in
		 *	TCP_LISTEN state, nor compares ack_backlog with
		 *	max_ack_backlog - confirm this is intended.
		 */
		other->protinfo.af_unix.locks++;		/* Lock the other socket so it doesn't run off for a moment */
		other->ack_backlog++;
		sk->protinfo.af_unix.other=other;
		skb_queue_tail(&other->receive_queue,skb);
		sk->state=TCP_SYN_SENT;
		sock->state=SS_CONNECTING;
		/* NOTE(review): no matching cli() precedes this sti() in this function */
		sti();
		other->data_ready(other,0);		/* Wake up ! */		
	}
			
	
	/* Wait for an accept */
	
	/* Interrupts are off so the state test and the sleep cannot be split by a wakeup */
	cli();
	while(sk->state==TCP_SYN_SENT)
	{
		if(flags&O_NONBLOCK)
		{
			sti();
			return -EINPROGRESS;
		}
		interruptible_sleep_on(sk->sleep);
		if(current->signal & ~current->blocked)
		{
			/* The request stays queued; a repeat call lands in the block at the top */
			sti();
			return -ERESTARTSYS;
		}
	}
	
	/*
	 *	Has the other end closed on us ?
	 */
	 
	if(sk->state==TCP_CLOSE)
	{
		/* Give back the lock taken on the target when the request was queued */
		sk->protinfo.af_unix.other->protinfo.af_unix.locks--;
		sk->protinfo.af_unix.other=NULL;
		sock->state=SS_UNCONNECTED;
		sti();
		return -ECONNREFUSED;
	}
	
	/*
	 *	Amazingly it has worked
	 */
	 
	sock->state=SS_CONNECTED;
	sti();
	return 0;
	
}

 
/*
 *	Join two freshly created sockets back to back. Each takes a lock
 *	on, and points at, the other; both become TCP_ESTABLISHED.
 *	Always returns 0.
 */
static int unix_socketpair(struct socket *a, struct socket *b)
{
	unix_socket *ska=a->data;
	unix_socket *skb=b->data;

	/* First end */
	ska->protinfo.af_unix.locks++;
	ska->protinfo.af_unix.other=skb;
	ska->state=TCP_ESTABLISHED;

	/* Second end */
	skb->protinfo.af_unix.locks++;
	skb->protinfo.af_unix.other=ska;
	skb->state=TCP_ESTABLISHED;

	return 0;
}


 
/*
 *	Accept a pending connection on a listening stream socket.
 *
 *	sock	the listening socket
 *	newsock	socket that becomes the server end of the connection;
 *		its protocol data must already exist (newsock->data)
 *	flags	O_NONBLOCK makes an empty queue return -EAGAIN
 *
 *	Returns 0, -EOPNOTSUPP (not a stream socket), -EINVAL (not
 *	listening), -ENOMEM, -EAGAIN or -ERESTARTSYS.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
	unix_socket *sk=sock->data;
	unix_socket *newsk, *tsk;
	struct sk_buff *skb;
	
	if(sk->type!=SOCK_STREAM)
	{
		return -EOPNOTSUPP;
	}
	if(sk->state!=TCP_LISTEN)
	{
		return -EINVAL;
	}
		
	/* The new socket gets its own copy of the listener's name */
	newsk=newsock->data;
	if(sk->protinfo.af_unix.name!=NULL)
	{
		newsk->protinfo.af_unix.name=kmalloc(strlen(sk->protinfo.af_unix.name)+1, GFP_KERNEL);
		if(newsk->protinfo.af_unix.name==NULL)
			return -ENOMEM;
		strcpy(newsk->protinfo.af_unix.name, sk->protinfo.af_unix.name);
	}
		
	/*
	 *	Pull the next connect request (a marker skb queued by
	 *	unix_connect) off the receive queue, sleeping until one
	 *	arrives unless O_NONBLOCK is set.
	 */
	do
	{
		cli();
		skb=skb_dequeue(&sk->receive_queue);
		if(skb==NULL)
		{
			if(flags&O_NONBLOCK)
			{
				sti();
				return -EAGAIN;
			}
			interruptible_sleep_on(sk->sleep);
			if(current->signal & ~current->blocked)
			{
				sti();
				return -ERESTARTSYS;
			}
			sti();
		}
	}
	while(skb==NULL);
	/* Interrupts are still off here: the successful pass through the loop did not sti() */
	tsk=skb->sk;			/* The connecting socket */
	kfree_skb(skb, FREE_WRITE);	/* The buffer is just used as a tag */
	sk->ack_backlog--;
	newsk->protinfo.af_unix.other=tsk;
	tsk->protinfo.af_unix.other=newsk;
	tsk->state=TCP_ESTABLISHED;
	newsk->state=TCP_ESTABLISHED;
	newsk->protinfo.af_unix.locks++;	/* Swap lock over */
	sk->protinfo.af_unix.locks--;	/* Locked to child socket not master */
	tsk->protinfo.af_unix.locks++;	/* Back lock */
	sti();
	tsk->state_change(tsk);		/* Wake up any sleeping connect */
	sock_wake_async(tsk->socket, 0);
	return 0;
}


 
/*
 *	Report the name of this socket, or of its peer when peer is set.
 *
 *	uaddr		filled in as a struct sockaddr_un
 *	uaddr_len	receives the family size plus the path length
 *			including its NUL; an unbound socket gives an
 *			empty path
 *
 *	Returns 0, or -ENOTCONN when the peer is asked for and there is
 *	none.
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	unix_socket *sk=sock->data;

	if(peer)
	{
		sk=sk->protinfo.af_unix.other;
		if(sk==NULL)
			return -ENOTCONN;
	}

	sunaddr->sun_family=AF_UNIX;
	if(sk->protinfo.af_unix.name!=NULL)
	{
		*uaddr_len=sizeof(sunaddr->sun_family)+strlen(sk->protinfo.af_unix.name)+1;
		strcpy(sunaddr->sun_path,sk->protinfo.af_unix.name);		/* 108 byte limited */
	}
	else
	{
		/* Not bound */
		*sunaddr->sun_path=0;
		*uaddr_len=sizeof(sunaddr->sun_family)+1;
	}
	return 0;
}

/*
 *	Support routines for struct cmsghdr handling
 */

/*
 *	Copy a control message from user space into a freshly kmalloc'ed
 *	kernel buffer.
 *
 *	userp	user space address of the control data
 *	len	number of bytes to copy; must be 1..256
 *
 *	Returns the kernel copy, which the caller must kfree(), or NULL
 *	when len is out of range or no memory is available.
 *
 *	NOTE(review): userp is not checked with verify_area() here;
 *	confirm that the caller has already done so.
 */
 
static struct cmsghdr *unix_copyrights(void *userp, int len)
{
	struct cmsghdr *cm;

	if(len>256|| len <=0)
		return NULL;
	cm=kmalloc(len, GFP_KERNEL);
	if(cm==NULL)
		return NULL;		/* Don't copy into a failed allocation */
	memcpy_fromfs(cm, userp, len);
	return cm;
}

/*
 *	Return a header block: copy len bytes of the kernel control
 *	message cm out to user space at userp, then free cm. The caller
 *	must not touch cm afterwards.
 */
 
static void unix_returnrights(void *userp, int len, struct cmsghdr *cm)
{
	memcpy_tofs(userp, cm, len);
	kfree(cm);
}

/*
 *	Copy file descriptors into system space.
 *
 *	The descriptor numbers following the cmsghdr are checked against
 *	the current task's file table. Each file then gains a reference,
 *	is reported to unix_inflight() and is stored in fp[].
 *
 *	Return number copied or negative error code: -EINVAL when there
 *	are UNIX_MAX_FD or more descriptors, -EBADF for a bad descriptor.
 *	No references are taken when an error is returned. sk is unused.
 */
 
static int unix_fd_copy(struct sock *sk, struct cmsghdr *cmsg, struct file **fp)
{
	int *fdp=(int *)cmsg->cmsg_data;
	int num=cmsg->cmsg_len-sizeof(struct cmsghdr);
	int i;

	num/=4;	/* Odd bytes are forgotten in BSD not errored */

	if(num>=UNIX_MAX_FD)
		return -EINVAL;

	/*
	 *	First pass: every descriptor must be in range and open.
	 */
	for(i=0; i<num; i++)
	{
		int fd=fdp[i];

		if(fd<0 || fd>=NR_OPEN || current->files->fd[fd]==NULL)
			return -EBADF;
	}

	/*
	 *	Second pass: add another reference to these files.
	 */
	for(i=0; i<num; i++)
	{
		struct file *f=current->files->fd[fdp[i]];

		fp[i]=f;
		f->f_count++;
		unix_inflight(f);
	}

	return num;
}

/*
 *	Free the descriptors in the array: each of the num files in fp[]
 *	is passed to close_fp() and then to unix_notinflight().
 *	sk is unused.
 */

static void unix_fd_free(struct sock *sk, struct file **fp, int num)
{
	int i=0;

	while(i<num)
	{
		struct file *f=fp[i++];

		close_fp(f);
		unix_notinflight(f);
	}
}

/*
 *	Count the free descriptors available to a process: the smaller of
 *	NR_OPEN and the RLIMIT_NOFILE soft limit, less the number of
 *	descriptors currently open, and never below zero.
 *
 *	Interpretation issue: Is the limit the highest descriptor (buggy
 *	allowing passed fd's higher up to cause a limit to be exceeded) -
 *	but how the old code did it - or like this...
 */

static int unix_files_free(void)
{
	int used=0;
	int limit=NR_OPEN;
	int fd;

	for(fd=0; fd<NR_OPEN; fd++)
		if(current->files->fd[fd])
			used++;

	if(limit>current->rlim[RLIMIT_NOFILE].rlim_cur)
		limit=current->rlim[RLIMIT_NOFILE].rlim_cur;

	return (used>=limit) ? 0 : limit-used;
}

/*
 *	Perform the AF_UNIX file descriptor pass out functionality. This
 *	is nasty and messy as is the whole design of BSD file passing.
 */

static void unix_detach_fds(struct sk_buff *skb, struct cmsghdr *cmsg)
{
	int i;
	/* count of space in parent for fds */
	int cmnum;
	struct file **fp;
	struct file **ufp;
	int *cmfptr=NULL;	/* =NULL To keep gcc happy */
	/* number of fds actually passed */
	int fdnum;
	int ffree;
	int ufn=0;

	if(cmsg==NULL)
		cmnum=0;
	else
	{
		cmnum=cmsg->cmsg_len-sizeof(struct cmsghdr);
		cmnum/=sizeof(int);
		cmfptr=(int *)&cmsg->cmsg_data;
	}
	
	memcpy(&fdnum,skb->h.filp,sizeof(int));
	fp=(struct file **)(skb->h.filp+sizeof(int));
	if(cmnum>fdnum)
		cmnum=fdnum;
	ffree=unix_files_free();
	if(cmnum>ffree)
		cmnum=ffree;
	ufp=¤t->files->fd[0];
	
	/*
	 *	Copy those that fit
	 */
	for(i=0;i<cmnum;i++)
	{
		/*
		 *	Insert the fd
		 */
		while(ufp[ufn]!=NULL)
			ufn++;
		ufp[ufn]=fp[i];
		*cmfptr++=ufn;
		FD_CLR(ufn,¤t->files->close_on_exec);
		unix_notinflight(fp[i]);
	}
	/*
	 *	Dump those that don't
	 */
	for(;i<fdnum;i++)
	{
		close_fp(fp[i]);
		unix_notinflight(fp[i]);
	}
	kfree(skb->h.filp);
	skb->h.filp=NULL;

	/* no need to use destructor */
	skb->destructor = NULL;
}

/*
 *	skb destructor installed by unix_attach_fds(): calls
 *	unix_detach_fds() with no control message, which closes every
 *	file still attached to the buffer.
 */
static void unix_destruct_fds(struct sk_buff *skb)
{
	unix_detach_fds(skb,NULL);
}
	
/*
 *	Attach the file descriptor array to an sk_buff
 */
static void unix_attach_fds(int fpnum,struct file **fp,struct sk_buff *skb)
{

	skb->h.filp=kmalloc(sizeof(int)+fpnum*sizeof(struct file *), 
							GFP_KERNEL);
	/* number of descriptors starts block */
	memcpy(skb->h.filp,&fpnum,sizeof(int));
	/* actual  descriptors */
	memcpy(skb->h.filp+sizeof(int),fp,fpnum*sizeof(struct file *));
	skb->destructor = unix_destruct_fds;
}

 
/*
 *	Send AF_UNIX data.
 *
 *	sock		sending socket
 *	msg		message header: optional destination (msg_name),
 *			data (msg_iov) and optional SCM_RIGHTS control
 *			message (msg_control)
 *	len		number of data bytes to send
 *	nonblock	passed on to sock_alloc_send_skb()
 *	flags		must be 0; MSG_OOB gives -EOPNOTSUPP, anything
 *			else -EINVAL
 *
 *	Returns the number of bytes queued or a negative errno. If an
 *	error strikes after part of the data went out, the partial count
 *	is returned.
 */
		
static int unix_sendmsg(struct socket *sock, struct msghdr *msg, int len, int nonblock, int flags)
{
	unix_socket *sk=sock->data;
	unix_socket *other;
	struct sockaddr_un *sunaddr=msg->msg_name;
	int err,size;
	struct sk_buff *skb;
	int limit=0;
	int sent=0;
	struct file *fp[UNIX_MAX_FD];
	/* number of fds waiting to be passed, 0 means either
	 * no fds to pass or they've already been passed 
	 */
	int fpnum=0;

	if(sk->err)
		return sock_error(sk);

	if(flags&MSG_OOB)
		return -EOPNOTSUPP;
			
	if(flags)	/* For now */ {
		return -EINVAL;
	}
		
	/* An explicit destination is never accepted on a stream socket */
	if(sunaddr!=NULL)
	{
		if(sock->type==SOCK_STREAM)
		{
			if(sk->state==TCP_ESTABLISHED)
				return -EISCONN;
			else
				return -EOPNOTSUPP;
		}
	}

	/* Without a destination we need a connected peer */
	if(sunaddr==NULL)
	{
		if(sk->protinfo.af_unix.other==NULL)
			return -ENOTCONN;
	}

	/*
	 *	A control message has been attached. It must be exactly one
	 *	SOL_SOCKET/SCM_RIGHTS message filling the control buffer.
	 *	unix_fd_copy() takes a reference on each file it names.
	 */
	if(msg->msg_control) 
	{
		struct cmsghdr *cm=unix_copyrights(msg->msg_control, 
						msg->msg_controllen);
		if(cm==NULL || msg->msg_controllen<sizeof(struct cmsghdr) ||
		   cm->cmsg_type!=SCM_RIGHTS ||
		   cm->cmsg_level!=SOL_SOCKET ||
		   msg->msg_controllen!=cm->cmsg_len)
		{
			/* NOTE(review): cm may be NULL when it reaches kfree() here */
			kfree(cm);
		   	return -EINVAL;
		}
		fpnum=unix_fd_copy(sk,cm,fp);
		kfree(cm);
		if(fpnum<0) {
			return fpnum;
		}
	}

	while(sent < len)
	{
		/*
		 *	Optimisation for the fact that under 0.01% of X messages typically
		 *	need breaking up.
		 */
		 
		size=len-sent;

		if(size>(sk->sndbuf-sizeof(struct sk_buff))/2)	/* Keep two messages in the pipe so it schedules better */
		{
			/* Datagrams are never split: give up and release the file references */
			if(sock->type==SOCK_DGRAM)
			{
				unix_fd_free(sk,fp,fpnum);
				return -EMSGSIZE;
			}
			size=(sk->sndbuf-sizeof(struct sk_buff))/2;
		}
		/*
		 *	Keep to page sized kmalloc()'s as various people
		 *	have suggested. Big mallocs stress the vm too
		 *	much.
		 */

		if(size > 4000 && sock->type!=SOCK_DGRAM)
			limit = 4000;	/* Fall back to 4K if we can't grab a big buffer this instant */
		else
			limit = 0;	/* Otherwise just grab and wait */

		/*
		 *	Grab a buffer
		 */
		 
		skb=sock_alloc_send_skb(sk,size,limit,nonblock, &err);
		
		if(skb==NULL)
		{
			unix_fd_free(sk,fp,fpnum);
			if(sent)
			{
				/* Park the error; the sk->err test at the top of a later call reports it */
				sk->err=-err;
				return sent;
			}
			return err;
		}
		size=skb_tailroom(skb);		/* If we dropped back on a limit then our skb is smaller */

		skb->sk=sk;
		skb->free=1;
		
		/* The files ride on the first buffer only; fpnum is zeroed once they are attached */
		if(fpnum)
		{
			unix_attach_fds(fpnum,fp,skb);
			fpnum=0;
		}
		else
			skb->h.filp=NULL;

		memcpy_fromiovec(skb_put(skb,size),msg->msg_iov, size);

		cli();
		if(sunaddr==NULL)
		{
			other=sk->protinfo.af_unix.other;
			/* A datagram peer that has died is dropped, together with our lock on it */
			if(sock->type==SOCK_DGRAM && other->dead)
			{
				other->protinfo.af_unix.locks--;
				sk->protinfo.af_unix.other=NULL;
				sock->state=SS_UNCONNECTED;
				sti();
				kfree_skb(skb, FREE_WRITE);
				if(!sent)
					return -ECONNRESET;
				else
					return sent;
			}
		}
		else
		{
			/*
			 *	NOTE(review): unix_find_other() calls open_namei()
			 *	and runs here between cli() and sti().
			 */
			unix_mkname(sunaddr, msg->msg_namelen);
			other=unix_find_other(sunaddr->sun_path, &err);
			if(other==NULL)
			{
				sti();
				kfree_skb(skb, FREE_WRITE);
				if(sent)
					return sent;
				else
					return err;
			}
		}
		skb_queue_tail(&other->receive_queue, skb);
		sti();
		/* if we sent an fd, only do it once */
		other->data_ready(other,size);
		sent+=size;
	}
	return sent;
}


 
/*
 *	Sleep until data has arrived. The queue is tested with interrupts
 *	off, so data arriving between the test and the sleep is not missed.
 *	SO_WAITDATA is set in the socket flags for the duration of the
 *	sleep.
 */
 
static void unix_data_wait(unix_socket * sk)
{
	cli();
	if (skb_peek(&sk->receive_queue)) {
		/* Something is already queued */
		sti();
		return;
	}
	sk->socket->flags |= SO_WAITDATA;
	interruptible_sleep_on(sk->sleep);
	sk->socket->flags &= ~SO_WAITDATA;
	sti();
}


 
static int unix_recvmsg(struct socket *sock, struct msghdr *msg, int size, int noblock, int flags, int *addr_len)
{
	unix_socket *sk=sock->data;
	struct sockaddr_un *sunaddr=msg->msg_name;
	struct sk_buff *skb;
	int copied=0;
	unsigned char *sp;
	int len;
	int num;
	struct iovec *iov=msg->msg_iov;
	struct cmsghdr *cm=NULL;
	int ct=msg->msg_iovlen;

	if(flags&MSG_OOB)
		return -EOPNOTSUPP;
		
	if(addr_len)
		*addr_len=0;
		
	if(sk->err)
		return sock_error(sk);

	if(msg->msg_control) 
	{
		cm=unix_copyrights(msg->msg_control, 
			msg->msg_controllen);
		if(msg->msg_controllen<sizeof(struct cmsghdr)
#if 0 
/*		investigate this further -- Stevens example doesn't seem to care */
		||
		   cm->cmsg_type!=SCM_RIGHTS ||
		   cm->cmsg_level!=SOL_SOCKET ||
		   msg->msg_controllen!=cm->cmsg_len
#endif
		)
		{
			kfree(cm);
/*			printk("recvmsg: Bad msg_control\n");*/
		   	return -EINVAL;
		}
	}
	
	down(&sk->protinfo.af_unix.readsem);		/* Lock the socket */
	while(ct--)
	{
		int done=0;
		sp=iov->iov_base;
		len=iov->iov_len;
		iov++;
		
		while(done<len)
		{
			if (copied && (flags & MSG_PEEK))
				goto out;
			if (copied == size)
				goto out;
			skb=skb_dequeue(&sk->receive_queue);
			if(skb==NULL)
			{
				up(&sk->protinfo.af_unix.readsem);
				if(sk->shutdown & RCV_SHUTDOWN)
					return copied;
				if(copied)
					return copied;
				if(noblock)
					return -EAGAIN;
				if(current->signal & ~current->blocked)
					return -ERESTARTSYS;
				unix_data_wait(sk);
				down(&sk->protinfo.af_unix.readsem);
				continue;
			}
			if(msg->msg_name!=NULL)
			{
				sunaddr->sun_family=AF_UNIX;
				if(skb->sk->protinfo.af_unix.name)
				{
					memcpy(sunaddr->sun_path, skb->sk->protinfo.af_unix.name, 108);
					if(addr_len)
						*addr_len=strlen(sunaddr->sun_path)+sizeof(short);
				}
				else
					if(addr_len)
						*addr_len=sizeof(short);
			}

			num=min(skb->len,len-done);
			memcpy_tofs(sp, skb->data, num);

			if (skb->h.filp!=NULL)
				unix_detach_fds(skb,cm);

			copied+=num;
			done+=num;
			sp+=num;
			if (!(flags & MSG_PEEK))
				skb_pull(skb, num);
			/* put the skb back if we didn't use it up.. */
			if (skb->len) {
				skb_queue_head(&sk->receive_queue, skb);
				continue;
			}
			kfree_skb(skb, FREE_WRITE);
			if(sock->type==SOCK_DGRAM || cm)
				goto out;
		}
	}
out:
	up(&sk->protinfo.af_unix.readsem);
	if(cm)
		unix_returnrights(msg->msg_control,msg->msg_controllen,cm);
	return copied;
}


 
static int unix_shutdown(struct socket *sock, int mode)
{
	unix_socket *sk=(unix_socket *)sock->data;
	unix_socket *other=sk->protinfo.af_unix.other;
	if(mode&SEND_SHUTDOWN)
	{
		sk->shutdown|=SEND_SHUTDOWN;
		sk->state_change(sk);
		if(other)
		{
			other->shutdown|=RCV_SHUTDOWN;
			other->state_change(other);
		}
	}
	other=sk->protinfo.af_unix.other;
	if(mode&RCV_SHUTDOWN)
	{
		sk->shutdown|=RCV_SHUTDOWN;
		sk->state_change(sk);
		if(other)
		{
			other->shutdown|=SEND_SHUTDOWN;
			other->state_change(other);
		}
	}
	return 0;
}

		
		
 
/*
 *	select() support: hands the socket's protocol data, the select
 *	type and the wait table to datagram_select() and returns its
 *	result. This is done for every socket type.
 */
static int unix_select(struct socket *sock,  int sel_type, select_table *wait)
{
	return datagram_select(sock->data,sel_type,wait);
}


 
/*
 *	ioctl() support.
 *
 *	TIOCOUTQ	stores sndbuf minus wmem_alloc (never below 0)
 *	TIOCINQ		stores the length of the first queued buffer, or
 *			0 when the receive queue is empty; not valid on
 *			a listening socket
 *
 *	arg is the user space address of the unsigned long that receives
 *	the answer.
 *
 *	Returns 0, the verify_area() error, or -EINVAL for a listening
 *	socket (TIOCINQ) and for any other command.
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unix_socket *sk=sock->data;
	struct sk_buff *head;
	long amount=0;
	int err;

	if(cmd==TIOCOUTQ)
	{
		err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long));
		if(err)
			return err;
		amount=sk->sndbuf-sk->wmem_alloc;
		if(amount<0)
			amount=0;
		put_fs_long(amount,(unsigned long *)arg);
		return 0;
	}

	if(cmd==TIOCINQ)
	{
		if(sk->state==TCP_LISTEN)
			return -EINVAL;
		/* These two are safe on a single CPU system as only user tasks fiddle here */
		head=skb_peek(&sk->receive_queue);
		if(head!=NULL)
			amount=head->len;
		err=verify_area(VERIFY_WRITE,(void *)arg,sizeof(unsigned long));
		if(err)
			return err;
		put_fs_long(amount,(unsigned long *)arg);
		return 0;
	}

	return -EINVAL;
}

#ifdef CONFIG_PROC_FS
/*
 *	Produce the text registered as "unix" by unix_proto_init(): a
 *	header line followed by one line per socket on unix_socket_list.
 *
 *	buffer	where the text is formatted
 *	start	set to the first byte within buffer that belongs to the
 *		requested window
 *	offset	position in the whole text at which the window begins
 *	length	size of the window
 *	dummy	unused
 *
 *	Returns the number of valid bytes at *start, at most length.
 *
 *	NOTE(review): the list is walked without cli() and the sprintf()
 *	calls are not bounded by the size of buffer.
 */
static int unix_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
	off_t pos=0;
	off_t begin=0;
	int len=0;
	unix_socket *s=unix_socket_list;
	
	len+= sprintf(buffer,"Num       RefCount Protocol Flags    Type St "
	    "Inode Path\n");
	
	while(s!=NULL)
	{
		/* The "Protocol" column is always printed as 0 */
		len+=sprintf(buffer+len,"%p: %08X %08X %08lX %04X %02X %5ld",
			s,
			s->protinfo.af_unix.locks,
			0,
			s->socket->flags,
			s->socket->type,
			s->socket->state,
			s->socket->inode ? s->socket->inode->i_ino : 0);
		if(s->protinfo.af_unix.name!=NULL)
			len+=sprintf(buffer+len, " %s\n", s->protinfo.af_unix.name);
		else
			buffer[len++]='\n';
		
		/*
		 *	begin is the text position of buffer[0] and pos the
		 *	position reached so far. Text lying wholly before the
		 *	window is discarded by restarting at the front of buffer.
		 */
		pos=begin+len;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		/* Stop once the window has been filled */
		if(pos>offset+length)
			break;
		s=s->next;
	}
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	return len;
}
#endif

/*
 *	Operations table for the AF_UNIX family, registered with
 *	sock_register() in unix_proto_init(). The initialiser is
 *	positional: the family number first, then one handler per entry.
 *	NOTE(review): the order must match the declaration of struct
 *	proto_ops, which is not in this file.
 */
struct proto_ops unix_proto_ops = {
	AF_UNIX,
	
	unix_create,
	unix_dup,
	unix_release,
	unix_bind,
	unix_connect,
	unix_socketpair,
	unix_accept,
	unix_getname,
	unix_select,
	unix_ioctl,
	unix_listen,
	unix_shutdown,
	unix_setsockopt,
	unix_getsockopt,
	unix_fcntl,
	unix_sendmsg,
	unix_recvmsg
};


/*
 *	Protocol initialisation: announce ourselves, register the AF_UNIX
 *	operations and, with CONFIG_PROC_FS, register the "unix" proc
 *	entry served by unix_get_info(). pro is unused.
 */
void unix_proto_init(struct net_proto *pro)
{
#ifdef CONFIG_PROC_FS
	/*
	 *	The entry is a static object, not a compound literal. In
	 *	standard C a compound literal inside a function only lives
	 *	until the function returns, and proc_net_register() is
	 *	handed its address (presumably to keep - TODO confirm).
	 */
	static struct proc_dir_entry unix_proc_entry = {
		PROC_NET_UNIX,  4, "unix",
		S_IFREG | S_IRUGO, 1, 0, 0,
		0, &proc_net_inode_operations,
		unix_get_info
	};
#endif

	printk(KERN_INFO "NET3: Unix domain sockets 0.12 for Linux NET3.035.\n");
	sock_register(unix_proto_ops.family, &unix_proto_ops);
#ifdef CONFIG_PROC_FS
	proc_net_register(&unix_proc_entry);
#endif
}
/*
 * Local variables:
 *  compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
 * End:
 */