/*
 *  linux/drivers/block/ll_rw_blk.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
 */

/*
 * This handles all read/write requests to block devices
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/config.h>
#include <linux/locks.h>
#include <linux/mm.h>

#include <asm/system.h>
#include <asm/io.h>
#include <linux/blk.h>


/*
 * The request-struct contains all necessary data
 * to load a nr of sectors into memory
 */ 
static struct request all_requests[NR_REQUEST];

/*
 * The "disk" task queue is used to start the actual requests
 * after a plug
 */
DECLARE_TASK_QUEUE(tq_disk);

/*
 * used to wait on when there are no free requests
 */
struct wait_queue * wait_for_request = NULL;

/* This specifies how many sectors to read ahead on the disk.  */

int read_ahead[MAX_BLKDEV] = {0, };

/* blk_dev_struct is:
 *	*request_fn
 *	*current_request
 */
struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */

/*
 * blk_size contains the size of all block-devices in units of 1024 byte
 * sectors:
 *
 * blk_size[MAJOR][MINOR]
 *
 * if (!blk_size[MAJOR]) then no minor size checking is done.
 */
int * blk_size[MAX_BLKDEV] = { NULL, NULL, };

/*
 * blksize_size contains the size of all block-devices:
 *
 * blksize_size[MAJOR][MINOR]
 *
 * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
 */
int * blksize_size[MAX_BLKDEV] = { NULL, NULL, };

/*
 * hardsect_size contains the size of the hardware sector of a device.
 *
 * hardsect_size[MAJOR][MINOR]
 *
 * if (!hardsect_size[MAJOR])
 *		then 512 bytes is assumed.
 * else
 *		sector_size is hardsect_size[MAJOR][MINOR]
 * This is currently set by some scsi device and read by the msdos fs driver
 * This might be a some uses later.
 */
int * hardsect_size[MAX_BLKDEV] = { NULL, NULL, };

/*
 * remove the plug and let it rip..
 */
void unplug_device(void * data)
{
	struct blk_dev_struct * dev = (struct blk_dev_struct *) data;
	unsigned long flags;

	save_flags(flags);
	cli();
	if (dev->current_request == &dev->plug) {
		struct request * next = dev->plug.next;
		dev->current_request = next;
		if (next) {
			dev->plug.next = NULL;
			(dev->request_fn)();
		}
	}
	restore_flags(flags);
}

/*
 * "plug" the device if there are no outstanding requests: this will
 * force the transfer to start only after we have put all the requests
 * on the list.
 *
 * This is called with interrupts off and no requests on the queue.
 */
static inline void plug_device(struct blk_dev_struct * dev)
{
	dev->current_request = &dev->plug;
	queue_task_irq_off(&dev->plug_tq, &tq_disk);
}

/*
 * look for a free request in the first N entries.
 * NOTE: interrupts must be disabled on the way in, and will still
 *       be disabled on the way out.
 */
static inline struct request * get_request(int n, kdev_t dev)
{
	static struct request *prev_found = NULL, *prev_limit = NULL;
	register struct request *req, *limit;

	if (n <= 0)
		panic("get_request(%d): impossible!\n", n);

	limit = all_requests + n;
	if (limit != prev_limit) {
		prev_limit = limit;
		prev_found = all_requests;
	}
	req = prev_found;
	for (;;) {
		req = ((req > all_requests) ? req : limit) - 1;
		if (req->rq_status == RQ_INACTIVE)
			break;
		if (req == prev_found)
			return NULL;
	}
	prev_found = req;
	req->rq_status = RQ_ACTIVE;
	req->rq_dev = dev;
	return req;
}

/*
 * wait until a free request in the first N entries is available.
 */
static struct request * __get_request_wait(int n, kdev_t dev)
{
	register struct request *req;
	struct wait_queue wait = { current, NULL };

	add_wait_queue(&wait_for_request, &wait);
	for (;;) {
		current->state = TASK_UNINTERRUPTIBLE;
		cli();
		req = get_request(n, dev);
		sti();
		if (req)
			break;
		run_task_queue(&tq_disk);
		schedule();
	}
	remove_wait_queue(&wait_for_request, &wait);
	current->state = TASK_RUNNING;
	return req;
}

static inline struct request * get_request_wait(int n, kdev_t dev)
{
	register struct request *req;

	cli();
	req = get_request(n, dev);
	sti();
	if (req)
		return req;
	return __get_request_wait(n, dev);
}

/* RO fail safe mechanism */

static long ro_bits[MAX_BLKDEV][8];

int is_read_only(kdev_t dev)
{
	int minor,major;

	major = MAJOR(dev);
	minor = MINOR(dev);
	if (major < 0 || major >= MAX_BLKDEV) return 0;
	return ro_bits[major][minor >> 5] & (1 << (minor & 31));
}

void set_device_ro(kdev_t dev,int flag)
{
	int minor,major;

	major = MAJOR(dev);
	minor = MINOR(dev);
	if (major < 0 || major >= MAX_BLKDEV) return;
	if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
	else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
}

static inline void drive_stat_acct(int cmd, unsigned long nr_sectors,
                                   short disk_index)
{
	kstat.dk_drive[disk_index]++;
	if (cmd == READ) {
		kstat.dk_drive_rio[disk_index]++;
		kstat.dk_drive_rblk[disk_index] += nr_sectors;
	} else if (cmd == WRITE) {
		kstat.dk_drive_wio[disk_index]++;
		kstat.dk_drive_wblk[disk_index] += nr_sectors;
	} else
		printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
}


/*
 * add-request adds a request to the linked list.
 * It disables interrupts so that it can muck with the
 * request-lists in peace.
 *
 * By this point, req->cmd is always either READ/WRITE, never READA/WRITEA,
 * which is important for drive_stat_acct() above.
 */

void add_request(struct blk_dev_struct * dev, struct request * req)
{
	struct request * tmp;
	short		 disk_index;

	switch (MAJOR(req->rq_dev)) {
		case SCSI_DISK_MAJOR:
			disk_index = (MINOR(req->rq_dev) & 0x0070) >> 4;
			if (disk_index < 4)
				drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
			break;
		case IDE0_MAJOR:	/* same as HD_MAJOR */
		case XT_DISK_MAJOR:
			disk_index = (MINOR(req->rq_dev) & 0x0040) >> 6;
			drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
			break;
		case IDE1_MAJOR:
			disk_index = ((MINOR(req->rq_dev) & 0x0040) >> 6) + 2;
			drive_stat_acct(req->cmd, req->nr_sectors, disk_index);
		default:
			break;
	}

	req->next = NULL;
	cli();
	if (req->bh)
		mark_buffer_clean(req->bh);
	if (!(tmp = dev->current_request)) {
		dev->current_request = req;
		(dev->request_fn)();
		sti();
		return;
	}
	for ( ; tmp->next ; tmp = tmp->next) {
		if ((IN_ORDER(tmp,req) ||
		    !IN_ORDER(tmp,tmp->next)) &&
		    IN_ORDER(req,tmp->next))
			break;
	}
	req->next = tmp->next;
	tmp->next = req;

/* for SCSI devices, call request_fn unconditionally */
	if (scsi_blk_major(MAJOR(req->rq_dev)))
		(dev->request_fn)();

	sti();
}


static void make_request(int major,int rw, struct buffer_head * bh)
{
	unsigned int sector, count;
	struct request * req;
	int rw_ahead, max_req;

	count = bh->b_size >> 9;
	sector = bh->b_rsector;

	/* Uhhuh.. Nasty dead-lock possible here.. */
	if (buffer_locked(bh))
		return;
	/* Maybe the above fixes it, and maybe it doesn't boot. Life is interesting */

	lock_buffer(bh);

	if (blk_size[major])
		if (blk_size[major][MINOR(bh->b_rdev)] < (sector + count)>>1) {
			bh->b_state &= (1 << BH_Lock) | (1 << BH_FreeOnIO);
                        /* This may well happen - the kernel calls bread()
                           without checking the size of the device, e.g.,
                           when mounting a device. */
			printk(KERN_INFO
                               "attempt to access beyond end of device\n");
			printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
                               kdevname(bh->b_rdev), rw,
                               (sector + count)>>1,
                               blk_size[major][MINOR(bh->b_rdev)]);
			unlock_buffer(bh);
			return;
		}

	rw_ahead = 0;	/* normal case; gets changed below for READA/WRITEA */
	switch (rw) {
		case READA:
			rw_ahead = 1;
			rw = READ;	/* drop into READ */
		case READ:
			if (buffer_uptodate(bh)) {
				unlock_buffer(bh); /* Hmmph! Already have it */
				return;
			}
			kstat.pgpgin++;
			max_req = NR_REQUEST;	/* reads take precedence */
			break;
		case WRITEA:
			rw_ahead = 1;
			rw = WRITE;	/* drop into WRITE */
		case WRITE:
			if (!buffer_dirty(bh)) {
				unlock_buffer(bh); /* Hmmph! Nothing to write */
				return;
			}
			/* We don't allow the write-requests to fill up the
			 * queue completely:  we want some room for reads,
			 * as they take precedence. The last third of the
			 * requests are only for reads.
			 */
			kstat.pgpgout++;
			max_req = (NR_REQUEST * 2) / 3;
			break;
		default:
			printk(KERN_ERR "make_request: bad block dev cmd,"
                               " must be R/W/RA/WA\n");
			unlock_buffer(bh);
			return;
	}

/* look for a free request. */
       /* Loop uses two requests, 1 for loop and 1 for the real device.
        * Cut max_req in half to avoid running out and deadlocking. */
        if (major == LOOP_MAJOR)
	     max_req >>= 1;

	/*
	 * Try to coalesce the new request with old requests
	 */
	cli();
	req = blk_dev[major].current_request;
	if (!req) {
		/* MD and loop can't handle plugging without deadlocking */
		if (major != MD_MAJOR && major != LOOP_MAJOR)
			plug_device(blk_dev + major);
	} else switch (major) {
	     case IDE0_MAJOR:	/* same as HD_MAJOR */
	     case IDE1_MAJOR:
	     case FLOPPY_MAJOR:
	     case IDE2_MAJOR:
	     case IDE3_MAJOR:
		/*
		 * The scsi disk and cdrom drivers completely remove the request
		 * from the queue when they start processing an entry.  For this
		 * reason it is safe to continue to add links to the top entry for
		 * those devices.
		 *
		 * All other drivers need to jump over the first entry, as that
		 * entry may be busy being processed and we thus can't change it.
		 */
	        req = req->next;
		if (!req)
			break;
		/* fall through */

	     case SCSI_DISK_MAJOR:
	     case SCSI_CDROM_MAJOR:

		do {
			if (req->sem)
				continue;
			if (req->cmd != rw)
				continue;
			if (req->nr_sectors >= 244)
				continue;
			if (req->rq_dev != bh->b_rdev)
				continue;
			/* Can we add it to the end of this request? */
			if (req->sector + req->nr_sectors == sector) {
				req->bhtail->b_reqnext = bh;
				req->bhtail = bh;
			/* or to the beginning? */
			} else if (req->sector - count == sector) {
			    	bh->b_reqnext = req->bh;
			    	req->bh = bh;
			    	req->buffer = bh->b_data;
			    	req->current_nr_sectors = count;
			    	req->sector = sector;
			} else
				continue;

		    	req->nr_sectors += count;
			mark_buffer_clean(bh);
		    	sti();
		    	return;
		} while ((req = req->next) != NULL);
	}

/* find an unused request. */
	req = get_request(max_req, bh->b_rdev);
	sti();

/* if no request available: if rw_ahead, forget it; otherwise try again blocking.. */
	if (!req) {
		if (rw_ahead) {
			unlock_buffer(bh);
			return;
		}
		req = __get_request_wait(max_req, bh->b_rdev);
	}

/* fill up the request-info, and add it to the queue */
	req->cmd = rw;
	req->errors = 0;
	req->sector = sector;
	req->nr_sectors = count;
	req->current_nr_sectors = count;
	req->buffer = bh->b_data;
	req->sem = NULL;
	req->bh = bh;
	req->bhtail = bh;
	req->next = NULL;
	add_request(major+blk_dev,req);
}


/* This function can be used to request a number of buffers from a block
   device. Currently the only restriction is that all buffers must belong to
   the same device */

void ll_rw_block(int rw, int nr, struct buffer_head * bh[])
{
	unsigned int major;
	int correct_size;
	struct blk_dev_struct * dev;
	int i;

	/* Make sure that the first block contains something reasonable */
	while (!*bh) {
		bh++;
		if (--nr <= 0)
			return;
	}

	dev = NULL;
	if ((major = MAJOR(bh[0]->b_dev)) < MAX_BLKDEV)
		dev = blk_dev + major;
	if (!dev || !dev->request_fn) {
		printk(KERN_ERR
	"ll_rw_block: Trying to read nonexistent block-device %s (%ld)\n",
		kdevname(bh[0]->b_dev), bh[0]->b_blocknr);
		goto sorry;
	}

	/* Determine correct block size for this device.  */
	correct_size = BLOCK_SIZE;
	if (blksize_size[major]) {
		i = blksize_size[major][MINOR(bh[0]->b_dev)];
		if (i)
			correct_size = i;
	}

	/* Verify requested block sizes.  */
	for (i = 0; i < nr; i++) {
		if (bh[i] && bh[i]->b_size != correct_size) {
			printk(KERN_NOTICE "ll_rw_block: device %s: "
			       "only %d-char blocks implemented (%lu)\n",
			       kdevname(bh[0]->b_dev),
			       correct_size, bh[i]->b_size);
			goto sorry;
		}

		/* Md remaps blocks now */
		bh[i]->b_rdev = bh[i]->b_dev;
		bh[i]->b_rsector=bh[i]->b_blocknr*(bh[i]->b_size >> 9);
#ifdef CONFIG_BLK_DEV_MD
		if (major==MD_MAJOR &&
		    md_map (MINOR(bh[i]->b_dev), &bh[i]->b_rdev,
			    &bh[i]->b_rsector, bh[i]->b_size >> 9)) {
		        printk (KERN_ERR
				"Bad md_map in ll_rw_block\n");
		        goto sorry;
		}
#endif
	}

	if ((rw == WRITE || rw == WRITEA) && is_read_only(bh[0]->b_dev)) {
		printk(KERN_NOTICE "Can't write to read-only device %s\n",
		       kdevname(bh[0]->b_dev));
		goto sorry;
	}

	for (i = 0; i < nr; i++) {
		if (bh[i]) {
			set_bit(BH_Req, &bh[i]->b_state);
			make_request(MAJOR(bh[i]->b_rdev), rw, bh[i]);
		}
	}
	return;

      sorry:
	for (i = 0; i < nr; i++) {
		if (bh[i]) {
			clear_bit(BH_Dirty, &bh[i]->b_state);
			clear_bit(BH_Uptodate, &bh[i]->b_state);
		}
	}
	return;
}

void ll_rw_swap_file(int rw, kdev_t dev, unsigned int *b, int nb, char *buf)
{
	int i, j;
	int buffersize;
	int max_req;
	unsigned long rsector;
	kdev_t rdev;
	struct request * req[8];
	unsigned int major = MAJOR(dev);
	struct semaphore sem = MUTEX_LOCKED;

	if (major >= MAX_BLKDEV || !(blk_dev[major].request_fn)) {
		printk(KERN_NOTICE "ll_rw_swap_file: trying to swap to"
                                   " nonexistent block-device\n");
		return;
	}
	max_req = NR_REQUEST;
	switch (rw) {
		case READ:
			break;
		case WRITE:
			max_req = (NR_REQUEST * 2) / 3;
			if (is_read_only(dev)) {
				printk(KERN_NOTICE
                                       "Can't swap to read-only device %s\n",
					kdevname(dev));
				return;
			}
			break;
		default:
			panic("ll_rw_swap: bad block dev cmd, must be R/W");
	}
	buffersize = PAGE_SIZE / nb;

	if (major == LOOP_MAJOR)
	     max_req >>= 1;
	for (j=0, i=0; i<nb;)
	{
		for (; j < 8 && i < nb; j++, i++, buf += buffersize)
		{
		        rdev = dev;
			rsector = (b[i] * buffersize) >> 9;
#ifdef CONFIG_BLK_DEV_MD
			if (major==MD_MAJOR &&
			    md_map (MINOR(dev), &rdev,
				    &rsector, buffersize >> 9)) {
			        printk (KERN_ERR
                                        "Bad md_map in ll_rw_swap_file\n");
				return;
			}
#endif
			
			if (j == 0) {
				req[j] = get_request_wait(max_req, rdev);
			} else {
				cli();
				req[j] = get_request(max_req, rdev);
				sti();
				if (req[j] == NULL)
					break;
			}
			req[j]->cmd = rw;
			req[j]->errors = 0;
			req[j]->sector = rsector;
			req[j]->nr_sectors = buffersize >> 9;
			req[j]->current_nr_sectors = buffersize >> 9;
			req[j]->buffer = buf;
			req[j]->sem = &sem;
			req[j]->bh = NULL;
			req[j]->next = NULL;
			add_request(MAJOR(rdev)+blk_dev,req[j]);
		}
		run_task_queue(&tq_disk);
		while (j > 0) {
			j--;
			down(&sem);
		}
	}
}

int blk_dev_init(void)
{
	struct request * req;
	struct blk_dev_struct *dev;

	for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;) {
		dev->request_fn      = NULL;
		dev->current_request = NULL;
		dev->plug.rq_status  = RQ_INACTIVE;
		dev->plug.cmd        = -1;
		dev->plug.next       = NULL;
		dev->plug_tq.routine = &unplug_device;
		dev->plug_tq.data    = dev;
	}

	req = all_requests + NR_REQUEST;
	while (--req >= all_requests) {
		req->rq_status = RQ_INACTIVE;
		req->next = NULL;
	}
	memset(ro_bits,0,sizeof(ro_bits));
#ifdef CONFIG_BLK_DEV_RAM
	rd_init();
#endif
#ifdef CONFIG_BLK_DEV_LOOP
	loop_init();
#endif
#ifdef CONFIG_CDI_INIT
	cdi_init();		/* this MUST precede ide_init */
#endif CONFIG_CDI_INIT
#ifdef CONFIG_BLK_DEV_IDE
	ide_init();		/* this MUST precede hd_init */
#endif
#ifdef CONFIG_BLK_DEV_HD
	hd_init();
#endif
#ifdef CONFIG_BLK_DEV_XD
	xd_init();
#endif
#ifdef CONFIG_BLK_DEV_FD
	floppy_init();
#else
	outb_p(0xc, 0x3f2);
#endif
#ifdef CONFIG_CDU31A
	cdu31a_init();
#endif CONFIG_CDU31A
#ifdef CONFIG_MCD
	mcd_init();
#endif CONFIG_MCD
#ifdef CONFIG_MCDX
	mcdx_init();
#endif CONFIG_MCDX
#ifdef CONFIG_SBPCD
	sbpcd_init();
#endif CONFIG_SBPCD
#ifdef CONFIG_AZTCD
        aztcd_init();
#endif CONFIG_AZTCD
#ifdef CONFIG_CDU535
	sony535_init();
#endif CONFIG_CDU535
#ifdef CONFIG_GSCD
	gscd_init();
#endif CONFIG_GSCD
#ifdef CONFIG_CM206
	cm206_init();
#endif
#ifdef CONFIG_OPTCD
	optcd_init();
#endif CONFIG_OPTCD
#ifdef CONFIG_SJCD
	sjcd_init();
#endif CONFIG_SJCD
#ifdef CONFIG_BLK_DEV_MD
	md_init();
#endif CONFIG_BLK_DEV_MD
	return 0;
}