/*
 *  linux/arch/i386/mm/fault.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>

#include <asm/system.h>
#include <asm/segment.h>
#include <asm/pgtable.h>

extern void die_if_kernel(const char *,struct pt_regs *,long);

asmlinkage void do_divide_error (struct pt_regs *, unsigned long);
asmlinkage void do_debug (struct pt_regs *, unsigned long);
asmlinkage void do_nmi (struct pt_regs *, unsigned long);
asmlinkage void do_int3 (struct pt_regs *, unsigned long);
asmlinkage void do_overflow (struct pt_regs *, unsigned long);
asmlinkage void do_bounds (struct pt_regs *, unsigned long);
asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
asmlinkage void do_general_protection (struct pt_regs *, unsigned long);

extern int pentium_f00f_bug;

static int handle_intx_eip_adjust(struct pt_regs *regs)
{
	unsigned char *addr, *csp = 0;
	int wrap = 0;
	int count = 8; /* only check a reasonable number of bytes,
			 * else we do it the safe 'simple way' */
	unsigned long _eip;
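	/* In a 16-bit code segment only the low 16 bits of EIP are valid,
	 * so arithmetic on _eip has to wrap at 64K; XX_WRAP selects 16-bit
	 * or 32-bit access to _eip accordingly. */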
#define XX_WRAP(x) (wrap ? *((unsigned short *)&x) : x)

	/* We rely on being able to access the memory pointed to by cs:eip
	 * and the bytes behind it up to the faulting instruction, because
	 * we just got an exception for this instruction and hence that
	 * memory must have been accessible.
	 * In case of crossing a page boundary or when accessing kernel space
	 * we just do the simple fix (increase eip by one).
	 * This assumption also makes checking the segment limit unnecessary.
	 * (It should be verified, however, that this assumption is true.)
	 */

	if (regs->cs == KERNEL_CS) {
		/* not what we expect */
		regs->eip++;
		return 0;
	}

	if (regs->eflags & VM_MASK) {
		/* we have a real-mode type selector */
		wrap = 1;
		csp = (unsigned char *)((unsigned long)regs->cs << 4);
	}
	else if (regs->cs & 4) {
		/* we have an LDT selector */
		struct desc_struct *p, *ldt = current->ldt;
		if (!ldt)
			ldt = (struct desc_struct*) &default_ldt;
		p = ldt + (regs->cs >> 3);
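		/* Reassemble the segment base, which the descriptor keeps
		 * scattered over both words: base bits 0-15 in the high half
		 * of 'a', bits 16-23 in the low byte of 'b', bits 24-31 in
		 * the top byte of 'b'. */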
		csp = (unsigned char *)((p->a >> 16) |
			((p->b & 0xff) << 16) | (p->b & 0xFF000000));
		if (!(p->b & 0x400000))
			wrap = 1;	/* 16-bit segment */
	}
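	/* otherwise it is a GDT code segment, assumed flat with base 0
	 * (csp stays 0, wrap stays 0) */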

	_eip = regs->eip;
	addr = csp+XX_WRAP(_eip);
	while (count-- > 0) {
		if ((unsigned long)addr >= TASK_SIZE) {
			/* accessing kernel space, do the simple case */
			regs->eip++;
			return 0;
		}
		switch (get_user(addr)) {

			case 0xCC:	/* single byte INT3 */
				XX_WRAP(_eip)++;
				regs->eip = _eip;
				return 0;

			case 0xCD:	/* two byte INT 3 */
				XX_WRAP(_eip)++;
				/* fall through */
			case 0xCE:	/* INTO, single byte */
				XX_WRAP(_eip)++;
				if ( (regs->eflags & VM_MASK)
					&& ((regs->eflags & IOPL_MASK) != IOPL_MASK)) {
					/* not allowed, do GP0 fault */
					do_general_protection(regs, 0);
					return -1;
				}
				regs->eip = _eip;
				return 0;

					/* the prefixes from the Intel patch */
			case 0xF2 ... 0xF3:
			case 0x2E:
			case 0x36:
			case 0x3E:
			case 0x26:
			case 0x64 ... 0x67:
				break;	/* just skipping them */

			default:
				/* not what we handle here,
				 * just doing the simple fix
				 */
				regs->eip++;
				return 0;
		}

		if ( !(++XX_WRAP(_eip)) ) {
			/* we wrapped around */
			regs->eip++;
			return 0;
		}

		addr = csp+XX_WRAP(_eip);
		if ( !((unsigned long)addr & (PAGE_SIZE - 1)) ) {
			/* we would cross a page boundary, not good,
			 * doing the simple fix
			 */
			regs->eip++;
			return 0;
		}
	}

	/* if we come here something weird happened,
	 * just doing the simple fix
	 */
	regs->eip++;
	return 0;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 *
 * error_code:
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 */
asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	void (*handler)(struct task_struct *,
			struct vm_area_struct *,
			unsigned long,
			int);
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct vm_area_struct * vma;
	unsigned long address;
	unsigned long page;
	int write;

    // ********************************************************
    // Fetch the linear address at which the fault occurred and
    // store it in "address".  Then find the vm_area_struct that
    // covers this address.  If none is found, we have a
    // critical error.
    // ********************************************************

	/* get the address */
	__asm__("movl %%cr2,%0":"=r" (address));
	down(&mm->mmap_sem);
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;

    // ********************************************************
    // find_vma returned a vm_area_struct whose upper bound is
    // above "address"; we still have to check the lower bound.
    // If we have not gone below vm_start, everything is fine.
    // ********************************************************

	if (vma->vm_start <= address)
		goto good_area;

    // ********************************************************
    // There is one case in which crossing the lower bound of a
    // memory area is acceptable: the stack.  The stack grows
    // towards lower addresses.
    // ********************************************************
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;

    // ********************************************************
    // For user-mode code we check whether this is a request to
    // extend the stack by more than 32 bytes below %esp (which
    // should never happen normally, since the only legal ways
    // to use the stack are push/pop of 4 bytes at a time, or
    // pusha/popa, which store the contents of all the
    // registers).
    //
    // User programs may read and write the stack only above
    // its stack pointer.  For the kernel we silently assume it
    // is written correctly and always allow the stack to be
    // extended.
    // ********************************************************

	if (error_code & 4) {
		/*
		 * accessing the stack below %esp is always a bug.
		 * The "+ 32" is there due to some instructions (like
		 * pusha) doing pre-decrement on the stack and that
		 * doesn't show up until later..
		 */
		if (address + 32 < regs->esp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;

    // *********************************************************
    // The variable "handler" holds the address of the routine
    // to call in response to the page fault.
    //
    // There are 4 cases:
    //      1. write to a present page (call do_wp_page)
    //      2. write to a non-present page:
    //          if the memory area allows writing, call
    //          do_no_page, otherwise it is a fault
    //      3. read from a present page - something must have
    //          gone wrong, so it is a fault
    //      4. read from a non-present page - if the area is
    //          readable, call do_no_page,
    //          otherwise it is a fault
    // *********************************************************
    //
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	write = 0;
	handler = do_no_page;
	switch (error_code & 3) {
		default:	/* 3: write, present */
			handler = do_wp_page;
#ifdef TEST_VERIFY_AREA
			if (regs->cs == KERNEL_CS)
				printk("WP fault at %08lx\n", regs->eip);
#endif
			/* fall through */
		case 2:		/* write, not present */
			if (!(vma->vm_flags & VM_WRITE))
				goto bad_area;
			write++;
			break;
		case 1:		/* read, present */
			goto bad_area;
		case 0:		/* read, not present */
			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
				goto bad_area;
	}
	handler(tsk, vma, address, write);
	up(&mm->mmap_sem);

    // ********************************************************
    // If the process wrote to the DOS screen memory we have to
    // update the display.  screen_bitmap is a bit mask telling
    // which page of the screen memory area (128 kB = 32 x 4 kB)
    // needs refreshing.
    // ********************************************************

	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (regs->eflags & VM_MASK) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->tss.screen_bitmap |= 1 << bit;
	}
	return;

    // ********************************************************
    // Here we handle all invalid memory references - if it was
    // a user process we send it a SIGSEGV (segmentation
    // violation) signal, which terminates the process.
    // ********************************************************
/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up(&mm->mmap_sem);
	if (error_code & 4) {
		tsk->tss.cr2 = address;
		tsk->tss.error_code = error_code;
		tsk->tss.trap_no = 14;
		force_sig(SIGSEGV, tsk);
		return;
	}

    // ********************************************************
    // Handling of the Pentium F00F bug
    // ********************************************************

	/*
	 * Pentium F0 0F C7 C8 bug workaround:
	 */
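	/*
	 * The page containing the IDT is left read-only, so the lockup-prone
	 * "F0 0F C7 C8" sequence arrives here as a page fault instead of
	 * hanging the CPU.  Each IDT descriptor is 8 bytes, so the offset of
	 * the faulting address into the IDT identifies the exception
	 * (vector 0-6) that was really being raised.
	 */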
	if ( pentium_f00f_bug ) {
		unsigned long nr;

		nr = (address - TASK_SIZE - (unsigned long) idt) >> 3;

		if (nr < 7) {
			static void (*handler[])(struct pt_regs *, unsigned long) = {
				do_divide_error,	/* 0 - divide overflow */
				do_debug,		/* 1 - debug trap */
				do_nmi,			/* 2 - NMI */
				do_int3,		/* 3 - int 3 */
				do_overflow,		/* 4 - overflow */
				do_bounds,		/* 5 - bound range */
				do_invalid_op };	/* 6 - invalid opcode */
			if ((nr == 3) || (nr == 4))
				if (handle_intx_eip_adjust(regs))
					return;
			handler[nr](regs, error_code);
			return;
		}
	}

    // ********************************************************
    // An invalid page access made by the kernel.  It may be the
    // write-protect test done during system startup - in that
    // case we just print a message and return.
    // During normal operation the kernel panics ("Oops").
    // ********************************************************

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 *
 * First we check if it was the bootup rw-test, though..
 */
	if (wp_works_ok < 0 && address == TASK_SIZE && (error_code & 1)) {
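		/* The boot-time test wrote to a write-protected page; since
		 * that write faulted, the WP bit is honoured in supervisor
		 * mode.  Restore a writable mapping for page 0 and continue. */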
		wp_works_ok = 1;
		pg0[0] = pte_val(mk_pte(0, PAGE_SHARED));
		flush_tlb();
		printk("This processor honours the WP bit even when in supervisor mode.
Good.\n");
		return;
	}
	if ((unsigned long) (address-TASK_SIZE) < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at virtual address %08lx\n",address);
	__asm__("movl %%cr3,%0" : "=r" (page));
	printk(KERN_ALERT "current->tss.cr3 = %08lx, %%cr3 = %08lx\n",
		tsk->tss.cr3, page);
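	/* Walk the two-level page tables by hand: the top 10 bits of the
	 * address index the page directory, the next 10 bits index the
	 * page table. */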
	page = ((unsigned long *) page)[address >> 22];
	printk(KERN_ALERT "*pde = %08lx\n", page);
	if (page & 1) {
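		/* the page directory entry is present, dump the PTE too */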
		page &= PAGE_MASK;
		address &= 0x003ff000;
		page = ((unsigned long *) page)[address >> PAGE_SHIFT];
		printk(KERN_ALERT "*pte = %08lx\n", page);
	}
	die_if_kernel("Oops", regs, error_code);
	do_exit(SIGKILL);
}