linux/mm/mmap.c

/*
 *	linux/mm/mmap.c
 *
 * Written by obz.
 */
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/shm.h>
#include <linux/errno.h>
#include <linux/mman.h>
#include <linux/string.h>
#include <linux/malloc.h>
#include <linux/pagemap.h>
#include <linux/swap.h>

#include <asm/segment.h>
#include <asm/system.h>
#include <asm/pgtable.h>

/*
 * description of effects of mapping type and prot in current implementation.
 * this is due to the limited x86 page protection hardware.  The expected
 * behavior is in parens:
 *
 * map_type	prot
 *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
 * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
 *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
 *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
 *
 */

pgprot_t protection_map[16] = {
	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

/*
 * Check that a process has enough memory to allocate a
 * new virtual mapping.
 */
static inline int vm_enough_memory(long pages)
{
	/*
	 * stupid algorithm to decide if we have enough memory: while
	 * simple, it hopefully works in most obvious cases.. Easy to
	 * fool it, but this should catch most mistakes.
	 */
	long freepages;
	freepages = buffermem >> PAGE_SHIFT;
	freepages += page_cache_size;
	if (freepages <= (MAP_NR(high_memory) >> 4) + 48)
		freepages >>= 1;
	freepages += nr_free_pages;
	freepages += nr_swap_pages;
	freepages -= MAP_NR(high_memory) >> 4;
	return freepages > pages;
}

asmlinkage unsigned long sys_brk(unsigned long brk)
{
	unsigned long rlim;
	unsigned long newbrk, oldbrk;
	struct mm_struct *mm = current->mm;

	if (brk < mm->end_code)
		return mm->brk;
	newbrk = PAGE_ALIGN(brk);
	oldbrk = PAGE_ALIGN(mm->brk);
	if (oldbrk == newbrk)
		return mm->brk = brk;

	/*
	 * Always allow shrinking brk
	 */
	if (brk <= mm->brk) {
		mm->brk = brk;
		do_munmap(newbrk, oldbrk-newbrk);
		return brk;
	}
	/*
	 * Check against rlimit and stack..
	 */
	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
	if (rlim >= RLIM_INFINITY)
		rlim = ~0;
	if (brk - mm->end_code > rlim)
		return mm->brk;

	/*
	 * Check against existing mmap mappings.
	 */
	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
		return mm->brk;

	/*
	 * Check if we have enough memory..
	 */
	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
		return mm->brk;

	/*
	 * Ok, looks good - let it rip.
	 */
	if(do_mmap(NULL, oldbrk, newbrk-oldbrk,
		PROT_READ|PROT_WRITE|PROT_EXEC,
		   MAP_FIXED|MAP_PRIVATE, 0) != oldbrk)
		return mm->brk;
	return mm->brk = brk;
}

/*
 * Combine the mmap "prot" and "flags" argument into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

	unsigned long prot_bits, flag_bits;
	prot_bits =
		_trans(prot, PROT_READ, VM_READ) |
		_trans(prot, PROT_WRITE, VM_WRITE) |
		_trans(prot, PROT_EXEC, VM_EXEC);
	flag_bits =
		_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
		_trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
		_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
	return prot_bits | flag_bits;
#undef _trans
}

unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len,
	unsigned long prot, unsigned long flags, unsigned long off)
/*  Przedziela pamięć na nowa strukturę typu vm_area_struct oraz następuje
    jej inicjalizowanie
    Zwalnia poprzednie obszary o zachodzących na siebie adresach
    Wywołuje funkcje charakterystyczną dla danego systemu plików
    i kończy inicjalizowanie struktury typu vm_area_struct
    Następnie struktura ta jest dostawiana do struktury zawierającej
    informacje o ramkach procesu.

    argumenty funkcji:
    file - określenie pliku, który ma być mapowany,
	   Jeśli zostanie podane NULL to przydzielony segment nie będzie
	   związany z żadnym plikiem (będzie "normalnym" segmentem)
    addr - adres w pamięci pod który chcemy mapować (jeśli podajemy konkretny
	   adres flaga MAP_FIXED musi być ustawiona)
	   Jesli zostanie podane NULL system sam przydzieli odpowiedni adres
    len	 - wielkość obszaru do przydzielenia
    prot, flags- flagi ochrony i dzielenia zdefiniowane w pliku asm/mmap.h
    off	 - offset w pliku. Jeśli pamięć ma być dzielona to offset musi być
	   wielokrotnościa rozmiaru strony

    Zwracany jest przydzielony adres lub odpowiedni kod błędu (<0)
    EINVAL  - niewłaściwy argument
    EAGAIN  - spróbuj ponownie
    EACCES  - dostęp zabroniony
    ETXTBSY - plik tekstowo zajęty
    ENOMEM  - brak pamięci
    ENODEV  - nie ma takiego urządzenia
*/

{
	struct mm_struct * mm = current->mm;
	struct vm_area_struct * vma;

	if ((len = PAGE_ALIGN(len)) == 0)
		return addr;
	/*PAGE_ALIGN(len) zwraca len gdy (len mod PAGE_SIZE == 0)
			   wpp (PAGE_SIZE*((len div PAGE_SIZE) + 1)
	  PAGE_SIZE - rozmiar strony */

	if (len > TASK_SIZE || addr > TASK_SIZE-len)
		return -EINVAL; /* próba przydzielenia za dużej liczby
				    pamięci lub w niewłaściwym obszarze */

	/* offset overflow? */
	if (off + len < off)
		return -EINVAL;

	/* mlock MCL_FUTURE? */
	if (mm->def_flags & VM_LOCKED) {
		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
		locked += len;
		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
			return -EAGAIN;
	}

	/*
	 * do simple checking here so the lower-level routines won't have
	 * to. we assume access permissions have been handled by the open
	 * of the memory object, so we don't do any here.
	 */

	if (file != NULL) {/*sprawdzenie praw dostępu*/
		switch (flags & MAP_TYPE) {
		case MAP_SHARED: /*obszar dzielony */
			if ((prot & PROT_WRITE) && !(file->f_mode & 2))
				return -EACCES; /*błąd dostępu */
			/* PROT_WRITE - strona może być modyfikowana*/
			/*
			 * make sure there are no mandatory locks on the file.
			 */
			if (locks_verify_locked(file->f_inode))
				return -EAGAIN;/* spróbuj ponownie */
			/* fall through */
		case MAP_PRIVATE:/*obszar niedzielony*/
			if (!(file->f_mode & 1))
				return -EACCES;/*błąd dostępu*/
			break;

		default:
			return -EINVAL;/*niewłaściwy argument*/
		}
		if (flags & MAP_DENYWRITE) {
			if (file->f_inode->i_writecount > 0)
				return -ETXTBSY;/*plik tekstowo zajęty */
		}
	} else if ((flags & MAP_TYPE) != MAP_PRIVATE)
		return -EINVAL;

	/*
	 * obtain the address to map to. we verify (or select) it and ensure
	 * that it represents a valid section of the address space.
	 */

	if (flags & MAP_FIXED) {/*podaliśmy konkretny adres */
		if (addr & ~PAGE_MASK)
			return -EINVAL;
	} else {/*przydzielamy adres obszaru o długosci len */
		addr = get_unmapped_area(addr, len);
		if (!addr)
			return -ENOMEM;/*brak pamięci*/
	}

	/*
	 * determine the object being mapped and call the appropriate
	 * specific mapper. the address has already been validated, but
	 * not unmapped, but the maps are removed from the list.
	 */
	if (file && (!file->f_op || !file->f_op->mmap))
		return -ENODEV;/*plik nie ma zdefinowanej struktury
				używanej przy wymianie dotyczącej
				urządzenia na jakim znajduje się plik */

	vma = (struct vm_area_struct *)kmalloc(sizeof(struct vm_area_struct),
		GFP_KERNEL);
	if (!vma)
		return -ENOMEM;
	/* inicjalizowanie vma */
	vma->vm_mm = mm;
	vma->vm_start = addr;
	vma->vm_end = addr + len;
	vma->vm_flags = vm_flags(prot,flags) | mm->def_flags;

	if (file) {
		if (file->f_mode & 1)
			vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
		if (flags & MAP_SHARED) {
			vma->vm_flags |= VM_SHARED | VM_MAYSHARE;
			/*
			 * This looks strange, but when we don't have the file open
			 * for writing, we can demote the shared mapping to a simpler
			 * private mapping. That also takes care of a security hole
			 * with ptrace() writing to a shared mapping without write
			 * permissions.
			 *
			 * We leave the VM_MAYSHARE bit on, just to get correct output
			 * from /proc/xxx/maps..
			 */
			if (!(file->f_mode & 2))
				vma->vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
		}
	} else
		vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
	vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f];
	vma->vm_ops = NULL;
	vma->vm_offset = off;
	vma->vm_inode = NULL;
	vma->vm_pte = 0;

	do_munmap(addr, len);	/* Clear old maps */

	/* Check against address space limit. */
	if ((mm->total_vm << PAGE_SHIFT) + len
	    > current->rlim[RLIMIT_AS].rlim_cur) {
		kfree(vma);
		return -ENOMEM;
	}

	/* Private writable mapping? Check memory availability.. */
	if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE) {
		if (!vm_enough_memory(len >> PAGE_SHIFT)) {
			kfree(vma);
			return -ENOMEM;
		}
	}

	if (file) {
		/* zakończenie inicjalizowania struktury vma, mmap wykonuje
		 się zależnie od systemu plików z którego pochodzi "file" */

		int error = file->f_op->mmap(file->f_inode, file, vma);

		if (error) {
			kfree(vma);
			return error;
		}
	}

	flags = vma->vm_flags;
	insert_vm_struct(mm, vma);

	/* łączenie segmentów - możliwe, że nasze vma zazębia się początkiem
	   lub końcem z jakimś innym vma z którym można go połączyć */
	merge_segments(mm, vma->vm_start, vma->vm_end);

	/* merge_segments might have merged our vma, so we can't use it any more */
	mm->total_vm += len >> PAGE_SHIFT;
	if (flags & VM_LOCKED) {
		unsigned long start = addr;
		mm->locked_vm += len >> PAGE_SHIFT;
		do {
			char c = get_user((char *) start);
			len -= PAGE_SIZE;
			start += PAGE_SIZE;
			__asm__ __volatile__("": :"r" (c));
		} while (len > 0);
	}
	return addr;
}

/*
 * Get an address range which is currently unmapped.
 * For mmap() without MAP_FIXED and shmat() with addr=0.
 * Return value 0 means ENOMEM.
 */
unsigned long get_unmapped_area(unsigned long addr, unsigned long len)
/* Zwraca adres obszaru pod który możemy wstawić strukturę vma lub 0 gdy bład */
{
	struct vm_area_struct * vmm;

	if (len > TASK_SIZE)
		return 0; /* nie można przydzielić wiecej niż wynosi limit */
	if (!addr)
		addr = TASK_SIZE / 3;
	addr = PAGE_ALIGN(addr);
	/* PAGE_ALIGN(addr)=addr gdy (addr mod PAGE_SIZE)==0
			   =PAGE_SIZE*(addr div PAGE_SIZE +1)
	   PAGE_SIZE - rozmiar strony
	*/

	for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
		/* At this point:  (!vmm || addr < vmm->vm_end). */
		if (TASK_SIZE - len < addr)
			return 0; /* przekroczyliśmy zakres */
		if (!vmm || addr + len <= vmm->vm_start)
		/* zmieściliśmy się w luce pomiędzy dwoma strukturami vma */
			return addr;
		addr = vmm->vm_end;
	}
}

/*
 * Searching a VMA in the linear list task->mm->mmap is horribly slow.
 * Use an AVL (Adelson-Velskii and Landis) tree to speed up this search
 * from O(n) to O(log n), where n is the number of VMAs of the task
 * (typically around 6, but may reach 3000 in some cases).
 * Written by Bruno Haible <haible@ma2s2.mathematik.uni-karlsruhe.de>.
 */

/* We keep the list and tree sorted by address. */
#define vm_avl_key	vm_end
#define vm_avl_key_t	unsigned long	/* typeof(vma->avl_key) */

/*
 * task->mm->mmap_avl is the AVL tree corresponding to task->mm->mmap
 * or, more exactly, its root.
 * A vm_area_struct has the following fields:
 *   vm_avl_left     left son of a tree node
 *   vm_avl_right    right son of a tree node
 *   vm_avl_height   1+max(heightof(left),heightof(right))
 * The empty tree is represented as NULL.
 */

/* Since the trees are balanced, their height will never be large. */
#define avl_maxheight	41	/* why this? a small exercise */
#define heightof(tree)	((tree) == avl_empty ? 0 : (tree)->vm_avl_height)
/*
 * Consistency and balancing rules:
 * 1. tree->vm_avl_height == 1+max(heightof(tree->vm_avl_left),heightof(tree->vm_avl_right))
 * 2. abs( heightof(tree->vm_avl_left) - heightof(tree->vm_avl_right) ) <= 1
 * 3. foreach node in tree->vm_avl_left: node->vm_avl_key <= tree->vm_avl_key,
 *    foreach node in tree->vm_avl_right: node->vm_avl_key >= tree->vm_avl_key.
 */

/* Look up the nodes at the left and at the right of a given node. */
static inline void avl_neighbours (struct vm_area_struct * node, struct vm_area_struct * tree, struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
{
	vm_avl_key_t key = node->vm_avl_key;

	*to_the_left = *to_the_right = NULL;
	for (;;) {
		if (tree == avl_empty) {
			printk("avl_neighbours: node not found in the tree\n");
			return;
		}
		if (key == tree->vm_avl_key)
			break;
		if (key < tree->vm_avl_key) {
			*to_the_right = tree;
			tree = tree->vm_avl_left;
		} else {
			*to_the_left = tree;
			tree = tree->vm_avl_right;
		}
	}
	if (tree != node) {
		printk("avl_neighbours: node not exactly found in the tree\n");
		return;
	}
	if (tree->vm_avl_left != avl_empty) {
		struct vm_area_struct * node;
		for (node = tree->vm_avl_left; node->vm_avl_right != avl_empty; node = node->vm_avl_right)
			continue;
		*to_the_left = node;
	}
	if (tree->vm_avl_right != avl_empty) {
		struct vm_area_struct * node;
		for (node = tree->vm_avl_right; node->vm_avl_left != avl_empty; node = node->vm_avl_left)
			continue;
		*to_the_right = node;
	}
	if ((*to_the_left && ((*to_the_left)->vm_next != node)) || (node->vm_next != *to_the_right))
		printk("avl_neighbours: tree inconsistent with list\n");
}

/*
 * Rebalance a tree.
 * After inserting or deleting a node of a tree we have a sequence of subtrees
 * nodes[0]..nodes[k-1] such that
 * nodes[0] is the root and nodes[i+1] = nodes[i]->{vm_avl_left|vm_avl_right}.
 */
static inline void avl_rebalance (struct vm_area_struct *** nodeplaces_ptr, int count)
{
	for ( ; count > 0 ; count--) {
		struct vm_area_struct ** nodeplace = *--nodeplaces_ptr;
		struct vm_area_struct * node = *nodeplace;
		struct vm_area_struct * nodeleft = node->vm_avl_left;
		struct vm_area_struct * noderight = node->vm_avl_right;
		int heightleft = heightof(nodeleft);
		int heightright = heightof(noderight);
		if (heightright + 1 < heightleft) {
			/*							*/
			/*			      *				*/
			/*			    /	\			*/
			/*			 n+2	  n			*/
			/*							*/
			struct vm_area_struct * nodeleftleft = nodeleft->vm_avl_left;
			struct vm_area_struct * nodeleftright = nodeleft->vm_avl_right;
			int heightleftright = heightof(nodeleftright);
			if (heightof(nodeleftleft) >= heightleftright) {
				/*							  */
				/*		  *		       n+2|n+3		  */
				/*		/   \		       /    \		  */
				/*	     n+2      n	     -->      /	  n+1|n+2	  */
				/*	     / \		      |	   /	\	  */
				/*	   n+1 n|n+1		     n+1  n|n+1	 n	  */
				/*							  */
				node->vm_avl_left = nodeleftright; nodeleft->vm_avl_right = node;
				nodeleft->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightleftright);
				*nodeplace = nodeleft;
			} else {
				/*							  */
				/*		  *			n+2		  */
				/*		/   \		      /	    \		  */
				/*	     n+2      n	     -->    n+1	    n+1		  */
				/*	     / \		    / \	    / \		  */
				/*	    n  n+1		   n   L   R   n	  */
				/*	       / \					  */
				/*	      L	  R					  */
				/*							  */
				nodeleft->vm_avl_right = nodeleftright->vm_avl_left;
				node->vm_avl_left = nodeleftright->vm_avl_right;
				nodeleftright->vm_avl_left = nodeleft;
				nodeleftright->vm_avl_right = node;
				nodeleft->vm_avl_height = node->vm_avl_height = heightleftright;
				nodeleftright->vm_avl_height = heightleft;
				*nodeplace = nodeleftright;
			}
		}
		else if (heightleft + 1 < heightright) {
			/* similar to the above, just interchange 'left' <--> 'right' */
			struct vm_area_struct * noderightright = noderight->vm_avl_right;
			struct vm_area_struct * noderightleft = noderight->vm_avl_left;
			int heightrightleft = heightof(noderightleft);
			if (heightof(noderightright) >= heightrightleft) {
				node->vm_avl_right = noderightleft; noderight->vm_avl_left = node;
				noderight->vm_avl_height = 1 + (node->vm_avl_height = 1 + heightrightleft);
				*nodeplace = noderight;
			} else {
				noderight->vm_avl_left = noderightleft->vm_avl_right;
				node->vm_avl_right = noderightleft->vm_avl_left;
				noderightleft->vm_avl_right = noderight;
				noderightleft->vm_avl_left = node;
				noderight->vm_avl_height = node->vm_avl_height = heightrightleft;
				noderightleft->vm_avl_height = heightright;
				*nodeplace = noderightleft;
			}
		}
		else {
			int height = (heightleft<heightright ? heightright : heightleft) + 1;
			if (height == node->vm_avl_height)
				break;
			node->vm_avl_height = height;
		}
	}
}

/* Insert a node into a tree. */
static inline void avl_insert (struct vm_area_struct * new_node, struct vm_area_struct ** ptree)
{
	vm_avl_key_t key = new_node->vm_avl_key;
	struct vm_area_struct ** nodeplace = ptree;
	struct vm_area_struct ** stack[avl_maxheight];
	int stack_count = 0;
	struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
	for (;;) {
		struct vm_area_struct * node = *nodeplace;
		if (node == avl_empty)
			break;
		*stack_ptr++ = nodeplace; stack_count++;
		if (key < node->vm_avl_key)
			nodeplace = &node->vm_avl_left;
		else
			nodeplace = &node->vm_avl_right;
	}
	new_node->vm_avl_left = avl_empty;
	new_node->vm_avl_right = avl_empty;
	new_node->vm_avl_height = 1;
	*nodeplace = new_node;
	avl_rebalance(stack_ptr,stack_count);
}

/* Insert a node into a tree, and
 * return the node to the left of it and the node to the right of it.
 */
static inline void avl_insert_neighbours (struct vm_area_struct * new_node, struct vm_area_struct ** ptree,
	struct vm_area_struct ** to_the_left, struct vm_area_struct ** to_the_right)
{
	vm_avl_key_t key = new_node->vm_avl_key;
	struct vm_area_struct ** nodeplace = ptree;
	struct vm_area_struct ** stack[avl_maxheight];
	int stack_count = 0;
	struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
	*to_the_left = *to_the_right = NULL;
	for (;;) {
		struct vm_area_struct * node = *nodeplace;
		if (node == avl_empty)
			break;
		*stack_ptr++ = nodeplace; stack_count++;
		if (key < node->vm_avl_key) {
			*to_the_right = node;
			nodeplace = &node->vm_avl_left;
		} else {
			*to_the_left = node;
			nodeplace = &node->vm_avl_right;
		}
	}
	new_node->vm_avl_left = avl_empty;
	new_node->vm_avl_right = avl_empty;
	new_node->vm_avl_height = 1;
	*nodeplace = new_node;
	avl_rebalance(stack_ptr,stack_count);
}

/* Removes a node out of a tree. */
static inline void avl_remove (struct vm_area_struct * node_to_delete, struct vm_area_struct ** ptree)
{
	vm_avl_key_t key = node_to_delete->vm_avl_key;
	struct vm_area_struct ** nodeplace = ptree;
	struct vm_area_struct ** stack[avl_maxheight];
	int stack_count = 0;
	struct vm_area_struct *** stack_ptr = &stack[0]; /* = &stack[stackcount] */
	struct vm_area_struct ** nodeplace_to_delete;
	for (;;) {
		struct vm_area_struct * node = *nodeplace;
		if (node == avl_empty) {
			/* what? node_to_delete not found in tree? */
			printk("avl_remove: node to delete not found in tree\n");
			return;
		}
		*stack_ptr++ = nodeplace; stack_count++;
		if (key == node->vm_avl_key)
			break;
		if (key < node->vm_avl_key)
			nodeplace = &node->vm_avl_left;
		else
			nodeplace = &node->vm_avl_right;
	}
	nodeplace_to_delete = nodeplace;
	/* Have to remove node_to_delete = *nodeplace_to_delete. */
	if (node_to_delete->vm_avl_left == avl_empty) {
		*nodeplace_to_delete = node_to_delete->vm_avl_right;
		stack_ptr--; stack_count--;
	} else {
		struct vm_area_struct *** stack_ptr_to_delete = stack_ptr;
		struct vm_area_struct ** nodeplace = &node_to_delete->vm_avl_left;
		struct vm_area_struct * node;
		for (;;) {
			node = *nodeplace;
			if (node->vm_avl_right == avl_empty)
				break;
			*stack_ptr++ = nodeplace; stack_count++;
			nodeplace = &node->vm_avl_right;
		}
		*nodeplace = node->vm_avl_left;
		/* node replaces node_to_delete */
		node->vm_avl_left = node_to_delete->vm_avl_left;
		node->vm_avl_right = node_to_delete->vm_avl_right;
		node->vm_avl_height = node_to_delete->vm_avl_height;
		*nodeplace_to_delete = node; /* replace node_to_delete */
		*stack_ptr_to_delete = &node->vm_avl_left; /* replace &node_to_delete->vm_avl_left */
	}
	avl_rebalance(stack_ptr,stack_count);
}

#ifdef DEBUG_AVL

/* print a list */
static void printk_list (struct vm_area_struct * vma)
{
	printk("[");
	while (vma) {
		printk("%08lX-%08lX", vma->vm_start, vma->vm_end);
		vma = vma->vm_next;
		if (!vma)
			break;
		printk(" ");
	}
	printk("]");
}

/* print a tree */
static void printk_avl (struct vm_area_struct * tree)
{
	if (tree != avl_empty) {
		printk("(");
		if (tree->vm_avl_left != avl_empty) {
			printk_avl(tree->vm_avl_left);
			printk("<");
		}
		printk("%08lX-%08lX", tree->vm_start, tree->vm_end);
		if (tree->vm_avl_right != avl_empty) {
			printk(">");
			printk_avl(tree->vm_avl_right);
		}
		printk(")");
	}
}

static char *avl_check_point = "somewhere";

/* check a tree's consistency and balancing */
static void avl_checkheights (struct vm_area_struct * tree)
{
	int h, hl, hr;

	if (tree == avl_empty)
		return;
	avl_checkheights(tree->vm_avl_left);
	avl_checkheights(tree->vm_avl_right);
	h = tree->vm_avl_height;
	hl = heightof(tree->vm_avl_left);
	hr = heightof(tree->vm_avl_right);
	if ((h == hl+1) && (hr <= hl) && (hl <= hr+1))
		return;
	if ((h == hr+1) && (hl <= hr) && (hr <= hl+1))
		return;
	printk("%s: avl_checkheights: heights inconsistent\n",avl_check_point);
}

/* check that all values stored in a tree are < key */
static void avl_checkleft (struct vm_area_struct * tree, vm_avl_key_t key)
{
	if (tree == avl_empty)
		return;
	avl_checkleft(tree->vm_avl_left,key);
	avl_checkleft(tree->vm_avl_right,key);
	if (tree->vm_avl_key < key)
		return;
	printk("%s: avl_checkleft: left key %lu >= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
}

/* check that all values stored in a tree are > key */
static void avl_checkright (struct vm_area_struct * tree, vm_avl_key_t key)
{
	if (tree == avl_empty)
		return;
	avl_checkright(tree->vm_avl_left,key);
	avl_checkright(tree->vm_avl_right,key);
	if (tree->vm_avl_key > key)
		return;
	printk("%s: avl_checkright: right key %lu <= top key %lu\n",avl_check_point,tree->vm_avl_key,key);
}

/* check that all values are properly increasing */
static void avl_checkorder (struct vm_area_struct * tree)
{
	if (tree == avl_empty)
		return;
	avl_checkorder(tree->vm_avl_left);
	avl_checkorder(tree->vm_avl_right);
	avl_checkleft(tree->vm_avl_left,tree->vm_avl_key);
	avl_checkright(tree->vm_avl_right,tree->vm_avl_key);
}

/* all checks */
static void avl_check (struct task_struct * task, char *caller)
{
	avl_check_point = caller;
/*	printk("task \"%s\", %s\n",task->comm,caller); */
/*	printk("task \"%s\" list: ",task->comm); printk_list(task->mm->mmap); printk("\n"); */
/*	printk("task \"%s\" tree: ",task->comm); printk_avl(task->mm->mmap_avl); printk("\n"); */
	avl_checkheights(task->mm->mmap_avl);
	avl_checkorder(task->mm->mmap_avl);
}

#endif


/*
 * Normal function to fix up a mapping
 * This function is the default for when an area has no specific
 * function.  This may be used as part of a more specific routine.
 * This function works out what part of an area is affected and
 * adjusts the mapping information.  Since the actual page
 * manipulation is done in do_mmap(), none need be done here,
 * though it would probably be more appropriate.
 *
 * By the time this function is called, the area struct has been
 * removed from the process mapping list, so it needs to be
 * reinserted if necessary.
 *
 * The 4 main cases are:
 *    Unmapping the whole area
 *    Unmapping from the start of the segment to a point in it
 *    Unmapping from an intermediate point to the end
 *    Unmapping between to intermediate points, making a hole.
 *
 * Case 4 involves the creation of 2 new areas, for each side of
 * the hole.
 */
static void unmap_fixup(struct vm_area_struct *area,
		 unsigned long addr, size_t len)
{
	struct vm_area_struct *mpnt;
	unsigned long end = addr + len;

	if (addr < area->vm_start || addr >= area->vm_end ||
	    end <= area->vm_start || end > area->vm_end ||
	    end < addr) /* czy podane dane są poprawne */
	{
		printk("unmap_fixup: area=%lx-%lx, unmap %lx-%lx!!\n",
		       area->vm_start, area->vm_end, addr, end);
		return;
	}
	/* Od całkowitej liczby stron odejmujemy liczbę stron jaką mamy
	   usunąć */
	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
	if (area->vm_flags & VM_LOCKED)
		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;

	/* Unmapping the whole area */
	/* Obszar który chcemy usunąć jest równy obszarowi zajętemu przez area */
	if (addr == area->vm_start && end == area->vm_end) {
		if (area->vm_ops && area->vm_ops->close)
			area->vm_ops->close(area);
		if (area->vm_inode)
			iput(area->vm_inode);
		return;
	}

	/* Work out to one of the ends */
	/* Obszar do usunięcia jest końcem area */
	if (end == area->vm_end)
		area->vm_end = addr;
	else
	/* Obszar do usunięcia jest początkiem area */
	if (addr == area->vm_start) {
		area->vm_offset += (end - area->vm_start);
		area->vm_start = end;
	}
	else {
	/* Obszar do usunięcia rozdziela area na dwa vma */
	/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
		/* Add end mapping -- leave beginning for below */
		mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);

		if (!mpnt)
			return;
		*mpnt = *area;
		mpnt->vm_offset += (end - area->vm_start);
		mpnt->vm_start = end;
		if (mpnt->vm_inode)
			mpnt->vm_inode->i_count++;
		if (mpnt->vm_ops && mpnt->vm_ops->open)
			mpnt->vm_ops->open(mpnt);
		area->vm_end = addr;	/* Truncate area */
		insert_vm_struct(current->mm, mpnt);
	}

	/* construct whatever mapping is needed */
	/* ewentualne dodawanie drugiego mpnt (w przypadku
	   usuwania ze środka vma */
	mpnt = (struct vm_area_struct *)kmalloc(sizeof(*mpnt), GFP_KERNEL);
	if (!mpnt)
		return;
	*mpnt = *area;
	if (mpnt->vm_ops && mpnt->vm_ops->open)
		mpnt->vm_ops->open(mpnt);
	if (area->vm_ops && area->vm_ops->close) {
		area->vm_end = area->vm_start;
		area->vm_ops->close(area);
	}
	insert_vm_struct(current->mm, mpnt);
}

asmlinkage int sys_munmap(unsigned long addr, size_t len)
{
	return do_munmap(addr, len);
}

/*
 * Munmap is split into 2 main parts -- this part which finds
 * what needs doing, and the areas themselves, which do the
 * work.  This now handles partial unmappings.
 * Jeremy Fitzhardine <jeremy@sw.oz.au>
 */
int do_munmap(unsigned long addr, size_t len)
/* argumenty:
   addr - adres obszaru do odłączenia
   len	- wielkość obszaru do odłączenia
   zawraca 0 gdy wszystko odbyło się pomyślnie
   liczbe<0 gdy wystapił błąd - (zdefiniowane w errno.h) niektóre zaś
   opisane w funkcji do_mmap
*/

{
	struct vm_area_struct *mpnt, *prev, *next, **npp, *free;

	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
		return -EINVAL; /* niewłaściwy argument */

	if ((len = PAGE_ALIGN(len)) == 0)
		return 0; /* nie chcemy nic zmieniać więc wychodzimy */
	/* PAGE_ALIGN(len)=addr gdy (len mod PAGE_SIZE)==0
			   =PAGE_SIZE*(len div PAGE_SIZE +1)
	   PAGE_SIZE - rozmiar strony
	*/

	/*
	 * Check if this memory area is ok - put it on the temporary
	 * list if so..	 The checks here are pretty simple --
	 * every area affected in some way (by any overlap) is put
	 * on the list.	 If nothing is put on, nothing is affected.
	 */
	mpnt = find_vma(current->mm, addr);
	/* find_vma wyszukuje pierwszego vma który spełnia addr<vm_end
	   NULL jeśli takiego nie ma */

	if (!mpnt)
		return 0;
	avl_neighbours(mpnt, current->mm->mmap_avl, &prev, &next);
	/* we have  prev->vm_next == mpnt && mpnt->vm_next = next */
	/* and	addr < mpnt->vm_end  */

	npp = (prev ? &prev->vm_next : ¤t->mm->mmap);
	free = NULL;
	/* wstawianie na listę free, vma do usunięcia
	    znajdujące się w przedziale (addr, addr+len) */
	for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
		*npp = mpnt->vm_next;
		mpnt->vm_next = free;
		free = mpnt;
		/* wstawiliśmy już na listę więc możemy usunąć mpnt z AVL */
		avl_remove(mpnt, ¤t->mm->mmap_avl);
	}
	/* nic nie wstawiono więc nic nię można usunąć */
	if (free == NULL)
		return 0;

	/*
	 * Ok - we have the memory areas we should free on the 'free' list,
	 * so release them, and unmap the page range..
	 * If the one of the segments is only being partially unmapped,
	 * it will put new vm_area_struct(s) into the address space.
	 */
	do {
		unsigned long st, end;

		mpnt = free;
		free = free->vm_next;

		/* usuwanie mpnt z list inode i i_mmap */
		remove_shared_vm_struct(mpnt);

		st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
		end = addr+len;
		end = end > mpnt->vm_end ? mpnt->vm_end : end;

		if (mpnt->vm_ops && mpnt->vm_ops->unmap)
			mpnt->vm_ops->unmap(mpnt, st, end-st);
		//usuwanie stron o adresach z przedziału (st, end-st)
		zap_page_range(current->mm, st, end-st);
		unmap_fixup(mpnt, st, end-st);
		kfree(mpnt);
	} while (free); /* dopóki są jeszcze jakieś elementy do usunięcia */

	/* we could zap the page tables here too.. */

	return 0;
}

/* Build the AVL tree corresponding to the VMA list. */
void build_mmap_avl(struct mm_struct * mm)
{
	struct vm_area_struct * vma;

	mm->mmap_avl = NULL;
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		avl_insert(vma, &mm->mmap_avl);
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct * mm)
/* usuwa wszystkie mapowania plików */
{
	struct vm_area_struct * mpnt;

	mpnt = mm->mmap;
	mm->mmap = NULL;
	mm->mmap_avl = NULL;
	mm->rss = 0;
	mm->total_vm = 0;
	mm->locked_vm = 0;
	while (mpnt) {
		struct vm_area_struct * next = mpnt->vm_next;
		if (mpnt->vm_ops) {
			if (mpnt->vm_ops->unmap)
				mpnt->vm_ops->unmap(mpnt, mpnt->vm_start, mpnt->vm_end-mpnt->vm_start);
			if (mpnt->vm_ops->close)
				mpnt->vm_ops->close(mpnt);
		}
		remove_shared_vm_struct(mpnt);
		/* usuwanie stron o adresach z podanego przedziału */
		zap_page_range(mm, mpnt->vm_start, mpnt->vm_end-mpnt->vm_start);
		if (mpnt->vm_inode)
			iput(mpnt->vm_inode);
		kfree(mpnt);
		mpnt = next;
	}
}

/*
 * Insert vm structure into process list sorted by address
 * and into the inode's i_mmap ring.
 */
void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp)
/* wstawianie vmp do listy vm-ów procesu sortowanej względem adresów
   oraz do listy i_mmap węzła inode */
{
	struct vm_area_struct *share;
	struct inode * inode;

#if 0 /* equivalent, but slow */
	struct vm_area_struct **p, *mpnt;

	p = &mm->mmap;
	while ((mpnt = *p) != NULL) {
		if (mpnt->vm_start > vmp->vm_start)
			break;
		if (mpnt->vm_end > vmp->vm_start)
			printk("insert_vm_struct: overlapping memory areas\n");
		p = &mpnt->vm_next;
	}
	vmp->vm_next = mpnt;
	*p = vmp;
#else
	struct vm_area_struct * prev, * next;

	avl_insert_neighbours(vmp, &mm->mmap_avl, &prev, &next);
	if ((prev ? prev->vm_next : mm->mmap) != next)
		printk("insert_vm_struct: tree inconsistent with list\n");
	if (prev)
		prev->vm_next = vmp;
	else
		mm->mmap = vmp;
	vmp->vm_next = next;
#endif

	inode = vmp->vm_inode;
	if (!inode)
		return;

	/* insert vmp into inode's circular share list */
	if ((share = inode->i_mmap)) {
		vmp->vm_next_share = share->vm_next_share;
		vmp->vm_next_share->vm_prev_share = vmp;
		share->vm_next_share = vmp;
		vmp->vm_prev_share = share;
	} else
		inode->i_mmap = vmp->vm_next_share = vmp->vm_prev_share = vmp;
}

/*
 * Remove one vm structure from the inode's i_mmap ring.
 */
void remove_shared_vm_struct(struct vm_area_struct *mpnt)
/* Usuwanie mpnt z listy i_mmap */
{
	struct inode * inode = mpnt->vm_inode;

	if (!inode)
		return;

	if (mpnt->vm_next_share == mpnt) {
		if (inode->i_mmap != mpnt)
			printk("Inode i_mmap ring corrupted\n");
		inode->i_mmap = NULL;
		return;
	}

	if (inode->i_mmap == mpnt)
		inode->i_mmap = mpnt->vm_next_share;

	mpnt->vm_prev_share->vm_next_share = mpnt->vm_next_share;
	mpnt->vm_next_share->vm_prev_share = mpnt->vm_prev_share;
}

/*
 * Merge the list of memory segments if possible.
 * Redundant vm_area_structs are freed.
 * This assumes that the list is ordered by address.
 * We don't need to traverse the entire list, only those segments
 * which intersect or are adjacent to a given interval.
 */
void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
{
	struct vm_area_struct *prev, *mpnt, *next;
	/* blokujemy semaforem dostęp do mm aby nie bylo sytuacji w której
	   proces odwołuje się do czegoś co zostało zmienione natomiast
	   jeszcze nie uaktualnione*/
	down(&mm->mmap_sem);
	mpnt = find_vma(mm, start_addr); /* pierwsze vma z przedziału */
	if (!mpnt)
		goto no_vma;

	avl_neighbours(mpnt, mm->mmap_avl, &prev, &next);
	/* we have  prev->vm_next == mpnt && mpnt->vm_next = next */

	if (!prev) {
		prev = mpnt;
		mpnt = next;
	}

	/* prev and mpnt cycle through the list, as long as
	 * start_addr < mpnt->vm_end && prev->vm_start < end_addr
	 */
	 /*przesuwamy się po kolejnych vma o adresach z żądanego przedziału */
	for ( ; mpnt && prev->vm_start < end_addr ; prev = mpnt, mpnt = next) {
#if 0
		printk("looping in merge_segments, mpnt=0x%lX\n", (unsigned long) mpnt);
#endif

		next = mpnt->vm_next;

		/*
		 * To share, we must have the same inode, operations..
		 */
		 /* vma do połączenia muszą mieć identyczne pola zawierające
		    informacje o nich */
		if (mpnt->vm_inode != prev->vm_inode)
			continue;
		if (mpnt->vm_pte != prev->vm_pte)
			continue;
		if (mpnt->vm_ops != prev->vm_ops)
			continue;
		if (mpnt->vm_flags != prev->vm_flags)
			continue;
		if (prev->vm_end != mpnt->vm_start)
			continue;
		/*
		 * and if we have an inode, the offsets must be contiguous..
		 */
		if ((mpnt->vm_inode != NULL) || (mpnt->vm_flags & VM_SHM)) {
			/* sprawdzamy czy można je złączyć */
			if (prev->vm_offset + prev->vm_end - prev->vm_start != mpnt->vm_offset)
				continue;
		}

		/*
		 * merge prev with mpnt and set up pointers so the new
		 * big segment can possibly merge with the next one.
		 * The old unused mpnt is freed.
		 */
		 /* natrafiliśmy na dwa vma między którymi nie ma dziury
		    tzn. które można połączyć */
		avl_remove(mpnt, &mm->mmap_avl);
		prev->vm_end = mpnt->vm_end;
		prev->vm_next = mpnt->vm_next;
		if (mpnt->vm_ops && mpnt->vm_ops->close) {
			mpnt->vm_offset += mpnt->vm_end - mpnt->vm_start;
			mpnt->vm_start = mpnt->vm_end;
			mpnt->vm_ops->close(mpnt);
		}
		remove_shared_vm_struct(mpnt);
		if (mpnt->vm_inode)
			mpnt->vm_inode->i_count--;
		kfree_s(mpnt, sizeof(*mpnt));
		mpnt = prev;
	}
no_vma:
	up(&mm->mmap_sem); /* podnosimy semafor */
}

Radoslaw Szymaszek