/* Polish comments: Jakub Posiadała */




#ifndef _I386_PGTABLE_H
#define _I386_PGTABLE_H

#include <linux/config.h>

/*
 * Define USE_PENTIUM_MM if you want the 4MB page table optimizations.
 * This works only on an Intel Pentium.
 *
 * NOTE(review): nothing in this header tests USE_PENTIUM_MM; presumably
 * it is consumed by code that includes this file -- confirm.
 */
#define USE_PENTIUM_MM 1

/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */

//============================================================================
//
// This file contains the constants and function definitions that depend on
// the processor architecture.
//
//============================================================================

/* Caches aren't brain-dead on the intel. */

//===========================================================================
// The Intel processor takes care of these operations itself, so they are
// defined here as empty statements.
//===========================================================================


#define flush_cache_all()                       do { } while (0)
#define flush_cache_mm(mm)                      do { } while (0)
#define flush_cache_range(mm, start, end)       do { } while (0)
#define flush_cache_page(vma, vmaddr)           do { } while (0)
#define flush_page_to_ram(page)                 do { } while (0)

/*
 * TLB flushing:
 *
 *  - flush_tlb() flushes the current mm struct TLBs
 *  - flush_tlb_all() flushes all processes TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(mm, start, end) flushes a range of pages
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */



/*
 * Whole-TLB flush primitive: read %cr3 into a scratch register and write
 * the same value straight back.  The asm is volatile and carries a
 * "memory" clobber, so the compiler will neither drop it nor keep memory
 * values cached in registers across it.
 */
#define __flush_tlb() \
do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3":"=r" (tmpreg) : :"memory"); } while (0)

/*
 * __flush_tlb_one(addr): invalidate the TLB entry covering one address.
 *
 * With CONFIG_M386 this falls back to flush_tlb(); the TLB comment earlier
 * in this file notes that page-granular flushes exist only on i486 and up.
 * Otherwise it issues "invlpg" on the byte at addr.
 *
 * The argument is parenthesized so that an expression argument such as
 * "base + off" is cast and dereferenced as a whole, instead of expanding
 * to "*(char *) base + off".
 */
#ifdef CONFIG_M386
#define __flush_tlb_one(addr) flush_tlb()
#else
#define __flush_tlb_one(addr) \
__asm__ __volatile__("invlpg %0": :"m" (*(char *) (addr)))
#endif
 
#ifndef __SMP__

/*
 * Non-SMP build: all three whole-TLB flush flavours are the same local
 * %cr3 reload.
 */
#define flush_tlb() __flush_tlb()
#define flush_tlb_all() __flush_tlb()
#define local_flush_tlb() __flush_tlb()

/*
 * Flush the TLB when `mm` is the address space of the current task;
 * for any other mm this does nothing.
 */
static inline void flush_tlb_mm(struct mm_struct *mm)
{
        if (mm != current->mm)
                return;

        __flush_tlb();
}

/*
 * Flush the TLB entry for `addr` when the vma belongs to the current
 * task's address space; otherwise do nothing.
 */
static inline void flush_tlb_page(struct vm_area_struct *vma,
        unsigned long addr)
{
        if (vma->vm_mm != current->mm)
                return;

        __flush_tlb_one(addr);
}

/*
 * Flush a range of the given address space.  The bounds are not used:
 * when `mm` is the current task's mm the whole TLB is flushed, and for
 * any other mm nothing happens.
 */
static inline void flush_tlb_range(struct mm_struct *mm,
        unsigned long start, unsigned long end)
{
        (void) start;
        (void) end;

        if (mm != current->mm)
                return;

        __flush_tlb();
}

#else

/*
 * We aren't very clever about this yet -  SMP could certainly
 * avoid some global flushes..
 */

#include <asm/smp.h>

/* SMP build: flush only the executing CPU's TLB via the %cr3 reload. */
#define local_flush_tlb() \
        __flush_tlb()


/*
 * Selects the smarter SMP flush routines that follow.  Because it is
 * defined unconditionally here, the #else branch further down (where
 * every flush is a plain smp_flush_tlb()) is never compiled.
 */
#define CLEVER_SMP_INVALIDATE
#ifdef CLEVER_SMP_INVALIDATE

/*
 *      Smarter SMP flushing macros. 
 *              c/o Linus Torvalds.
 *
 *      These mean you can really definitely utterly forget about
 *      writing to user space from interrupts. (Its not allowed anyway).
 */
 
/*
 * Flush the TLB for the current task's address space.  A local flush is
 * used when current->mm->count is 1; otherwise smp_flush_tlb() is called.
 */
static inline void flush_tlb_current_task(void)
{
        if (current->mm->count != 1) {
                smp_flush_tlb();
                return;
        }

        /* just one copy of this mm, and that's us */
        local_flush_tlb();
}

/* flush_tlb() takes the count-aware path for the current task. */
#define flush_tlb() flush_tlb_current_task()

/* flush_tlb_all() always goes through smp_flush_tlb(). */
#define flush_tlb_all() smp_flush_tlb()

/*
 * Flush the TLB for `mm`.  A local flush is used only when `mm` is the
 * current task's mm and its count is 1; every other case goes through
 * smp_flush_tlb().
 */
static inline void flush_tlb_mm(struct mm_struct * mm)
{
        if (mm != current->mm || mm->count != 1) {
                smp_flush_tlb();
                return;
        }

        local_flush_tlb();
}

/*
 * Flush the TLB entry for `va`.  The single-entry flush is used only when
 * the vma belongs to the current task's mm and that mm's count is 1;
 * every other case goes through smp_flush_tlb().
 */
static inline void flush_tlb_page(struct vm_area_struct * vma,
        unsigned long va)
{
        if (vma->vm_mm != current->mm || current->mm->count != 1) {
                smp_flush_tlb();
                return;
        }

        __flush_tlb_one(va);
}

/*
 * Flush a range of `mm`.  The bounds are not used; the request is handed
 * to flush_tlb_mm() for the whole address space.
 */
static inline void flush_tlb_range(struct mm_struct * mm,
        unsigned long start, unsigned long end)
{
        (void) start;
        (void) end;

        flush_tlb_mm(mm);
}


#else

/*
 * Fallback used when CLEVER_SMP_INVALIDATE is not defined: every flush,
 * including flush_tlb_all(), is an smp_flush_tlb().
 */
#define flush_tlb() \
        smp_flush_tlb()

#define flush_tlb_all() flush_tlb()

/* Fallback variant: `mm` is ignored and flush_tlb() is always issued. */
static inline void flush_tlb_mm(struct mm_struct *mm)
{
        (void) mm;

        flush_tlb();
}

/*
 * Fallback variant: neither the vma nor the address is consulted and
 * flush_tlb() is always issued.
 */
static inline void flush_tlb_page(struct vm_area_struct *vma,
        unsigned long addr)
{
        (void) vma;
        (void) addr;

        flush_tlb();
}

/*
 * Fallback variant: the mm and the bounds are ignored and flush_tlb() is
 * always issued.
 */
static inline void flush_tlb_range(struct mm_struct *mm,
        unsigned long start, unsigned long end)
{
        (void) mm;
        (void) start;
        (void) end;

        flush_tlb();
}
#endif
#endif


/* Certain architectures need to do special things when pte's
 * within a page table are directly modified.  Thus, the following
 * hook is made available.
 *
 * Here it is a plain assignment of pteval through pteptr; the value of
 * the expression is the assigned value.
 */
#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))

/* PMD_SHIFT determines the size of the area a second-level page table can map */

//================================================================================
//
//   Size of the area covered by one page table - the second level of the
//   memory structure.  As is easy to compute, one page table covers
//   2^22 bytes, i.e. 4MB.
//
//================================================================================

#define PMD_SHIFT       22
#define PMD_SIZE        (1UL << PMD_SHIFT)
#define PMD_MASK        (~(PMD_SIZE-1))

/* PGDIR_SHIFT determines what a third-level page table entry can map */

//=============================================================================
//
//   Size of the area covered by one page directory entry - the third level
//   of the memory structure.  This is also 4MB (PGDIR_SHIFT equals
//   PMD_SHIFT).
//
//=============================================================================

#define PGDIR_SHIFT     22
#define PGDIR_SIZE      (1UL << PGDIR_SHIFT)
#define PGDIR_MASK      (~(PGDIR_SIZE-1))

/*
 * entries per page directory level: the i386 is two-level, so
 * we don't really have any PMD directory physically.
 *
 * PTRS_PER_PMD is 1: this is where it shows that the page table tree is
 * really only two levels deep here.
 *
 * (This comment must be closed before the defines; left open, it extends
 * to the end of the next comment and swallows all three definitions.)
 */
#define PTRS_PER_PTE    1024
#define PTRS_PER_PMD    1
#define PTRS_PER_PGD    1024

/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be a 8MB "hole" after the
 * physical memory until the kernel virtual memory starts.  That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 *
 * VMALLOC_START adds VMALLOC_OFFSET to high_memory and rounds the sum
 * down to a multiple of VMALLOC_OFFSET, so the hole is exactly 8MB only
 * when high_memory is itself 8MB-aligned, and smaller otherwise.
 * VMALLOC_VMADDR(x) is x offset by TASK_SIZE.
 */
#define VMALLOC_OFFSET  (8*1024*1024)
#define VMALLOC_START ((high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
#define VMALLOC_VMADDR(x) (TASK_SIZE + (unsigned long)(x))

/*
 * The 4MB page is guessing..  Detailed in the infamous "Chapter H"
 * of the Pentium details, but assuming intel did the straightforward
 * thing, this bit set in the page directory entry just means that
 * the page directory entry points directly to a 4MB-aligned block of
 * memory. 
 */

//=============================================================================
//
// Rumour has it that the Pentium designers made 4MB page frames possible
// (yes, four megabytes).  This was not described in the official processor
// documentation, however, so it is only guesswork.
//
//=============================================================================

#define _PAGE_PRESENT   0x001
#define _PAGE_RW        0x002
#define _PAGE_USER      0x004
#define _PAGE_PCD       0x010
#define _PAGE_ACCESSED  0x020
#define _PAGE_DIRTY     0x040
#define _PAGE_4M        0x080   /* 4 MB page, Pentium+.. */

//=============================================================================
// Constants describing the properties of a page.
// The individual bits are described in detail in the mem_map_t notes.
//=============================================================================

/* Low bits stored in a pmd entry that points at a page table. */
#define _PAGE_TABLE     (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
/* Bits that pte_modify() keeps when it installs a new protection. */
#define _PAGE_CHG_MASK  (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * Protection values.  PAGE_COPY and PAGE_READONLY expand to the same
 * bits; PAGE_NONE and PAGE_KERNEL leave _PAGE_USER clear.
 */
#define PAGE_NONE       __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)
#define PAGE_SHARED     __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_COPY       __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_READONLY   __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define PAGE_KERNEL     __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)

/*
 * The i386 can't do page protection for execute, and considers that the same as read.
 * Also, write permissions imply read permissions. This is the closest we can get..
 *
 * NOTE(review): presumably __Pxxx is used for private and __Sxxx for
 * shared mappings, with the three digits meaning exec/write/read --
 * confirm against the generic mm code that indexes these.
 */
#define __P000  PAGE_NONE
#define __P001  PAGE_READONLY
#define __P010  PAGE_COPY
#define __P011  PAGE_COPY
#define __P100  PAGE_READONLY
#define __P101  PAGE_READONLY
#define __P110  PAGE_COPY
#define __P111  PAGE_COPY

#define __S000  PAGE_NONE
#define __S001  PAGE_READONLY
#define __S010  PAGE_SHARED
#define __S011  PAGE_SHARED
#define __S100  PAGE_READONLY
#define __S101  PAGE_READONLY
#define __S110  PAGE_SHARED
#define __S111  PAGE_SHARED

/*
 * Define this if things work differently on a i386 and a i486:
 * it will (on a i486) warn about kernel memory accesses that are
 * done without a 'verify_area(VERIFY_WRITE,..)'
 */
#undef TEST_VERIFY_AREA

/* page table for 0-4MB for everybody (1024 entries; defined elsewhere) */
extern unsigned long pg0[1024];
/* zero page used for uninitialized stuff (defined elsewhere) */
extern unsigned long empty_zero_page[1024];

/*
 * BAD_PAGETABLE is used when we need a bogus page-table, while
 * BAD_PAGE is used for a bogus page.
 *
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc..
 *
 * BAD_PAGETABLE and BAD_PAGE each expand to a function call, so every
 * use invokes __bad_pagetable() / __bad_page().  ZERO_PAGE is the
 * address of empty_zero_page as an unsigned long.
 */

extern pte_t __bad_page(void);
extern pte_t * __bad_pagetable(void);

#define BAD_PAGETABLE __bad_pagetable()
#define BAD_PAGE __bad_page()
#define ZERO_PAGE ((unsigned long) empty_zero_page)

/* number of bits that fit into a memory pointer */
#define BITS_PER_PTR                    (8*sizeof(unsigned long))

/* to align the pointer to a pointer address */
#define PTR_MASK                        (~(sizeof(void*)-1))

/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
/* 64-bit machines, beware!  SRB. */
#define SIZEOF_PTR_LOG2                 2

/*
 * to find an entry in a page-table
 *
 * The address is shifted right by (PAGE_SHIFT - SIZEOF_PTR_LOG2) and then
 * masked with PTR_MASK and ~PAGE_MASK, which yields a pointer-aligned
 * byte offset that lies within one page.
 */
#define PAGE_PTR(address) \
((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)

/*
 * to set the page-dir
 *
 * Records pgdir in (tsk)->tss.cr3 and, when tsk is the current task,
 * also loads it into %cr3 right away.  Both arguments are expanded more
 * than once, so they should be free of side effects.
 */
#define SET_PAGE_DIR(tsk,pgdir) \
do { \
        (tsk)->tss.cr3 = (unsigned long) (pgdir); \
        if ((tsk) == current) \
                __asm__ __volatile__("movl %0,%%cr3": :"r" (pgdir)); \
} while (0)

//=====================================================================================
//  Macros reporting the status of page frames and pages.
//=====================================================================================

/*
 * pte_none: the entry is entirely zero.  pte_present: _PAGE_PRESENT is
 * set.  pte_clear: zero the entry that xp points at.
 */
#define pte_none(x)     (!pte_val(x))
#define pte_present(x)  (pte_val(x) & _PAGE_PRESENT)
#define pte_clear(xp)   do { pte_val(*(xp)) = 0; } while (0)

/*
 * Same tests for a pmd entry.  pmd_bad: the bits outside PAGE_MASK are
 * not exactly _PAGE_TABLE.
 */
#define pmd_none(x)     (!pmd_val(x))
#define pmd_bad(x)      ((pmd_val(x) & ~PAGE_MASK) != _PAGE_TABLE)
#define pmd_present(x)  (pmd_val(x) & _PAGE_PRESENT)
#define pmd_clear(xp)   do { pmd_val(*(xp)) = 0; } while (0)

/*
 * The "pgd_xxx()" functions here are trivial for a folded two-level
 * setup: the pgd is never bad, and a pmd always exists (as it's folded
 * into the pgd entry)
 */
/* A pgd entry is never reported as empty. */
extern inline int pgd_none(pgd_t pgd)
{
        (void) pgd;
        return 0;
}

/* A pgd entry is never reported as bad. */
extern inline int pgd_bad(pgd_t pgd)
{
        (void) pgd;
        return 0;
}

/* A pgd entry is always reported as present. */
extern inline int pgd_present(pgd_t pgd)
{
        (void) pgd;
        return 1;
}

/* Clearing a pgd entry is a no-op. */
extern inline void pgd_clear(pgd_t * pgdp)
{
        (void) pgdp;
}

/*
 * The following only work if pte_present() is true.
 * Undefined behaviour if not..
 */

//=====================================================================================
//  Functions reporting the status of a page table entry.
//  The pte_t structure is defined in page.h.
//  Each one returns the raw masked bit, i.e. zero or a nonzero value.
//=====================================================================================

/* Readable: tests _PAGE_USER. */
extern inline int pte_read(pte_t pte)
{
        return pte_val(pte) & _PAGE_USER;
}

/* Writable: tests _PAGE_RW. */
extern inline int pte_write(pte_t pte)
{
        return pte_val(pte) & _PAGE_RW;
}

/* Executable: tests _PAGE_USER, the same bit as pte_read(). */
extern inline int pte_exec(pte_t pte)
{
        return pte_val(pte) & _PAGE_USER;
}

/* Dirty: tests _PAGE_DIRTY. */
extern inline int pte_dirty(pte_t pte)
{
        return pte_val(pte) & _PAGE_DIRTY;
}

/* Young: tests _PAGE_ACCESSED. */
extern inline int pte_young(pte_t pte)
{
        return pte_val(pte) & _PAGE_ACCESSED;
}

/*
 * Each helper below receives a pte by value, clears or sets one flag in
 * that copy and returns the result; the caller's entry is not touched.
 */

/* Clear _PAGE_RW. */
extern inline pte_t pte_wrprotect(pte_t pte)
{
        pte_val(pte) = pte_val(pte) & ~_PAGE_RW;
        return pte;
}

/* Clear _PAGE_USER. */
extern inline pte_t pte_rdprotect(pte_t pte)
{
        pte_val(pte) = pte_val(pte) & ~_PAGE_USER;
        return pte;
}

/* Clear _PAGE_USER (same bit as pte_rdprotect()). */
extern inline pte_t pte_exprotect(pte_t pte)
{
        pte_val(pte) = pte_val(pte) & ~_PAGE_USER;
        return pte;
}

/* Clear _PAGE_DIRTY. */
extern inline pte_t pte_mkclean(pte_t pte)
{
        pte_val(pte) = pte_val(pte) & ~_PAGE_DIRTY;
        return pte;
}

/* Clear _PAGE_ACCESSED. */
extern inline pte_t pte_mkold(pte_t pte)
{
        pte_val(pte) = pte_val(pte) & ~_PAGE_ACCESSED;
        return pte;
}

/* Set _PAGE_RW. */
extern inline pte_t pte_mkwrite(pte_t pte)
{
        pte_val(pte) = pte_val(pte) | _PAGE_RW;
        return pte;
}

/* Set _PAGE_USER. */
extern inline pte_t pte_mkread(pte_t pte)
{
        pte_val(pte) = pte_val(pte) | _PAGE_USER;
        return pte;
}

/* Set _PAGE_USER (same bit as pte_mkread()). */
extern inline pte_t pte_mkexec(pte_t pte)
{
        pte_val(pte) = pte_val(pte) | _PAGE_USER;
        return pte;
}

/* Set _PAGE_DIRTY. */
extern inline pte_t pte_mkdirty(pte_t pte)
{
        pte_val(pte) = pte_val(pte) | _PAGE_DIRTY;
        return pte;
}

/* Set _PAGE_ACCESSED. */
extern inline pte_t pte_mkyoung(pte_t pte)
{
        pte_val(pte) = pte_val(pte) | _PAGE_ACCESSED;
        return pte;
}

/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */
//==============================================================================
//
//  Functions for the simplest operations on page table entries
//
//==============================================================================

/*
 * Build a new entry by OR-ing the page address `page` with the
 * protection bits held in `pgprot`.
 */
extern inline pte_t mk_pte(unsigned long page, pgprot_t pgprot)
{
        pte_t entry;

        pte_val(entry) = page | pgprot_val(pgprot);
        return entry;
}

/*
 * Change the access rights of an entry: keep only the bits selected by
 * _PAGE_CHG_MASK and OR in the bits of `newprot`.
 */
extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
        unsigned long kept = pte_val(pte) & _PAGE_CHG_MASK;

        pte_val(pte) = kept | pgprot_val(newprot);
        return pte;
}

/* Page address held in a pte: the entry with the non-PAGE_MASK bits dropped. */
extern inline unsigned long pte_page(pte_t pte)
{
        unsigned long raw = pte_val(pte);

        return raw & PAGE_MASK;
}

/* Page address held in a pmd: the entry with the non-PAGE_MASK bits dropped. */
extern inline unsigned long pmd_page(pmd_t pmd)
{
        unsigned long raw = pmd_val(pmd);

        return raw & PAGE_MASK;
}

/*
 * Find the page directory entry for `address` in the address space `mm`:
 * the entry at index (address >> PGDIR_SHIFT) of mm->pgd.
 */
extern inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
{
        return &mm->pgd[address >> PGDIR_SHIFT];
}

/*
 * Find an entry in the second-level page table.  The address is not
 * used: the pgd entry pointer itself is returned as the pmd pointer.
 */
extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
{
        (void) address;

        return (pmd_t *) dir;
}

/*
 * Find an entry in the third-level page table: take the table that *dir
 * points at and index it with the page number of `address`, reduced
 * modulo PTRS_PER_PTE.
 */
extern inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
{
        pte_t *table = (pte_t *) pmd_page(*dir);
        unsigned long slot = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);

        return table + slot;
}

/*
 * Allocate and free page tables. The xxx_kernel() versions are
 * used to allocate a kernel page table - this turns on ASN bits
 * if any.
 */
/* Release the page that holds a kernel page table. */
extern inline void pte_free_kernel(pte_t * pte)
{
        unsigned long table = (unsigned long) pte;

        free_page(table);
}

extern const char bad_pmd_string[];


/*
 * Return a pointer to the pte for `address` inside the page table that
 * `pmd` refers to, allocating that table with get_free_page(GFP_KERNEL)
 * when the pmd entry is empty.
 *
 * Returns NULL, after pointing the pmd at BAD_PAGETABLE, when the
 * allocation fails or when the pmd entry fails the pmd_bad() check (the
 * latter is also reported through printk).
 */
extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address)
{
        /* reduce the address to an index into the page table */
        address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        if (pmd_none(*pmd)) {
                pte_t * page = (pte_t *) get_free_page(GFP_KERNEL);
                /*
                 * The pmd is tested again after the allocation.
                 * NOTE(review): presumably get_free_page() can sleep and
                 * the entry may have been filled in meanwhile -- confirm.
                 */
                if (pmd_none(*pmd)) {
                        if (page) {
                                pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) page;
                                return page + address;
                        }
                        /* allocation failed */
                        pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) BAD_PAGETABLE;
                        return NULL;
                }
                /*
                 * The entry is no longer empty, so the new page is not
                 * needed.  `page` may be NULL here if the allocation
                 * failed; NOTE(review): free_page() is relied on to
                 * tolerate that -- confirm.
                 */
                free_page((unsigned long) page);
        }
        if (pmd_bad(*pmd)) {
                printk(bad_pmd_string, pmd_val(*pmd));
                pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) BAD_PAGETABLE;
                return NULL;
        }
        return (pte_t *) pmd_page(*pmd) + address;
}

/*
 * allocating and freeing a pmd is trivial: the 1-entry pmd is
 * inside the pgd, so has no extra memory associated with it.
 */
/* "Free" a kernel pmd: nothing is released, the entry is just zeroed. */
extern inline void pmd_free_kernel(pmd_t * pmd)
{
        pmd_clear(pmd);
}

/*
 * "Allocate" a kernel pmd: the address is not used and the pgd entry
 * pointer itself is returned as the pmd pointer.
 */
extern inline pmd_t * pmd_alloc_kernel(pgd_t * pgd, unsigned long address)
{
        (void) address;

        return (pmd_t *) pgd;
}

/* Release the page that holds a page table. */
extern inline void pte_free(pte_t * pte)
{
        unsigned long table = (unsigned long) pte;

        free_page(table);
}


/*
 * Return a pointer to the pte for `address` inside the page table that
 * `pmd` refers to, allocating and zeroing a new page table when the pmd
 * entry is empty.
 *
 * Returns NULL, after pointing the pmd at BAD_PAGETABLE, when the
 * allocation fails or when the pmd entry fails the pmd_bad() check (the
 * latter is also reported through printk).
 */
extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address)
{
        /*
         * Turn the address into a byte offset within the page table: the
         * page number modulo PTRS_PER_PTE, times 4.
         * NOTE(review): the factor 4 presumably stands for sizeof(pte_t)
         * -- confirm.
         */
        address = (address >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1);

repeat:
        if (pmd_none(*pmd))
                goto getnew;
        if (pmd_bad(*pmd))
                goto fix;
        /* byte offset is added to the table address before the cast */
        return (pte_t *) (pmd_page(*pmd) + address);

getnew:
{
        unsigned long page = __get_free_page(GFP_KERNEL);
        /*
         * The pmd is tested again after the allocation.
         * NOTE(review): presumably __get_free_page() can sleep and the
         * entry may have been filled in meanwhile -- confirm.
         */
        if (!pmd_none(*pmd))
                goto freenew;
        if (!page)
                goto oom;
        /* the fresh page is cleared here before it is installed */
        memset((void *) page, 0, PAGE_SIZE);
        pmd_val(*pmd) = _PAGE_TABLE | page;
        return (pte_t *) (page + address);
freenew:
        /*
         * `page` may be 0 here if the allocation failed; NOTE(review):
         * free_page() is relied on to tolerate that -- confirm.
         */
        free_page(page);
        goto repeat;
}

fix:
        printk(bad_pmd_string, pmd_val(*pmd));
        /* falls through: a bad pmd is replaced just like the oom case */
oom:
        pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) BAD_PAGETABLE;
        return NULL;
}

/*
 * allocating and freeing a pmd is trivial: the 1-entry pmd is
 * inside the pgd, so has no extra memory associated with it.
 */
/* "Free" a pmd: nothing is released, the entry is just zeroed. */
extern inline void pmd_free(pmd_t * pmd)
{
        pmd_clear(pmd);
}

/*
 * "Allocate" a pmd: the address is not used and the pgd entry pointer
 * itself is returned as the pmd pointer.
 */
extern inline pmd_t * pmd_alloc(pgd_t * pgd, unsigned long address)
{
        (void) address;

        return (pmd_t *) pgd;
}

/* Release the page that holds a page directory. */
extern inline void pgd_free(pgd_t * pgd)
{
        unsigned long dir = (unsigned long) pgd;

        free_page(dir);
}

/*
 * Obtain a page for a new page directory from get_free_page(GFP_KERNEL).
 * The result is passed back unchecked, so callers see whatever
 * get_free_page() returned.
 */
extern inline pgd_t * pgd_alloc(void)
{
        pgd_t *dir = (pgd_t *) get_free_page(GFP_KERNEL);

        return dir;
}

extern pgd_t swapper_pg_dir[1024];

/*
 * The i386 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information, so this hook has
 * nothing to do and ignores all of its arguments.
 */
extern inline void update_mmu_cache(struct vm_area_struct * vma,
        unsigned long address, pte_t pte)
{
        (void) vma;
        (void) address;
        (void) pte;
}

/*
 * Swap entry encoding: the type occupies bits 1-7 and the offset starts
 * at bit 8.  SWP_ENTRY() never sets bit 0, which is the _PAGE_PRESENT
 * bit.  NOTE(review): presumably this is so that a swap entry stored in
 * a pte reads as not present -- confirm.
 */
#define SWP_TYPE(entry) (((entry) >> 1) & 0x7f)
#define SWP_OFFSET(entry) ((entry) >> 8)
#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << 8))

#endif /* _I386_PGTABLE_H */