Skomentowany plik pgtable.h
komentarze polskie Jakub Posiadała #ifndef _I386_PGTABLE_H #define _I386_PGTABLE_H #include <linux/config.h> /* * Define USE_PENTIUM_MM if you want the 4MB page table optimizations. * This works only on a intel Pentium. */ #define USE_PENTIUM_MM 1 /* * The Linux memory management assumes a three-level page table setup. On * the i386, we use that, but "fold" the mid level into the top-level page * table, so that we physically have the same two-level page table as the * i386 mmu expects. * * This file contains the functions and defines necessary to modify and use * the i386 page table tree. */ //============================================================================ // //Plik zawiera ustawienia wartości i definicje funkcji zależnych od archiektury procesora. // //============================================================================ /* Caches aren't brain-dead on the intel. */ //=========================================================================== // Te operacje bierze na siebie procesor intela,więc tu są zdefiniowane jako // puste. //=========================================================================== #define flush_cache_all() do { } while (0) #define flush_cache_mm(mm) do { } while (0) #define flush_cache_range(mm, start, end) do { } while (0) #define flush_cache_page(vma, vmaddr) do { } while (0) #define flush_page_to_ram(page) do { } while (0) /* * TLB flushing: * * - flush_tlb() flushes the current mm struct TLBs * - flush_tlb_all() flushes all processes TLBs * - flush_tlb_mm(mm) flushes the specified mm context TLB's * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(mm, start, end) flushes a range of pages * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. */ #define __flush_tlb() \ do { unsigned long tmpreg; __asm__ __volatile__("movl %%cr3,%0\n\tmovl %0,%%cr3":"=r" (tmpreg) : :"memory"); } while (0) #ifdef CONFIG_M386 #define __flush_tlb_one(addr) flush_tlb() #else #define __flush_tlb_one(addr) \ __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr)) #endif #ifndef __SMP__ #define flush_tlb() __flush_tlb() #define flush_tlb_all() __flush_tlb() #define local_flush_tlb() __flush_tlb() static inline void flush_tlb_mm(struct mm_struct *mm) { if (mm == current->mm) __flush_tlb(); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { if (vma->vm_mm == current->mm) __flush_tlb_one(addr); } static inline void flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm == current->mm) __flush_tlb(); } #else /* * We aren't very clever about this yet - SMP could certainly * avoid some global flushes.. */ #include <asm/smp.h> #define local_flush_tlb() \ __flush_tlb() #define CLEVER_SMP_INVALIDATE #ifdef CLEVER_SMP_INVALIDATE /* * Smarter SMP flushing macros. * c/o Linus Torvalds. * * These mean you can really definitely utterly forget about * writing to user space from interrupts. (Its not allowed anyway). */ static inline void flush_tlb_current_task(void) { if (current->mm->count == 1) /* just one copy of this mm */ local_flush_tlb(); /* and that's us, so.. */ else smp_flush_tlb(); } #define flush_tlb() flush_tlb_current_task() #define flush_tlb_all() smp_flush_tlb() static inline void flush_tlb_mm(struct mm_struct * mm) { if (mm == current->mm && mm->count == 1) local_flush_tlb(); else smp_flush_tlb(); } static inline void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) { if (vma->vm_mm == current->mm && current->mm->count == 1) __flush_tlb_one(va); else smp_flush_tlb(); } static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, unsigned long end) { flush_tlb_mm(mm); } #else #define flush_tlb() \ smp_flush_tlb() #define flush_tlb_all() flush_tlb() static inline void flush_tlb_mm(struct mm_struct *mm) { flush_tlb(); } static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { flush_tlb(); } static inline void flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) { flush_tlb(); } #endif #endif /* Certain architectures need to do special things when pte's * within a page table are directly modified. Thus, the following * hook is made available. */ #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) /* PMD_SHIFT determines the size of the area a second-level page table can map */ //================================================================================ // // Określenie rozmiaru tablicy stron - drugiego poziomu struktury pamięci. // Jak łatwo obliczyć tablica stron ma pojemność 2^22 czyli 4MB // //================================================================================ #define PMD_SHIFT 22 #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) /* PGDIR_SHIFT determines what a third-level page table entry can map */ //============================================================================= // // Określenie rozmiaru katalogu tablicy stron - trzeciego poziomu struktury // pamięci. Jest to również 4MB. // //============================================================================= #define PGDIR_SHIFT 22 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) /* * entries per page directory level: the i386 is two-level, so * we don't really have any PMD directory physically. * //=========================================================================== // Tutaj właśnie widać, ze struktura pamięci w Linuxie jest tak naprawdę // dwupoziomowa //=========================================================================== #define PTRS_PER_PTE 1024 #define PTRS_PER_PMD 1 #define PTRS_PER_PGD 1024 /* Just any arbitrary offset to the start of the vmalloc VM area: the * current 8MB value just means that there will be a 8MB "hole" after the * physical memory until the kernel virtual memory starts. That means that * any out-of-bounds memory accesses will hopefully be caught. * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. ;) */ #define VMALLOC_OFFSET (8*1024*1024) #define VMALLOC_START ((high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) #define VMALLOC_VMADDR(x) (TASK_SIZE + (unsigned long)(x)) /* * The 4MB page is guessing.. Detailed in the infamous "Chapter H" * of the Pentium details, but assuming intel did the straightforward * thing, this bit set in the page directory entry just means that * the page directory entry points directly to a 4MB-aligned block of * memory. */ //============================================================================= // // Istnieją pogłoski mówiące, że konstruktorzy Pentium dali mozliwość // tworzenia 4MB ramek ( tak, cztery megabajty). Nie zostało to jednak // opisane w oficjalnej dokumentacji procesora, więc są to tylko domysły. // //============================================================================= #define _PAGE_PRESENT 0x001 #define _PAGE_RW 0x002 #define _PAGE_USER 0x004 #define _PAGE_PCD 0x010 #define _PAGE_ACCESSED 0x020 #define _PAGE_DIRTY 0x040 #define _PAGE_4M 0x080 /* 4 MB page, Pentium+.. */ //============================================================================= // Definicje stałych określających właściwości strony // Dokładny opis znaczenia poszczególnych bitów w pliku mem_map_t //============================================================================= #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) /* * The i386 can't do page protection for execute, and considers that the same are read. * Also, write permissions imply read permissions. This is the closest we can get.. */ #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_COPY #define __P011 PAGE_COPY #define __P100 PAGE_READONLY #define __P101 PAGE_READONLY #define __P110 PAGE_COPY #define __P111 PAGE_COPY #define __S000 PAGE_NONE #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED #define __S100 PAGE_READONLY #define __S101 PAGE_READONLY #define __S110 PAGE_SHARED #define __S111 PAGE_SHARED /* * Define this if things work differently on a i386 and a i486: * it will (on a i486) warn about kernel memory accesses that are * done without a 'verify_area(VERIFY_WRITE,..)' */ #undef TEST_VERIFY_AREA /* page table for 0-4MB for everybody */ extern unsigned long pg0[1024]; /* zero page used for uninitialized stuff */ extern unsigned long empty_zero_page[1024]; /* * BAD_PAGETABLE is used when we need a bogus page-table, while * BAD_PAGE is used for a bogus page. * * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. */ extern pte_t __bad_page(void); extern pte_t * __bad_pagetable(void); #define BAD_PAGETABLE __bad_pagetable() #define BAD_PAGE __bad_page() #define ZERO_PAGE ((unsigned long) empty_zero_page) /* number of bits that fit into a memory pointer */ #define BITS_PER_PTR (8*sizeof(unsigned long)) /* to align the pointer to a pointer address */ #define PTR_MASK (~(sizeof(void*)-1)) /* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ /* 64-bit machines, beware! SRB. */ #define SIZEOF_PTR_LOG2 2 /* to find an entry in a page-table */ #define PAGE_PTR(address) \ ((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) /* to set the page-dir */ #define SET_PAGE_DIR(tsk,pgdir) \ do { \ (tsk)->tss.cr3 = (unsigned long) (pgdir); \ if ((tsk) == current) \ __asm__ __volatile__("movl %0,%%cr3": :"r" (pgdir)); \ } while (0) //===================================================================================== // Funkcje zwracające informacje o statusie ramek i stron. //===================================================================================== #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) #define pte_clear(xp) do { pte_val(*(xp)) = 0; } while (0) #define pmd_none(x) (!pmd_val(x)) #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK) != _PAGE_TABLE) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) /* * The "pgd_xxx()" functions here are trivial for a folded two-level * setup: the pgd is never bad, and a pmd always exists (as it's folded * into the pgd entry) */ extern inline int pgd_none(pgd_t pgd) { return 0; } extern inline int pgd_bad(pgd_t pgd) { return 0; } extern inline int pgd_present(pgd_t pgd) { return 1; } extern inline void pgd_clear(pgd_t * pgdp) { } /* * The following only work if pte_present() is true. * Undefined behaviour if not.. */ //===================================================================================== // Funkcje zwracające informacje o statusie ramki. // Definicja struktury pte_t w pliku page.h. // Opis struktury page //===================================================================================== extern inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER; } extern inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } extern inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_USER; } extern inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } extern inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } extern inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_RW; return pte; } extern inline pte_t pte_rdprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; } extern inline pte_t pte_exprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_USER; return pte; } extern inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } extern inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pte; } extern inline pte_t pte_mkread(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; } extern inline pte_t pte_mkexec(pte_t pte) { pte_val(pte) |= _PAGE_USER; return pte; } extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directetory to the page they refer to. */ //============================================================================== // // Funkcje do wykonywania najprostszych operacji na ramkach // //============================================================================== // Tworzenie nowej ramki o zadanym parametrze ochrony extern inline pte_t mk_pte(unsigned long page, pgprot_t pgprot) { pte_t pte; pte_val(pte) = page | pgprot_val(pgprot); return pte; } // Zmiana praw dostępu do ramki extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } extern inline unsigned long pte_page(pte_t pte) { return pte_val(pte) & PAGE_MASK; } extern inline unsigned long pmd_page(pmd_t pmd) { return pmd_val(pmd) & PAGE_MASK; } /* to find an entry in a page-table-directory */ extern inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address) { return mm->pgd + (address >> PGDIR_SHIFT); } /* Find an entry in the second-level page table.. */ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) { return (pmd_t *) dir; } /* Find an entry in the third-level page table.. */ extern inline pte_t * pte_offset(pmd_t * dir, unsigned long address) { return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)); } /* * Allocate and free page tables. The xxx_kernel() versions are * used to allocate a kernel page table - this turns on ASN bits * if any. */ extern inline void pte_free_kernel(pte_t * pte) { free_page((unsigned long) pte); } extern const char bad_pmd_string[]; extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) { address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); if (pmd_none(*pmd)) { pte_t * page = (pte_t *) get_free_page(GFP_KERNEL); if (pmd_none(*pmd)) { if (page) { pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) page; return page + address; } pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) BAD_PAGETABLE; return NULL; } free_page((unsigned long) page); } if (pmd_bad(*pmd)) { printk(bad_pmd_string, pmd_val(*pmd)); pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) BAD_PAGETABLE; return NULL; } return (pte_t *) pmd_page(*pmd) + address; } /* * allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. */ extern inline void pmd_free_kernel(pmd_t * pmd) { pmd_val(*pmd) = 0; } extern inline pmd_t * pmd_alloc_kernel(pgd_t * pgd, unsigned long address) { return (pmd_t *) pgd; } extern inline void pte_free(pte_t * pte) { free_page((unsigned long) pte); } extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address) { address = (address >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1); repeat: if (pmd_none(*pmd)) goto getnew; if (pmd_bad(*pmd)) goto fix; return (pte_t *) (pmd_page(*pmd) + address); getnew: { unsigned long page = __get_free_page(GFP_KERNEL); if (!pmd_none(*pmd)) goto freenew; if (!page) goto oom; memset((void *) page, 0, PAGE_SIZE); pmd_val(*pmd) = _PAGE_TABLE | page; return (pte_t *) (page + address); freenew: free_page(page); goto repeat; } fix: printk(bad_pmd_string, pmd_val(*pmd)); oom: pmd_val(*pmd) = _PAGE_TABLE | (unsigned long) BAD_PAGETABLE; return NULL; } /* * allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. */ extern inline void pmd_free(pmd_t * pmd) { pmd_val(*pmd) = 0; } extern inline pmd_t * pmd_alloc(pgd_t * pgd, unsigned long address) { return (pmd_t *) pgd; } extern inline void pgd_free(pgd_t * pgd) { free_page((unsigned long) pgd); } extern inline pgd_t * pgd_alloc(void) { return (pgd_t *) get_free_page(GFP_KERNEL); } extern pgd_t swapper_pg_dir[1024]; /* * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. */ extern inline void update_mmu_cache(struct vm_area_struct * vma, unsigned long address, pte_t pte) { } #define SWP_TYPE(entry) (((entry) >> 1) & 0x7f) #define SWP_OFFSET(entry) ((entry) >> 8) #define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << 8)) #endif /* _I386_PAGE_H */