Merge branch 'core-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Linus Torvalds [Fri, 14 Dec 2012 18:08:40 +0000 (10:08 -0800)]
Pull x86 EFI update from Peter Anvin:
 "EFI tree, from Matt Fleming.  Most of the patches are the new efivarfs
  filesystem by Matt Garrett & co.  The balance are support for EFI
  wallclock in the absence of a hardware-specific driver, and various
  fixes and cleanups."

* 'core-efi-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  efivarfs: Make efivarfs_fill_super() static
  x86, efi: Check table header length in efi_bgrt_init()
  efivarfs: Use query_variable_info() to limit kmalloc()
  efivarfs: Fix return value of efivarfs_file_write()
  efivarfs: Return a consistent error when efivarfs_get_inode() fails
  efivarfs: Make 'datasize' unsigned long
  efivarfs: Add unique magic number
  efivarfs: Replace magic number with sizeof(attributes)
  efivarfs: Return an error if we fail to read a variable
  efi: Clarify GUID length calculations
  efivarfs: Implement exclusive access for {get,set}_variable
  efivarfs: efivarfs_fill_super() ensure we clean up correctly on error
  efivarfs: efivarfs_fill_super() ensure we free our temporary name
  efivarfs: efivarfs_fill_super() fix inode reference counts
  efivarfs: efivarfs_create() ensure we drop our reference on inode on error
  efivarfs: efivarfs_file_read ensure we free data in error paths
  x86-64/efi: Use EFI to deal with platform wall clock (again)
  x86/kernel: remove tboot 1:1 page table creation code
  x86, efi: 1:1 pagetable mapping for virtual EFI calls
  x86, mm: Include the entire kernel memory map in trampoline_pgd
  ...

15 files changed:
Documentation/filesystems/00-INDEX
Documentation/filesystems/efivarfs.txt [new file with mode: 0644]
arch/x86/include/asm/efi.h
arch/x86/kernel/tboot.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/pageattr.c
arch/x86/platform/efi/efi-bgrt.c
arch/x86/platform/efi/efi.c
arch/x86/platform/efi/efi_64.c
arch/x86/realmode/init.c
drivers/firmware/efivars.c
include/linux/efi.h
include/uapi/linux/magic.h
init/main.c

index 8c624a1..7b52ba7 100644 (file)
@@ -38,6 +38,8 @@ dnotify_test.c
        - example program for dnotify
 ecryptfs.txt
        - docs on eCryptfs: stacked cryptographic filesystem for Linux.
+efivarfs.txt
+       - info for the efivarfs filesystem.
 exofs.txt
        - info, usage, mount options, design about EXOFS.
 ext2.txt
diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.txt
new file mode 100644 (file)
index 0000000..c477af0
--- /dev/null
@@ -0,0 +1,16 @@
+
+efivarfs - a (U)EFI variable filesystem
+
+The efivarfs filesystem was created to address the shortcomings of
+using entries in sysfs to maintain EFI variables. The old sysfs EFI
+variables code only supported variables of up to 1024 bytes. This
+limitation existed in version 0.99 of the EFI specification, but was
+removed before any full releases. Since variables can now be larger
+than a single page, sysfs isn't the best interface for this.
+
+Variables can be created, deleted and modified with the efivarfs
+filesystem.
+
+efivarfs is typically mounted like this:
+
+       mount -t efivarfs none /sys/firmware/efi/efivars
index 6e8fdf5..fd13815 100644 (file)
@@ -69,23 +69,37 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
        efi_call6((void *)(f), (u64)(a1), (u64)(a2), (u64)(a3),         \
                  (u64)(a4), (u64)(a5), (u64)(a6))
 
+extern unsigned long efi_call_virt_prelog(void);
+extern void efi_call_virt_epilog(unsigned long);
+
+#define efi_callx(x, func, ...)                                        \
+       ({                                                      \
+               efi_status_t __status;                          \
+               unsigned long __pgd;                            \
+                                                               \
+               __pgd = efi_call_virt_prelog();                 \
+               __status = efi_call##x(func, __VA_ARGS__);      \
+               efi_call_virt_epilog(__pgd);                    \
+               __status;                                       \
+       })
+
 #define efi_call_virt0(f)                              \
-       efi_call0((void *)(efi.systab->runtime->f))
+       efi_callx(0, (void *)(efi.systab->runtime->f))
 #define efi_call_virt1(f, a1)                                  \
-       efi_call1((void *)(efi.systab->runtime->f), (u64)(a1))
+       efi_callx(1, (void *)(efi.systab->runtime->f), (u64)(a1))
 #define efi_call_virt2(f, a1, a2)                                      \
-       efi_call2((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
+       efi_callx(2, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2))
 #define efi_call_virt3(f, a1, a2, a3)                                  \
-       efi_call3((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+       efi_callx(3, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
                  (u64)(a3))
 #define efi_call_virt4(f, a1, a2, a3, a4)                              \
-       efi_call4((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+       efi_callx(4, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
                  (u64)(a3), (u64)(a4))
 #define efi_call_virt5(f, a1, a2, a3, a4, a5)                          \
-       efi_call5((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+       efi_callx(5, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
                  (u64)(a3), (u64)(a4), (u64)(a5))
 #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6)                      \
-       efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
+       efi_callx(6, (void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
                  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
 extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
index f84fe00..d4f460f 100644 (file)
@@ -103,71 +103,13 @@ void __init tboot_probe(void)
        pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
 }
 
-static pgd_t *tboot_pg_dir;
-static struct mm_struct tboot_mm = {
-       .mm_rb          = RB_ROOT,
-       .pgd            = swapper_pg_dir,
-       .mm_users       = ATOMIC_INIT(2),
-       .mm_count       = ATOMIC_INIT(1),
-       .mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
-       .page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
-       .mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
-};
-
 static inline void switch_to_tboot_pt(void)
 {
-       write_cr3(virt_to_phys(tboot_pg_dir));
-}
-
-static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
-                         pgprot_t prot)
-{
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-
-       pgd = pgd_offset(&tboot_mm, vaddr);
-       pud = pud_alloc(&tboot_mm, pgd, vaddr);
-       if (!pud)
-               return -1;
-       pmd = pmd_alloc(&tboot_mm, pud, vaddr);
-       if (!pmd)
-               return -1;
-       pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
-       if (!pte)
-               return -1;
-       set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
-       pte_unmap(pte);
-       return 0;
-}
-
-static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
-                          unsigned long nr)
-{
-       /* Reuse the original kernel mapping */
-       tboot_pg_dir = pgd_alloc(&tboot_mm);
-       if (!tboot_pg_dir)
-               return -1;
-
-       for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
-               if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
-                       return -1;
-       }
-
-       return 0;
-}
-
-static void tboot_create_trampoline(void)
-{
-       u32 map_base, map_size;
-
-       /* Create identity map for tboot shutdown code. */
-       map_base = PFN_DOWN(tboot->tboot_base);
-       map_size = PFN_UP(tboot->tboot_size);
-       if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
-               panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
-                     map_base, map_size);
+#ifdef CONFIG_X86_32
+       load_cr3(initial_page_table);
+#else
+       write_cr3(real_mode_header->trampoline_pgd);
+#endif
 }
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -225,14 +167,6 @@ void tboot_shutdown(u32 shutdown_type)
        if (!tboot_enabled())
                return;
 
-       /*
-        * if we're being called before the 1:1 mapping is set up then just
-        * return and let the normal shutdown happen; this should only be
-        * due to very early panic()
-        */
-       if (!tboot_pg_dir)
-               return;
-
        /* if this is S3 then set regions to MAC */
        if (shutdown_type == TB_SHUTDOWN_S3)
                if (tboot_setup_sleep())
@@ -343,8 +277,6 @@ static __init int tboot_late_init(void)
        if (!tboot_enabled())
                return 0;
 
-       tboot_create_trampoline();
-
        atomic_set(&ap_wfs_count, 0);
        register_hotcpu_notifier(&tboot_cpu_notifier);
 
index 2ead3c8..07519a1 100644 (file)
@@ -108,13 +108,13 @@ void sync_global_pgds(unsigned long start, unsigned long end)
        for (address = start; address <= end; address += PGDIR_SIZE) {
                const pgd_t *pgd_ref = pgd_offset_k(address);
                struct page *page;
+               pgd_t *pgd;
 
                if (pgd_none(*pgd_ref))
                        continue;
 
                spin_lock(&pgd_lock);
                list_for_each_entry(page, &pgd_list, lru) {
-                       pgd_t *pgd;
                        spinlock_t *pgt_lock;
 
                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -130,6 +130,13 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
                        spin_unlock(pgt_lock);
                }
+
+               pgd = __va(real_mode_header->trampoline_pgd);
+               pgd += pgd_index(address);
+
+               if (pgd_none(*pgd))
+                       set_pgd(pgd, *pgd_ref);
+
                spin_unlock(&pgd_lock);
        }
 }
index 78fe3f1..e190f7b 100644 (file)
@@ -50,6 +50,107 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
        return err;
 }
 
+#ifdef CONFIG_X86_64
+static void ident_pte_range(unsigned long paddr, unsigned long vaddr,
+                           pmd_t *ppmd, pmd_t *vpmd, unsigned long end)
+{
+       pte_t *ppte = pte_offset_kernel(ppmd, paddr);
+       pte_t *vpte = pte_offset_kernel(vpmd, vaddr);
+
+       do {
+               set_pte(ppte, *vpte);
+       } while (ppte++, vpte++, vaddr += PAGE_SIZE, vaddr != end);
+}
+
+static int ident_pmd_range(unsigned long paddr, unsigned long vaddr,
+                           pud_t *ppud, pud_t *vpud, unsigned long end)
+{
+       pmd_t *ppmd = pmd_offset(ppud, paddr);
+       pmd_t *vpmd = pmd_offset(vpud, vaddr);
+       unsigned long next;
+
+       do {
+               next = pmd_addr_end(vaddr, end);
+
+               if (!pmd_present(*ppmd)) {
+                       pte_t *ppte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+                       if (!ppte)
+                               return 1;
+
+                       set_pmd(ppmd, __pmd(_KERNPG_TABLE | __pa(ppte)));
+               }
+
+               ident_pte_range(paddr, vaddr, ppmd, vpmd, next);
+       } while (ppmd++, vpmd++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+
+static int ident_pud_range(unsigned long paddr, unsigned long vaddr,
+                           pgd_t *ppgd, pgd_t *vpgd, unsigned long end)
+{
+       pud_t *ppud = pud_offset(ppgd, paddr);
+       pud_t *vpud = pud_offset(vpgd, vaddr);
+       unsigned long next;
+
+       do {
+               next = pud_addr_end(vaddr, end);
+
+               if (!pud_present(*ppud)) {
+                       pmd_t *ppmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+                       if (!ppmd)
+                               return 1;
+
+                       set_pud(ppud, __pud(_KERNPG_TABLE | __pa(ppmd)));
+               }
+
+               if (ident_pmd_range(paddr, vaddr, ppud, vpud, next))
+                       return 1;
+       } while (ppud++, vpud++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+
+static int insert_identity_mapping(resource_size_t paddr, unsigned long vaddr,
+                                   unsigned long size)
+{
+       unsigned long end = vaddr + size;
+       unsigned long next;
+       pgd_t *vpgd, *ppgd;
+
+       /* Don't map over the guard hole. */
+       if (paddr >= 0x800000000000 || paddr + size > 0x800000000000)
+               return 1;
+
+       ppgd = __va(real_mode_header->trampoline_pgd) + pgd_index(paddr);
+
+       vpgd = pgd_offset_k(vaddr);
+       do {
+               next = pgd_addr_end(vaddr, end);
+
+               if (!pgd_present(*ppgd)) {
+                       pud_t *ppud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+                       if (!ppud)
+                               return 1;
+
+                       set_pgd(ppgd, __pgd(_KERNPG_TABLE | __pa(ppud)));
+               }
+
+               if (ident_pud_range(paddr, vaddr, ppgd, vpgd, next))
+                       return 1;
+       } while (ppgd++, vpgd++, vaddr = next, vaddr != end);
+
+       return 0;
+}
+#else
+static inline int insert_identity_mapping(resource_size_t paddr,
+                                         unsigned long vaddr,
+                                         unsigned long size)
+{
+       return 0;
+}
+#endif /* CONFIG_X86_64 */
+
 /*
  * Remap an arbitrary physical address space into the kernel virtual
  * address space. Needed when the kernel wants to access high addresses
@@ -163,6 +264,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
        ret_addr = (void __iomem *) (vaddr + offset);
        mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
+       if (insert_identity_mapping(phys_addr, vaddr, size))
+               printk(KERN_WARNING "ioremap: unable to map 0x%llx in identity pagetable\n",
+                                       (unsigned long long)phys_addr);
+
        /*
         * Check if the request spans more than any BAR in the iomem resource
         * tree.
index a718e0d..931930a 100644 (file)
@@ -919,11 +919,13 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
        /*
         * On success we use clflush, when the CPU supports it to
-        * avoid the wbindv. If the CPU does not support it and in the
-        * error case we fall back to cpa_flush_all (which uses
-        * wbindv):
+        * avoid the wbinvd. If the CPU does not support it, in the
+        * error case, and during early boot (for EFI) we fall back
+        * to cpa_flush_all (which uses wbinvd):
         */
-       if (!ret && cpu_has_clflush) {
+       if (early_boot_irqs_disabled)
+               __cpa_flush_all((void *)(long)cache);
+       else if (!ret && cpu_has_clflush) {
                if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
                        cpa_flush_array(addr, numpages, cache,
                                        cpa.flags, pages);
index f6a0c1b..d9c1b95 100644 (file)
@@ -39,6 +39,8 @@ void efi_bgrt_init(void)
        if (ACPI_FAILURE(status))
                return;
 
+       if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+               return;
        if (bgrt_tab->version != 1)
                return;
        if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
index ad44391..0a34d9e 100644 (file)
@@ -239,22 +239,7 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
        return status;
 }
 
-static efi_status_t __init phys_efi_get_time(efi_time_t *tm,
-                                            efi_time_cap_t *tc)
-{
-       unsigned long flags;
-       efi_status_t status;
-
-       spin_lock_irqsave(&rtc_lock, flags);
-       efi_call_phys_prelog();
-       status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm),
-                               virt_to_phys(tc));
-       efi_call_phys_epilog();
-       spin_unlock_irqrestore(&rtc_lock, flags);
-       return status;
-}
-
-int efi_set_rtc_mmss(unsigned long nowtime)
+static int efi_set_rtc_mmss(unsigned long nowtime)
 {
        int real_seconds, real_minutes;
        efi_status_t    status;
@@ -283,7 +268,7 @@ int efi_set_rtc_mmss(unsigned long nowtime)
        return 0;
 }
 
-unsigned long efi_get_time(void)
+static unsigned long efi_get_time(void)
 {
        efi_status_t status;
        efi_time_t eft;
@@ -639,18 +624,13 @@ static int __init efi_runtime_init(void)
        }
        /*
         * We will only need *early* access to the following
-        * two EFI runtime services before set_virtual_address_map
+        * EFI runtime service before set_virtual_address_map
         * is invoked.
         */
-       efi_phys.get_time = (efi_get_time_t *)runtime->get_time;
        efi_phys.set_virtual_address_map =
                (efi_set_virtual_address_map_t *)
                runtime->set_virtual_address_map;
-       /*
-        * Make efi_get_time can be called before entering
-        * virtual mode.
-        */
-       efi.get_time = phys_efi_get_time;
+
        early_iounmap(runtime, sizeof(efi_runtime_services_t));
 
        return 0;
@@ -736,12 +716,10 @@ void __init efi_init(void)
                efi_enabled = 0;
                return;
        }
-#ifdef CONFIG_X86_32
        if (efi_is_native()) {
                x86_platform.get_wallclock = efi_get_time;
                x86_platform.set_wallclock = efi_set_rtc_mmss;
        }
-#endif
 
 #if EFI_DEBUG
        print_efi_memmap();
index 95fd505..06c8b2e 100644 (file)
@@ -58,6 +58,21 @@ static void __init early_code_mapping_set_exec(int executable)
        }
 }
 
+unsigned long efi_call_virt_prelog(void)
+{
+       unsigned long saved;
+
+       saved = read_cr3();
+       write_cr3(real_mode_header->trampoline_pgd);
+
+       return saved;
+}
+
+void efi_call_virt_epilog(unsigned long saved)
+{
+       write_cr3(saved);
+}
+
 void __init efi_call_phys_prelog(void)
 {
        unsigned long vaddress;
index cbca565..8e6ab61 100644 (file)
@@ -78,8 +78,21 @@ void __init setup_real_mode(void)
        *trampoline_cr4_features = read_cr4();
 
        trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
-       trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE;
-       trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE;
+
+       /*
+        * Create an identity mapping for all of physical memory.
+        */
+       for (i = 0; i <= pgd_index(max_pfn << PAGE_SHIFT); i++) {
+               int index = pgd_index(PAGE_OFFSET) + i;
+
+               trampoline_pgd[i] = (u64)pgd_val(swapper_pg_dir[index]);
+       }
+
+       /*
+        * Copy the upper-half of the kernel pages tables.
+        */
+       for (i = pgd_index(PAGE_OFFSET); i < PTRS_PER_PGD; i++)
+               trampoline_pgd[i] = (u64)pgd_val(swapper_pg_dir[i]);
 #endif
 }
 
index 6e51c1e..52c5d89 100644 (file)
 #include <linux/slab.h>
 #include <linux/pstore.h>
 
+#include <linux/fs.h>
+#include <linux/ramfs.h>
+#include <linux/pagemap.h>
+
 #include <asm/uaccess.h>
 
 #define EFIVARS_VERSION "0.08"
@@ -93,6 +97,12 @@ MODULE_VERSION(EFIVARS_VERSION);
 #define DUMP_NAME_LEN 52
 
 /*
+ * Length of a GUID string (strlen("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"))
+ * not including trailing NUL
+ */
+#define GUID_LEN 36
+
+/*
  * The maximum size of VariableName + Data = 1024
  * Therefore, it's reasonable to save that much
  * space in each part of the structure,
@@ -108,7 +118,6 @@ struct efi_variable {
        __u32         Attributes;
 } __attribute__((packed));
 
-
 struct efivar_entry {
        struct efivars *efivars;
        struct efi_variable var;
@@ -122,6 +131,9 @@ struct efivar_attribute {
        ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
 };
 
+static struct efivars __efivars;
+static struct efivar_operations ops;
+
 #define PSTORE_EFI_ATTRIBUTES \
        (EFI_VARIABLE_NON_VOLATILE | \
         EFI_VARIABLE_BOOTSERVICE_ACCESS | \
@@ -629,14 +641,482 @@ static struct kobj_type efivar_ktype = {
        .default_attrs = def_attrs,
 };
 
-static struct pstore_info efi_pstore_info;
-
 static inline void
 efivar_unregister(struct efivar_entry *var)
 {
        kobject_put(&var->kobj);
 }
 
+static int efivarfs_file_open(struct inode *inode, struct file *file)
+{
+       file->private_data = inode->i_private;
+       return 0;
+}
+
+static int efi_status_to_err(efi_status_t status)
+{
+       int err;
+
+       switch (status) {
+       case EFI_INVALID_PARAMETER:
+               err = -EINVAL;
+               break;
+       case EFI_OUT_OF_RESOURCES:
+               err = -ENOSPC;
+               break;
+       case EFI_DEVICE_ERROR:
+               err = -EIO;
+               break;
+       case EFI_WRITE_PROTECTED:
+               err = -EROFS;
+               break;
+       case EFI_SECURITY_VIOLATION:
+               err = -EACCES;
+               break;
+       case EFI_NOT_FOUND:
+               err = -ENOENT;
+               break;
+       default:
+               err = -EINVAL;
+       }
+
+       return err;
+}
+
+static ssize_t efivarfs_file_write(struct file *file,
+               const char __user *userbuf, size_t count, loff_t *ppos)
+{
+       struct efivar_entry *var = file->private_data;
+       struct efivars *efivars;
+       efi_status_t status;
+       void *data;
+       u32 attributes;
+       struct inode *inode = file->f_mapping->host;
+       unsigned long datasize = count - sizeof(attributes);
+       unsigned long newdatasize;
+       u64 storage_size, remaining_size, max_size;
+       ssize_t bytes = 0;
+
+       if (count < sizeof(attributes))
+               return -EINVAL;
+
+       if (copy_from_user(&attributes, userbuf, sizeof(attributes)))
+               return -EFAULT;
+
+       if (attributes & ~(EFI_VARIABLE_MASK))
+               return -EINVAL;
+
+       efivars = var->efivars;
+
+       /*
+        * Ensure that the user can't allocate arbitrarily large
+        * amounts of memory. Pick a default size of 64K if
+        * QueryVariableInfo() isn't supported by the firmware.
+        */
+       spin_lock(&efivars->lock);
+
+       if (!efivars->ops->query_variable_info)
+               status = EFI_UNSUPPORTED;
+       else {
+               const struct efivar_operations *fops = efivars->ops;
+               status = fops->query_variable_info(attributes, &storage_size,
+                                                  &remaining_size, &max_size);
+       }
+
+       spin_unlock(&efivars->lock);
+
+       if (status != EFI_SUCCESS) {
+               if (status != EFI_UNSUPPORTED)
+                       return efi_status_to_err(status);
+
+               remaining_size = 65536;
+       }
+
+       if (datasize > remaining_size)
+               return -ENOSPC;
+
+       data = kmalloc(datasize, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
+               bytes = -EFAULT;
+               goto out;
+       }
+
+       if (validate_var(&var->var, data, datasize) == false) {
+               bytes = -EINVAL;
+               goto out;
+       }
+
+       /*
+        * The lock here protects the get_variable call, the conditional
+        * set_variable call, and removal of the variable from the efivars
+        * list (in the case of an authenticated delete).
+        */
+       spin_lock(&efivars->lock);
+
+       status = efivars->ops->set_variable(var->var.VariableName,
+                                           &var->var.VendorGuid,
+                                           attributes, datasize,
+                                           data);
+
+       if (status != EFI_SUCCESS) {
+               spin_unlock(&efivars->lock);
+               kfree(data);
+
+               return efi_status_to_err(status);
+       }
+
+       bytes = count;
+
+       /*
+        * Writing to the variable may have caused a change in size (which
+        * could either be an append or an overwrite), or the variable to be
+        * deleted. Perform a GetVariable() so we can tell what actually
+        * happened.
+        */
+       newdatasize = 0;
+       status = efivars->ops->get_variable(var->var.VariableName,
+                                           &var->var.VendorGuid,
+                                           NULL, &newdatasize,
+                                           NULL);
+
+       if (status == EFI_BUFFER_TOO_SMALL) {
+               spin_unlock(&efivars->lock);
+               mutex_lock(&inode->i_mutex);
+               i_size_write(inode, newdatasize + sizeof(attributes));
+               mutex_unlock(&inode->i_mutex);
+
+       } else if (status == EFI_NOT_FOUND) {
+               list_del(&var->list);
+               spin_unlock(&efivars->lock);
+               efivar_unregister(var);
+               drop_nlink(inode);
+               dput(file->f_dentry);
+
+       } else {
+               spin_unlock(&efivars->lock);
+               pr_warn("efivarfs: inconsistent EFI variable implementation? "
+                               "status = %lx\n", status);
+       }
+
+out:
+       kfree(data);
+
+       return bytes;
+}
+
+static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
+               size_t count, loff_t *ppos)
+{
+       struct efivar_entry *var = file->private_data;
+       struct efivars *efivars = var->efivars;
+       efi_status_t status;
+       unsigned long datasize = 0;
+       u32 attributes;
+       void *data;
+       ssize_t size = 0;
+
+       spin_lock(&efivars->lock);
+       status = efivars->ops->get_variable(var->var.VariableName,
+                                           &var->var.VendorGuid,
+                                           &attributes, &datasize, NULL);
+       spin_unlock(&efivars->lock);
+
+       if (status != EFI_BUFFER_TOO_SMALL)
+               return efi_status_to_err(status);
+
+       data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL);
+
+       if (!data)
+               return -ENOMEM;
+
+       spin_lock(&efivars->lock);
+       status = efivars->ops->get_variable(var->var.VariableName,
+                                           &var->var.VendorGuid,
+                                           &attributes, &datasize,
+                                           (data + sizeof(attributes)));
+       spin_unlock(&efivars->lock);
+
+       if (status != EFI_SUCCESS) {
+               size = efi_status_to_err(status);
+               goto out_free;
+       }
+
+       memcpy(data, &attributes, sizeof(attributes));
+       size = simple_read_from_buffer(userbuf, count, ppos,
+                                      data, datasize + sizeof(attributes));
+out_free:
+       kfree(data);
+
+       return size;
+}
+
+static void efivarfs_evict_inode(struct inode *inode)
+{
+       clear_inode(inode);
+}
+
+static const struct super_operations efivarfs_ops = {
+       .statfs = simple_statfs,
+       .drop_inode = generic_delete_inode,
+       .evict_inode = efivarfs_evict_inode,
+       .show_options = generic_show_options,
+};
+
+static struct super_block *efivarfs_sb;
+
+static const struct inode_operations efivarfs_dir_inode_operations;
+
+static const struct file_operations efivarfs_file_operations = {
+       .open   = efivarfs_file_open,
+       .read   = efivarfs_file_read,
+       .write  = efivarfs_file_write,
+       .llseek = no_llseek,
+};
+
+static struct inode *efivarfs_get_inode(struct super_block *sb,
+                               const struct inode *dir, int mode, dev_t dev)
+{
+       struct inode *inode = new_inode(sb);
+
+       if (inode) {
+               inode->i_ino = get_next_ino();
+               inode->i_uid = inode->i_gid = 0;
+               inode->i_mode = mode;
+               inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+               switch (mode & S_IFMT) {
+               case S_IFREG:
+                       inode->i_fop = &efivarfs_file_operations;
+                       break;
+               case S_IFDIR:
+                       inode->i_op = &efivarfs_dir_inode_operations;
+                       inode->i_fop = &simple_dir_operations;
+                       inc_nlink(inode);
+                       break;
+               }
+       }
+       return inode;
+}
+
+static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
+{
+       guid->b[0] = hex_to_bin(str[6]) << 4 | hex_to_bin(str[7]);
+       guid->b[1] = hex_to_bin(str[4]) << 4 | hex_to_bin(str[5]);
+       guid->b[2] = hex_to_bin(str[2]) << 4 | hex_to_bin(str[3]);
+       guid->b[3] = hex_to_bin(str[0]) << 4 | hex_to_bin(str[1]);
+       guid->b[4] = hex_to_bin(str[11]) << 4 | hex_to_bin(str[12]);
+       guid->b[5] = hex_to_bin(str[9]) << 4 | hex_to_bin(str[10]);
+       guid->b[6] = hex_to_bin(str[16]) << 4 | hex_to_bin(str[17]);
+       guid->b[7] = hex_to_bin(str[14]) << 4 | hex_to_bin(str[15]);
+       guid->b[8] = hex_to_bin(str[19]) << 4 | hex_to_bin(str[20]);
+       guid->b[9] = hex_to_bin(str[21]) << 4 | hex_to_bin(str[22]);
+       guid->b[10] = hex_to_bin(str[24]) << 4 | hex_to_bin(str[25]);
+       guid->b[11] = hex_to_bin(str[26]) << 4 | hex_to_bin(str[27]);
+       guid->b[12] = hex_to_bin(str[28]) << 4 | hex_to_bin(str[29]);
+       guid->b[13] = hex_to_bin(str[30]) << 4 | hex_to_bin(str[31]);
+       guid->b[14] = hex_to_bin(str[32]) << 4 | hex_to_bin(str[33]);
+       guid->b[15] = hex_to_bin(str[34]) << 4 | hex_to_bin(str[35]);
+}
+
+static int efivarfs_create(struct inode *dir, struct dentry *dentry,
+                         umode_t mode, bool excl)
+{
+       struct inode *inode;
+       struct efivars *efivars = &__efivars;
+       struct efivar_entry *var;
+       int namelen, i = 0, err = 0;
+
+       /*
+        * We need a GUID, plus at least one letter for the variable name,
+        * plus the '-' separator
+        */
+       if (dentry->d_name.len < GUID_LEN + 2)
+               return -EINVAL;
+
+       inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
+       if (!inode)
+               return -ENOMEM;
+
+       var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
+       if (!var) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       /* length of the variable name itself: remove GUID and separator */
+       namelen = dentry->d_name.len - GUID_LEN - 1;
+
+       efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1,
+                       &var->var.VendorGuid);
+
+       for (i = 0; i < namelen; i++)
+               var->var.VariableName[i] = dentry->d_name.name[i];
+
+       var->var.VariableName[i] = '\0';
+
+       inode->i_private = var;
+       var->efivars = efivars;
+       var->kobj.kset = efivars->kset;
+
+       err = kobject_init_and_add(&var->kobj, &efivar_ktype, NULL, "%s",
+                            dentry->d_name.name);
+       if (err)
+               goto out;
+
+       kobject_uevent(&var->kobj, KOBJ_ADD);
+       spin_lock(&efivars->lock);
+       list_add(&var->list, &efivars->list);
+       spin_unlock(&efivars->lock);
+       d_instantiate(dentry, inode);
+       dget(dentry);
+out:
+       if (err) {
+               kfree(var);
+               iput(inode);
+       }
+       return err;
+}
+
+static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+       struct efivar_entry *var = dentry->d_inode->i_private;
+       struct efivars *efivars = var->efivars;
+       efi_status_t status;
+
+       spin_lock(&efivars->lock);
+
+       status = efivars->ops->set_variable(var->var.VariableName,
+                                           &var->var.VendorGuid,
+                                           0, 0, NULL);
+
+       if (status == EFI_SUCCESS || status == EFI_NOT_FOUND) {
+               list_del(&var->list);
+               spin_unlock(&efivars->lock);
+               efivar_unregister(var);
+               drop_nlink(dir);
+               dput(dentry);
+               return 0;
+       }
+
+       spin_unlock(&efivars->lock);
+       return -EINVAL;
+};
+
+static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+       struct inode *inode = NULL;
+       struct dentry *root;
+       struct efivar_entry *entry, *n;
+       struct efivars *efivars = &__efivars;
+       char *name;
+
+       efivarfs_sb = sb;
+
+       sb->s_maxbytes          = MAX_LFS_FILESIZE;
+       sb->s_blocksize         = PAGE_CACHE_SIZE;
+       sb->s_blocksize_bits    = PAGE_CACHE_SHIFT;
+       sb->s_magic             = EFIVARFS_MAGIC;
+       sb->s_op                = &efivarfs_ops;
+       sb->s_time_gran         = 1;
+
+       inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+       if (!inode)
+               return -ENOMEM;
+       inode->i_op = &efivarfs_dir_inode_operations;
+
+       root = d_make_root(inode);
+       sb->s_root = root;
+       if (!root)
+               return -ENOMEM;
+
+       list_for_each_entry_safe(entry, n, &efivars->list, list) {
+               struct dentry *dentry, *root = efivarfs_sb->s_root;
+               unsigned long size = 0;
+               int len, i;
+
+               inode = NULL;
+
+               len = utf16_strlen(entry->var.VariableName);
+
+               /* name, plus '-', plus GUID, plus NUL */
+               name = kmalloc(len + 1 + GUID_LEN + 1, GFP_ATOMIC);
+               if (!name)
+                       goto fail;
+
+               for (i = 0; i < len; i++)
+                       name[i] = entry->var.VariableName[i] & 0xFF;
+
+               name[len] = '-';
+
+               efi_guid_unparse(&entry->var.VendorGuid, name + len + 1);
+
+               name[len+GUID_LEN+1] = '\0';
+
+               inode = efivarfs_get_inode(efivarfs_sb, root->d_inode,
+                                         S_IFREG | 0644, 0);
+               if (!inode)
+                       goto fail_name;
+
+               dentry = d_alloc_name(root, name);
+               if (!dentry)
+                       goto fail_inode;
+
+               /* copied by the above to local storage in the dentry. */
+               kfree(name);
+
+               spin_lock(&efivars->lock);
+               efivars->ops->get_variable(entry->var.VariableName,
+                                          &entry->var.VendorGuid,
+                                          &entry->var.Attributes,
+                                          &size,
+                                          NULL);
+               spin_unlock(&efivars->lock);
+
+               mutex_lock(&inode->i_mutex);
+               inode->i_private = entry;
+               i_size_write(inode, size+4);
+               mutex_unlock(&inode->i_mutex);
+               d_add(dentry, inode);
+       }
+
+       return 0;
+
+fail_inode:
+       iput(inode);
+fail_name:
+       kfree(name);
+fail:
+       return -ENOMEM;
+}
+
+static struct dentry *efivarfs_mount(struct file_system_type *fs_type,
+                                   int flags, const char *dev_name, void *data)
+{
+       return mount_single(fs_type, flags, data, efivarfs_fill_super);
+}
+
+static void efivarfs_kill_sb(struct super_block *sb)
+{
+       kill_litter_super(sb);
+       efivarfs_sb = NULL;
+}
+
+static struct file_system_type efivarfs_type = {
+       .name    = "efivarfs",
+       .mount   = efivarfs_mount,
+       .kill_sb = efivarfs_kill_sb,
+};
+
+static const struct inode_operations efivarfs_dir_inode_operations = {
+       .lookup = simple_lookup,
+       .unlink = efivarfs_unlink,
+       .create = efivarfs_create,
+};
+
+static struct pstore_info efi_pstore_info;
+
 #ifdef CONFIG_PSTORE
 
 static int efi_pstore_open(struct pstore_info *psi)
@@ -1065,11 +1545,18 @@ efivar_create_sysfs_entry(struct efivars *efivars,
                          efi_char16_t *variable_name,
                          efi_guid_t *vendor_guid)
 {
-       int i, short_name_size = variable_name_size / sizeof(efi_char16_t) + 38;
+       int i, short_name_size;
        char *short_name;
        struct efivar_entry *new_efivar;
 
-       short_name = kzalloc(short_name_size + 1, GFP_KERNEL);
+       /*
+        * Length of the variable bytes in ASCII, plus the '-' separator,
+        * plus the GUID, plus trailing NUL
+        */
+       short_name_size = variable_name_size / sizeof(efi_char16_t)
+                               + 1 + GUID_LEN + 1;
+
+       short_name = kzalloc(short_name_size, GFP_KERNEL);
        new_efivar = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
 
        if (!short_name || !new_efivar)  {
@@ -1189,6 +1676,7 @@ void unregister_efivars(struct efivars *efivars)
                sysfs_remove_bin_file(&efivars->kset->kobj, efivars->del_var);
        kfree(efivars->new_var);
        kfree(efivars->del_var);
+       kobject_put(efivars->kobject);
        kset_unregister(efivars->kset);
 }
 EXPORT_SYMBOL_GPL(unregister_efivars);
@@ -1220,6 +1708,14 @@ int register_efivars(struct efivars *efivars,
                goto out;
        }
 
+       efivars->kobject = kobject_create_and_add("efivars", parent_kobj);
+       if (!efivars->kobject) {
+               pr_err("efivars: Subsystem registration failed.\n");
+               error = -ENOMEM;
+               kset_unregister(efivars->kset);
+               goto out;
+       }
+
        /*
         * Per EFI spec, the maximum storage allocated for both
         * the variable name and variable data is 1024 bytes.
@@ -1262,6 +1758,8 @@ int register_efivars(struct efivars *efivars,
                pstore_register(&efivars->efi_pstore_info);
        }
 
+       register_filesystem(&efivarfs_type);
+
 out:
        kfree(variable_name);
 
@@ -1269,9 +1767,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(register_efivars);
 
-static struct efivars __efivars;
-static struct efivar_operations ops;
-
 /*
  * For now we register the efi subsystem with the firmware subsystem
  * and the vars subsystem with the efi subsystem.  In the future, it
@@ -1302,6 +1797,7 @@ efivars_init(void)
        ops.set_variable = efi.set_variable;
        ops.get_next_variable = efi.get_next_variable;
        ops.query_variable_info = efi.query_variable_info;
+
        error = register_efivars(&__efivars, &ops, efi_kobj);
        if (error)
                goto err_put;
index b02099d..02a6941 100644 (file)
 #define EFI_UNSUPPORTED                ( 3 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_BAD_BUFFER_SIZE     ( 4 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_BUFFER_TOO_SMALL   ( 5 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_NOT_READY          ( 6 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_DEVICE_ERROR       ( 7 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_WRITE_PROTECTED    ( 8 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_OUT_OF_RESOURCES   ( 9 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_NOT_FOUND          (14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1)))
 
 typedef unsigned long efi_status_t;
 typedef u8 efi_bool_t;
@@ -582,8 +587,6 @@ extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
                struct resource *data_resource, struct resource *bss_resource);
-extern unsigned long efi_get_time(void);
-extern int efi_set_rtc_mmss(unsigned long nowtime);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
@@ -729,6 +732,7 @@ struct efivars {
        spinlock_t lock;
        struct list_head list;
        struct kset *kset;
+       struct kobject *kobject;
        struct bin_attribute *new_var, *del_var;
        const struct efivar_operations *ops;
        struct efivar_entry *walk_entry;
index e15192c..12f68c7 100644 (file)
@@ -27,6 +27,7 @@
 #define ISOFS_SUPER_MAGIC      0x9660
 #define JFFS2_SUPER_MAGIC      0x72b6
 #define PSTOREFS_MAGIC         0x6165676C
+#define EFIVARFS_MAGIC         0xde5e81e4
 
 #define MINIX_SUPER_MAGIC      0x137F          /* minix v1 fs, 14 char names */
 #define MINIX_SUPER_MAGIC2     0x138F          /* minix v1 fs, 30 char names */
index 63ae904..6af5470 100644 (file)
@@ -463,6 +463,10 @@ static void __init mm_init(void)
        percpu_init_late();
        pgtable_cache_init();
        vmalloc_init();
+#ifdef CONFIG_X86
+       if (efi_enabled)
+               efi_enter_virtual_mode();
+#endif
 }
 
 asmlinkage void __init start_kernel(void)
@@ -603,10 +607,6 @@ asmlinkage void __init start_kernel(void)
        calibrate_delay();
        pidmap_init();
        anon_vma_init();
-#ifdef CONFIG_X86
-       if (efi_enabled)
-               efi_enter_virtual_mode();
-#endif
        thread_info_cache_init();
        cred_init();
        fork_init(totalram_pages);