Baseline for first public release.
Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
--- /dev/null
+*.o
+*.mod.[co]
+*.cmd
+.tmp_versions
+Module.symvers
+modules.order
+jailhouse.ko
+hypervisor/include/jailhouse/config.h
+hypervisor/hypervisor.lds
+hypervisor/jailhouse.bin
+tools/jailhouse
+config/*.cell
+inmate/*.bin
--- /dev/null
+This copyright does not cover applications or operating systems that
+run inside hypervisor cells, also if they use hypervisor services by
+normal hypercalls. This is considered normal use of the hypervisor
+and is not a "derived work".
+
+---------------------------------------------------------------------
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+# Sub-directories kbuild descends into in addition to this one.
+subdir-y := hypervisor config inmate
+
+# The driver is built as a loadable module (jailhouse.ko).
+obj-m := jailhouse.o
+
+# Add the hypervisor's per-architecture and generic include directories.
+ccflags-y := -I$(src)/hypervisor/arch/$(SRCARCH)/include \
+ -I$(src)/hypervisor/include
+
+# main.o is the only object that makes up jailhouse.o.
+jailhouse-y := main.o
+
+# out-of-tree build
+
+# Defaults to the build tree of the running kernel; override on the command
+# line (make KERNELDIR=...) to build against a different kernel.
+KERNELDIR = /lib/modules/`uname -r`/build
+
+modules modules_install clean:
+ $(MAKE) -C $(KERNELDIR) SUBDIRS=`pwd` $@
+
+install: modules_install
+ depmod -aq
+
+# None of these targets creates a file of the same name, so declare every one
+# of them phony (including "modules", which shares the rule above).
+.PHONY: modules modules_install install clean
--- /dev/null
+JAILHOUSE
+=========
+
+Jailhouse is a partitioning Hypervisor based on Linux. It is able to run
+bare-metal applications or (adapted) operating systems besides Linux. For this
+purpose it configures CPU and device virtualization features of the hardware
+platform in a way that none of these domains, called "cells" here, can
+interfere with each other in an unacceptable way.
+
+Jailhouse is optimized for simplicity rather than feature richness. Once
+activated, it runs bare-metal, i.e. it takes full control over the hardware
+and needs no external support. However, in contrast to other bare-metal
+hypervisors, it is loaded and configured by a normal Linux system. Its
+management interface is based on Linux infrastructure. So you boot Linux
+first, then you enable Jailhouse and finally you split off parts of the
+system's resources and assign them to additional cells.
+
+
+WARNING: This is work in progress! Don't expect things to be complete in any
+dimension. Use at your own risk. And keep the reset button in reach.
+
+
+Requirements (preliminary)
+--------------------------
+
+currently:
+ - Intel x86 processor with VMX support, more precisely
+ - EPT (extended page tables)
+ - unrestricted guest mode
+ - at least 2 logical CPUs
+
+upcoming:
+ - Intel IOMMU with interrupt remapping support
+
+
+Build
+-----
+
+Simply run make, optionally specifying the target kernel directory:
+
+ make [KERNELDIR=/path/to/kernel/objects]
+
+Note that the command line tool "jailhouse" requires a separate make run from
+within the tools/ directory.
+
+
+Configuration
+-------------
+
+Jailhouse requires one configuration file for the complete system and one for
+each additional cell besides Linux. The configuration is currently being
+defined manually by filling C structures. To study the structure, use
+config/qemu-vm.c for a system configuration and config/minimal.c for a cell
+configuration as reference. The build system will pick up every .c file from
+the config/ directory and generate a corresponding .cell file. .cell files can
+then be passed to the jailhouse command line tool for enabling the hypervisor
+and creating new cells.
+
+
+Demonstration in QEMU/KVM
+-------------------------
+
+The included system configuration qemu-vm.c can be used to run Jailhouse in
+a QEMU/KVM virtual machine on Intel x86 hosts. Currently it requires kvm.git,
+next branch on the host (in order to get support for nested unrestricted guest
+mode). 3.13 is expected to include all necessary features for this test. You
+also need a Linux guest image with a recent kernel (tested with >= 3.9) and
+the ability to build a module for this kernel. Make sure the kvm-intel module
+was loaded with nested=1 to enable nested VMX support. Start the virtual
+machine as follows:
+
+ qemu-system-x86_64 LinuxInstallation.img -m 1G -enable-kvm -serial stdio \
+ -cpu kvm64,-kvm_pv_eoi,-kvm_steal_time,-kvm_asyncpf,-kvmclock,+vmx,+x2apic \
+ -smp 4
+
+Inside the VM, make sure that jailhouse.bin, generated by the build process,
+is available for firmware loading (typically /lib/firmware). Load jailhouse.ko
+and then enable Jailhouse like this:
+
+ jailhouse enable /path/to/qemu-vm.cell
+
+Next you can create a cell with a demonstration application as follows:
+
+ jailhouse cell create /path/to/minimal.cell /path/to/apic-demo.bin \
+ -l 0xf0000
+
+apic-demo.bin is left by the build process in the inmate/ directory. This
+application will program the APIC timer interrupt to fire at 10 Hz, measuring
+the jitter against the PM timer and displaying the result on the
+console. Given that this demonstration runs in a virtual machine, obviously
+no decent latencies should be expected.
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+# Let the configuration sources include the hypervisor's <jailhouse/...> headers.
+ccflags-y := -I$(src)/../hypervisor/include
+
+# objcopy writes a raw binary image.
+OBJCOPYFLAGS := -O binary
+
+# Every .c file in this directory is treated as a configuration source.
+CONFIGS = $(shell cd $(src); ls *.c)
+
+# One .cell file per configuration source is requested from kbuild.
+always := $(CONFIGS:.c=.cell)
+
+# Register both the objects and the .cell files as kbuild targets.
+targets += $(CONFIGS:.c=.o) $(CONFIGS:.c=.cell)
+
+# Recipe-less target that names every config object as a prerequisite.
+# NOTE(review): presumably this keeps make from treating the .o files as
+# intermediate files of the %.cell rule - confirm.
+dummy: $(addprefix $(obj)/,$(CONFIGS:.c=.o))
+
+$(obj)/%.cell: $(obj)/%.o
+ $(call if_changed,objcopy)
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for Samsung Chromebook, 2 GB RAM, 64 MB hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * ALIGN requests 1-byte alignment for the object or member it is attached to.
+ * NOTE(review): GCC documents that aligned() alone cannot lower a struct
+ * member's alignment (that needs packed) - confirm no padding can appear.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/*
+ * Element count of a true array (not a pointer). The expansion is fully
+ * parenthesized so it stays a single operand, e.g. in x / ARRAY_SIZE(a).
+ */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * System configuration for the Samsung Chromebook: a jailhouse_system header
+ * followed by the CPU set and the memory region table whose sizes the header
+ * records. This configuration carries no port I/O bitmap.
+ */
+struct {
+ struct jailhouse_system ALIGN header;
+ __u64 ALIGN cpus[1];
+ struct jailhouse_memory ALIGN mem_regions[1];
+} ALIGN config = {
+ .header = {
+ .hypervisor_memory = {
+ .phys_start = 0xbc000000,
+ .size = 0x4000000, /* 64 MB */
+ },
+ .system = {
+ .name = "Samsung Chromebook",
+
+ /* sizes/counts are derived from the arrays declared above */
+ .cpu_set_size = sizeof(config.cpus),
+ .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+ .num_irq_lines = 0,
+ .pio_bitmap_size = 0,
+
+ .num_pci_devices = 0,
+ },
+ },
+
+ /* bits 0-3 set; presumably one bit per CPU - confirm */
+ .cpus = {
+ 0xf,
+ },
+
+ .mem_regions = {
+ /*
+ * NOTE(review): this covers 0x0-0x3c000000 (960 MB) only, while the
+ * file header mentions 2 GB and the hypervisor memory is placed at
+ * 0xbc000000 - confirm the layout.
+ */
+ /* RAM */ {
+ .phys_start = 0x0,
+ .virt_start = 0x0,
+ .size = 0x3c000000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE,
+ },
+ },
+};
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for Celsius H700, 8 GB RAM, 64 MB hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * ALIGN requests 1-byte alignment for the object or member it is attached to.
+ * NOTE(review): GCC documents that aligned() alone cannot lower a struct
+ * member's alignment (that needs packed) - confirm no padding can appear.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/*
+ * Element count of a true array (not a pointer). The expansion is fully
+ * parenthesized so it stays a single operand, e.g. in x / ARRAY_SIZE(a).
+ */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * System configuration for the Celsius H700: a jailhouse_system header
+ * followed by the CPU set, the memory region table and the port I/O bitmap
+ * whose sizes the header records.
+ */
+struct {
+ struct jailhouse_system ALIGN header;
+ __u64 ALIGN cpus[1];
+ struct jailhouse_memory ALIGN mem_regions[9];
+ __u8 ALIGN pio_bitmap[0x2000]; /* 0x2000 bytes * 8 bits = 65536 ports */
+} ALIGN config = {
+ .header = {
+ /* 64 MB in the gap between the first two RAM regions below */
+ .hypervisor_memory = {
+ .phys_start = 0x3c000000,
+ .size = 0x4000000,
+ },
+ /* same range as the ACPI region below */
+ .config_memory = {
+ .phys_start = 0xbf7de000,
+ .size = 0x21000,
+ },
+ .system = {
+ .name = "Celsius H700",
+
+ /* sizes/counts are derived from the arrays declared above */
+ .cpu_set_size = sizeof(config.cpus),
+ .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+ .num_irq_lines = 0,
+ .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+ .num_pci_devices = 0,
+ },
+ },
+
+ /* bits 0-3 set; presumably one bit per CPU - confirm */
+ .cpus = {
+ 0xf,
+ },
+
+ /* all regions are identity-mapped (virt_start == phys_start) */
+ .mem_regions = {
+ /* RAM */ {
+ .phys_start = 0x0,
+ .virt_start = 0x0,
+ .size = 0x3c000000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* RAM */ {
+ .phys_start = 0x40000000,
+ .virt_start = 0x40000000,
+ .size = 0x7f7de000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* ACPI */ {
+ .phys_start = 0xbf7de000,
+ .virt_start = 0xbf7de000,
+ .size = 0x21000,
+ .access_flags = JAILHOUSE_MEM_READ,
+ },
+ /* RAM */ {
+ .phys_start = 0xbf7ff000,
+ .virt_start = 0xbf7ff000,
+ .size = 0x801000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* PCI */ {
+ .phys_start = 0xc0000000,
+ .virt_start = 0xc0000000,
+ .size = 0x3eb00000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE,
+ },
+ /* yeah, that's not really safe... */
+ /* IOAPIC */ {
+ .phys_start = 0xfec00000,
+ .virt_start = 0xfec00000,
+ .size = 0x1000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE,
+ },
+ /* the same here until we catch MSIs via interrupt remapping */
+ /* HPET */ {
+ .phys_start = 0xfed00000,
+ .virt_start = 0xfed00000,
+ .size = 0x1000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE,
+ },
+ /* RAM */ {
+ .phys_start = 0x100000000,
+ .virt_start = 0x100000000,
+ .size = 0xfc000000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* RAM */ {
+ .phys_start = 0x200000000,
+ .virt_start = 0x200000000,
+ .size = 0x3c000000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ },
+
+ /*
+ * One bit per I/O port; byte index is port / 8. -1 stores 0xff in a byte.
+ * NOTE(review): the entries suggest a set bit blocks the port and a
+ * cleared bit grants access - confirm against the hypervisor.
+ */
+ .pio_bitmap = {
+ [ 0/8 ... 0x1f/8] = -1,
+ [ 0x20/8 ... 0x27/8] = 0xfc, /* HACK: PIC */
+ [ 0x28/8 ... 0x3f/8] = -1,
+ [ 0x40/8 ... 0x47/8] = 0xf0, /* PIT */
+ [ 0x48/8 ... 0x5f/8] = -1,
+ [ 0x60/8 ... 0x67/8] = 0x0, /* HACK: 8042, and more? */
+ [ 0x68/8 ... 0x6f/8] = -1,
+ [ 0x70/8 ... 0x77/8] = 0xfc, /* rtc */
+ [ 0x78/8 ... 0x7f/8] = -1,
+ [ 0x80/8 ... 0x8f/8] = 0, /* dma */
+ [ 0x90/8 ... 0x16f/8] = -1,
+ [ 0x170/8 ... 0x177/8] = 0, /* ide */
+ [ 0x178/8 ... 0x1ef/8] = -1,
+ [ 0x1f0/8 ... 0x1f7/8] = 0, /* ide */
+ [ 0x1f8/8 ... 0x2f7/8] = -1,
+ [ 0x2f8/8 ... 0x2ff/8] = 0, /* serial2 */
+ [ 0x300/8 ... 0x36f/8] = -1,
+ [ 0x370/8 ... 0x377/8] = 0xbf, /* ide */
+ [ 0x378/8 ... 0x3af/8] = -1,
+ [ 0x3b0/8 ... 0x3df/8] = 0, /* VGA */
+ [ 0x3e0/8 ... 0x3f7/8] = -1,
+ [ 0x3f8/8 ... 0x3ff/8] = 0, /* serial 1 */
+ [ 0x400/8 ... 0x47f/8] = 0, /* ACPI...? */
+ [ 0x480/8 ... 0xcf7/8] = -1,
+ [ 0xcf8/8 ... 0xffff/8] = 0, /* HACK: full PCI */
+ },
+};
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for Asus H87I-PLUS, 4 GB RAM, 64 MB hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * ALIGN requests 1-byte alignment for the object or member it is attached to.
+ * NOTE(review): GCC documents that aligned() alone cannot lower a struct
+ * member's alignment (that needs packed) - confirm no padding can appear.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/*
+ * Element count of a true array (not a pointer). The expansion is fully
+ * parenthesized so it stays a single operand, e.g. in x / ARRAY_SIZE(a).
+ */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * System configuration for the Asus H87I-PLUS: a jailhouse_system header
+ * followed by the CPU set, the memory region table and the port I/O bitmap
+ * whose sizes the header records.
+ */
+struct {
+ struct jailhouse_system ALIGN header;
+ __u64 ALIGN cpus[1];
+ struct jailhouse_memory ALIGN mem_regions[8];
+ __u8 ALIGN pio_bitmap[0x2000]; /* 0x2000 bytes * 8 bits = 65536 ports */
+} ALIGN config = {
+ .header = {
+ /* 64 MB in the gap between the first two RAM regions below */
+ .hypervisor_memory = {
+ .phys_start = 0x3c000000,
+ .size = 0x4000000,
+ },
+ /* same range as the ACPI region below */
+ .config_memory = {
+ .phys_start = 0xcca64000,
+ .size = 0x15000,
+ },
+ .system = {
+ .name = "H87I-PLUS",
+
+ /* sizes/counts are derived from the arrays declared above */
+ .cpu_set_size = sizeof(config.cpus),
+ .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+ .num_irq_lines = 0,
+ .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+ .num_pci_devices = 0,
+ },
+ },
+
+ /* bits 0-7 set; presumably one bit per CPU - confirm */
+ .cpus = {
+ 0xff,
+ },
+
+ /* all regions are identity-mapped (virt_start == phys_start) */
+ .mem_regions = {
+ /* RAM */ {
+ .phys_start = 0x0,
+ .virt_start = 0x0,
+ .size = 0x3c000000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* RAM */ {
+ .phys_start = 0x40000000,
+ .virt_start = 0x40000000,
+ .size = 0x8ca64000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* ACPI */ {
+ .phys_start = 0xcca64000,
+ .virt_start = 0xcca64000,
+ .size = 0x15000,
+ .access_flags = JAILHOUSE_MEM_READ,
+ },
+ /* RAM */ {
+ .phys_start = 0xcca79000,
+ .virt_start = 0xcca79000,
+ .size = 0x12787000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ /* PCI */ {
+ .phys_start = 0xdf200000,
+ .virt_start = 0xdf200000,
+ .size = 0x1fa00000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE,
+ },
+ /* yeah, that's not really safe... */
+ /* IOAPIC */ {
+ .phys_start = 0xfec00000,
+ .virt_start = 0xfec00000,
+ .size = 0x1000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE,
+ },
+ /* the same here until we catch MSIs via interrupt remapping */
+ /* HPET */ {
+ .phys_start = 0xfed00000,
+ .virt_start = 0xfed00000,
+ .size = 0x1000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE,
+ },
+ /* RAM */ {
+ .phys_start = 0x100000000,
+ .virt_start = 0x100000000,
+ .size = 0x20000000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+ JAILHOUSE_MEM_DMA,
+ },
+ },
+
+ /*
+ * One bit per I/O port; byte index is port / 8. -1 stores 0xff in a byte.
+ * NOTE(review): the entries suggest a set bit blocks the port and a
+ * cleared bit grants access - confirm against the hypervisor.
+ */
+ .pio_bitmap = {
+ [ 0/8 ... 0x1f/8] = -1,
+ [ 0x20/8 ... 0x27/8] = 0xfc, /* HACK: PIC */
+ [ 0x28/8 ... 0x3f/8] = -1,
+ [ 0x40/8 ... 0x47/8] = 0xf0, /* PIT */
+ [ 0x48/8 ... 0x5f/8] = -1,
+ [ 0x60/8 ... 0x67/8] = 0x0, /* HACK: 8042, and more? */
+ [ 0x68/8 ... 0x6f/8] = -1,
+ [ 0x70/8 ... 0x77/8] = 0xfc, /* rtc */
+ [ 0x78/8 ... 0x7f/8] = -1,
+ [ 0x80/8 ... 0x8f/8] = 0, /* dma */
+ [ 0x90/8 ... 0x3af/8] = -1,
+ [ 0x3b0/8 ... 0x3df/8] = 0, /* VGA */
+ [ 0x3e0/8 ... 0xcf7/8] = -1,
+ [ 0xcf8/8 ... 0xffff/8] = 0, /* HACK: full PCI */
+ },
+};
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Minimal configuration for demo inmates, 1 CPU, 1 MB RAM, 1 serial port
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+#define ALIGN __attribute__((aligned(1)))
+/*
+ * Number of elements of a real array (not a pointer). The expansion is fully
+ * parenthesized so it stays correct inside larger expressions.
+ */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * Configuration blob of the "Minimal" demo cell: the cell descriptor followed
+ * by the CPU bitmap, one memory region and the port I/O bitmap.
+ * NOTE(review): aligned(1) by itself may not reduce member alignment (GCC
+ * needs "packed" for that) -- confirm the layout matches what the consumer
+ * of this blob expects.
+ */
+struct {
+ struct jailhouse_cell_desc ALIGN cell;
+ __u64 ALIGN cpus[1];
+ struct jailhouse_memory ALIGN mem_regions[1];
+ /* 0x2000 bytes = one bit for each I/O port 0..0xffff */
+ __u8 ALIGN pio_bitmap[0x2000];
+} ALIGN config = {
+ .cell = {
+ .name = "Minimal",
+
+ /* sizes/counts are derived from the arrays of this very struct */
+ .cpu_set_size = sizeof(config.cpus),
+ .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+ .num_irq_lines = 0,
+ .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+ .num_pci_devices = 0,
+ },
+
+ .cpus = {
+ /* only bit 3 set */
+ 0x8,
+ },
+
+ .mem_regions = {
+ /* 1 MB at physical 0x3bf00000, visible to the cell at address 0 */
+ /* RAM */ {
+ .phys_start = 0x3bf00000,
+ .virt_start = 0,
+ .size = 0x00100000,
+ .access_flags = JAILHOUSE_MEM_READ |
+ JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE,
+ },
+ },
+
+ /*
+ * Indexed by port / 8; ranges must not overlap, later entries would
+ * silently override earlier ones.
+ * NOTE(review): a cleared bit presumably grants the cell access to the
+ * port (the named devices use 0 / 0xf0) -- confirm against the hypervisor.
+ */
+ .pio_bitmap = {
+ [ 0/8 ... 0x3f7/8] = -1,
+ [ 0x3f8/8 ... 0x3ff/8] = 0, /* serial1 */
+ [ 0x400/8 ... 0x407/8] = -1,
+ [ 0x408/8 ... 0x40f/8] = 0xf0, /* PM-timer H700 */
+ [ 0x410/8 ... 0x1807/8] = -1,
+ [0x1808/8 ... 0x180f/8] = 0xf0, /* PM-timer H87I-PLUS */
+ [0x1810/8 ... 0xb007/8] = -1,
+ [0xb008/8 ... 0xb00f/8] = 0xf0, /* PM-timer QEMU */
+ [0xb010/8 ... 0xe00f/8] = -1,
+ [0xe010/8 ... 0xe017/8] = 0, /* OXPCIe952 serial1 */
+ [0xe018/8 ... 0xffff/8] = -1,
+ },
+};
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for QEMU VM, 1 GB RAM, 64 MB hypervisor (-8 K ACPI)
+ * Command line:
+ * qemu-system-x86_64 /path/to/image -m 1G -enable-kvm -smp 4 \
+ * -virtfs local,path=/local/path,security_model=passthrough,mount_tag=host \
+ * -cpu kvm64,-kvm_pv_eoi,-kvm_steal_time,-kvm_asyncpf,-kvmclock,+vmx,+x2apic
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+#define ALIGN __attribute__((aligned(1)))
+/*
+ * Number of elements of a real array (not a pointer). The expansion is fully
+ * parenthesized so it stays correct inside larger expressions.
+ */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * System configuration blob for the QEMU test VM: system header followed by
+ * the CPU bitmap, the memory regions and the port I/O bitmap.
+ */
+struct {
+	struct jailhouse_system ALIGN header;
+	__u64 ALIGN cpus[1];
+	struct jailhouse_memory ALIGN mem_regions[5];
+	/* 0x2000 bytes = one bit for each I/O port 0..0xffff */
+	__u8 ALIGN pio_bitmap[0x2000];
+} ALIGN config = {
+	.header = {
+		/* 64 MB at 0x3c000000 minus the last 8 KB (config/ACPI area) */
+		.hypervisor_memory = {
+			.phys_start = 0x3c000000,
+			.size = 0x4000000 - 0x2000,
+		},
+		.config_memory = {
+			.phys_start = 0x3fffe000,
+			.size = 0x2000,
+		},
+		.system = {
+			.name = "QEMU Linux VM",
+
+			/* sizes/counts are derived from the arrays below */
+			.cpu_set_size = sizeof(config.cpus),
+			.num_memory_regions = ARRAY_SIZE(config.mem_regions),
+			.num_irq_lines = 0,
+			.pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+			.num_pci_devices = 0,
+		},
+	},
+
+	.cpus = {
+		/* bits 0..3 set */
+		0xf,
+	},
+
+	.mem_regions = {
+		/* RAM */ {
+			.phys_start = 0x0,
+			.virt_start = 0x0,
+			.size = 0x3c000000,
+			.access_flags = JAILHOUSE_MEM_READ |
+				JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+				JAILHOUSE_MEM_DMA,
+		},
+		/* ACPI */ {
+			.phys_start = 0x3fffe000,
+			.virt_start = 0x3fffe000,
+			.size = 0x2000,
+			.access_flags = JAILHOUSE_MEM_READ,
+		},
+		/* PCI */ {
+			.phys_start = 0x80000000,
+			.virt_start = 0x80000000,
+			.size = 0x7ec00000,
+			.access_flags = JAILHOUSE_MEM_READ |
+				JAILHOUSE_MEM_WRITE,
+		},
+		/* yeah, that's not really safe... */
+		/* IOAPIC */ {
+			.phys_start = 0xfec00000,
+			.virt_start = 0xfec00000,
+			.size = 0x1000,
+			.access_flags = JAILHOUSE_MEM_READ |
+				JAILHOUSE_MEM_WRITE,
+		},
+		/* the same here until we catch MSIs via interrupt remapping */
+		/* HPET */ {
+			.phys_start = 0xfed00000,
+			.virt_start = 0xfed00000,
+			.size = 0x1000,
+			.access_flags = JAILHOUSE_MEM_READ |
+				JAILHOUSE_MEM_WRITE,
+		},
+	},
+
+	/*
+	 * Indexed by port / 8. The ranges are kept disjoint: with overlapping
+	 * range designators the later entry silently overrides the earlier one.
+	 */
+	.pio_bitmap = {
+		[     0/8 ...   0x1f/8] = -1,
+		[  0x20/8 ...   0x27/8] = 0xfc, /* HACK: PIC */
+		[  0x28/8 ...   0x5f/8] = -1,
+		[  0x60/8 ...   0x67/8] = 0xee, /* HACK: 8042 */
+		[  0x68/8 ...   0x6f/8] = -1,
+		[  0x70/8 ...   0x77/8] = 0xfc, /* rtc */
+		[  0x78/8 ...  0x16f/8] = -1,
+		[ 0x170/8 ...  0x177/8] = 0, /* ide */
+		[ 0x178/8 ...  0x1ef/8] = -1,
+		[ 0x1f0/8 ...  0x1f7/8] = 0, /* ide */
+		[ 0x1f8/8 ...  0x2f7/8] = -1,
+		[ 0x2f8/8 ...  0x2ff/8] = 0, /* serial2 */
+		[ 0x300/8 ...  0x36f/8] = -1,
+		[ 0x370/8 ...  0x377/8] = 0xbf, /* ide */
+		[ 0x378/8 ...  0x3af/8] = -1,
+		[ 0x3b0/8 ...  0x3df/8] = 0, /* VGA */
+		[ 0x3e0/8 ...  0x3ef/8] = -1,
+		[ 0x3f0/8 ...  0x3f7/8] = 0xbf, /* ide */
+		[ 0x3f8/8 ...  0xcf7/8] = -1,
+		[ 0xcf8/8 ...  0xcff/8] = 0, /* HACK: PCI, PIIX3 RCR */
+		[ 0xd00/8 ... 0x5657/8] = -1,
+		[0x5658/8 ... 0x565f/8] = 0xf0, /* vmport */
+		/* ends right before the virtio range instead of overlapping it */
+		[0x5660/8 ... 0xbfff/8] = -1,
+		[0xc000/8 ... 0xc03f/8] = 0, /* virtio-9p-pci */
+		[0xc040/8 ... 0xc07f/8] = 0, /* e1000 */
+		[0xc080/8 ... 0xc08f/8] = 0, /* piix bmdma */
+		[0xc090/8 ... 0xffff/8] = -1,
+	},
+};
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+# Replace (not extend) the kernel include paths and compiler flags: the
+# hypervisor is built with its own headers and as position-independent code.
+LINUXINCLUDE := -I$(src)/arch/$(SRCARCH)/include -I$(src)/include
+KBUILD_CFLAGS := -g -Os -Wall -Wstrict-prototypes -Wtype-limits \
+ -Wmissing-declarations -Wmissing-prototypes \
+ -fno-strict-aliasing -fpic -fpie -fno-common
+# Force-include the optional build configuration header if it exists
+ifneq ($(wildcard $(src)/include/jailhouse/config.h),)
+KBUILD_CFLAGS += -include $(src)/include/jailhouse/config.h
+endif
+
+subdir-y := arch/$(SRCARCH)
+
+always := jailhouse.bin
+
+# Everything that is linked into hypervisor.o, including the linker script
+hypervisor-y := setup.o printk.o paging.o control.o lib.o \
+ arch/$(SRCARCH)/built-in.o hypervisor.lds
+targets += $(hypervisor-y)
+
+HYPERVISOR_OBJS = $(addprefix $(obj)/,$(hypervisor-y))
+
+# NOTE(review): -T presumably consumes the first prerequisite of the rule
+# below (hypervisor.lds) as linker script -- keep it first in the list.
+LDFLAGS_hypervisor.o := -T
+
+targets += hypervisor.o
+$(obj)/hypervisor.o: $(src)/hypervisor.lds $(HYPERVISOR_OBJS)
+ $(call if_changed,ld)
+
+# Flatten the linked object into the raw image jailhouse.bin
+OBJCOPYFLAGS_jailhouse.bin := -O binary
+
+targets += jailhouse.bin
+$(obj)/jailhouse.bin: $(obj)/hypervisor.o
+ $(call if_changed,objcopy)
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/acpi.h>
+#include <jailhouse/control.h>
+#include <jailhouse/entry.h>
+
+/*
+ * Check the checksum of an ACPI table: all table->length bytes, starting at
+ * the header, have to add up to zero (modulo 256).
+ */
+static bool acpi_valid_checksum(const struct acpi_table_header *table)
+{
+	const u8 *cursor = (const u8 *)table;
+	const u8 *const limit = cursor + table->length;
+	u8 total;
+
+	for (total = 0; cursor < limit; cursor++)
+		total += *cursor;
+
+	return total == 0;
+}
+
+/*
+ * Search the config memory byte by byte for an ACPI table.
+ *
+ * @name:  4-character table signature to look for
+ * @start: NULL to search from the beginning of config_memory, or a table
+ *         returned by a previous call to continue behind it
+ *
+ * A candidate is only accepted if its length covers at least the table
+ * header, the whole table lies inside the config memory and its checksum is
+ * valid. Returns the table or NULL if none was found.
+ */
+const struct acpi_table_header *
+acpi_find_table(char name[4], const struct acpi_table_header *start)
+{
+	void *end = config_memory + system_config->config_memory.size;
+	const struct acpi_table_header *tab;
+	const void *pos;
+
+	pos = start ? ((const void *)start) + start->length : config_memory;
+	/* "<=": a header that ends exactly at the region end is still valid */
+	while ((pos + sizeof(struct acpi_table_header)) <= end) {
+		tab = pos++;
+
+		/*
+		 * Bounds are checked relative to the table start. Rejecting
+		 * lengths shorter than the header also ensures that a later
+		 * search continuing behind this table makes progress.
+		 */
+		if (tab->signature == *(u32 *)name &&
+		    tab->length >= sizeof(struct acpi_table_header) &&
+		    tab->length <= (unsigned long)(end - (const void *)tab) &&
+		    acpi_valid_checksum(tab))
+			return tab;
+	}
+
+	return NULL;
+}
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+# Strip "-include asm/unified.h" from the inherited assembler flags
+KBUILD_AFLAGS := $(filter-out -include asm/unified.h,$(KBUILD_AFLAGS))
+
+always := built-in.o
+
+# Only the entry and setup stubs are built so far; the commented list names
+# further objects that are not enabled.
+#obj-y := dbg-write.o entry.o setup.o fault.o control.o mmio.o
+obj-y := entry.o setup.o
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/* Entry point for Linux loader module on JAILHOUSE_ENABLE */
+/*
+ * Stub: returns -38 in r0 without doing anything else.
+ * NOTE(review): -38 is presumably -ENOSYS, matching the C stubs -- confirm.
+ */
+ .text
+ .globl arch_entry
+arch_entry:
+ /* mvn stores the bitwise NOT of its operand: ~(~-38) == -38 */
+ mvn %r0,#~-38
+ bx %lr
+
+
+/* Fix up Global Offset Table with absolute hypervisor address */
+/* Stub: currently returns to the caller without touching anything */
+ .globl got_init
+got_init:
+ bx %lr
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_BITOPS_H
+#define _JAILHOUSE_ASM_BITOPS_H
+
+#include <asm/types.h>
+
+/* Stub: intended to clear bit nr in the bitmap at addr; does nothing yet. */
+static inline __attribute__((always_inline)) void
+clear_bit(int nr, volatile unsigned long *addr)
+{
+}
+
+/* Stub: intended to set bit nr in the bitmap at addr; does nothing yet. */
+static inline __attribute__((always_inline)) void
+set_bit(unsigned int nr, volatile unsigned long *addr)
+{
+}
+
+/*
+ * Return non-zero if bit nr is set in the bitmap at addr. Used by test_bit()
+ * when nr is a compile-time constant.
+ */
+static inline __attribute__((always_inline)) int
+constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+{
+	const unsigned long bit_mask = 1UL << (nr % BITS_PER_LONG);
+	const unsigned long word = addr[nr / BITS_PER_LONG];
+
+	return (word & bit_mask) != 0;
+}
+
+/*
+ * Stub for the non-constant case of test_bit(): always reports the bit as
+ * clear, regardless of the bitmap content.
+ */
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+{
+ return 0;
+}
+
+/*
+ * Test bit nr of the bitmap at addr. Dispatches at compile time: constant
+ * bit numbers use constant_test_bit(), all others variable_test_bit().
+ */
+#define test_bit(nr, addr) \
+ (__builtin_constant_p((nr)) \
+ ? constant_test_bit((nr), (addr)) \
+ : variable_test_bit((nr), (addr)))
+
+/* Stub: does not modify the bitmap and always returns 0. */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ return 0;
+}
+
+/*
+ * Stub: always returns 0, independent of word.
+ * NOTE(review): by its name this should return the index of the first zero
+ * bit -- not implemented here.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+ return 0;
+}
+
+#endif /* !_JAILHOUSE_ASM_BITOPS_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_CELL_H
+#define _JAILHOUSE_ASM_CELL_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#include <jailhouse/cell-config.h>
+
+/* Per-cell state of this architecture. */
+struct cell {
+ /* one byte more than JAILHOUSE_CELL_NAME_MAXLEN, presumably for the NUL */
+ char name[JAILHOUSE_CELL_NAME_MAXLEN+1];
+
+ /* NOTE(review): presumably points to small_cpu_set when that suffices */
+ struct cpu_set *cpu_set;
+ struct cpu_set small_cpu_set;
+
+ unsigned long page_offset;
+
+ /* link to the next cell of a singly linked list (see cell_list) */
+ struct cell *next;
+};
+
+extern struct cell *cell_list;
+
+#endif /* !_JAILHOUSE_ASM_CELL_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+/* Hypercall instruction: hvc with immediate 0x4a48 (ASCII "JH") */
+#define JAILHOUSE_CALL_INS ".arch_extension virt\n\t" \
+ "hvc #0x4a48"
+/* r0 carries the hypercall number in and the result out */
+#define JAILHOUSE_CALL_NUM_RESULT "r0"
+/* registers carrying up to four arguments */
+#define JAILHOUSE_CALL_ARG1 "r1"
+#define JAILHOUSE_CALL_ARG2 "r2"
+#define JAILHOUSE_CALL_ARG3 "r3"
+#define JAILHOUSE_CALL_ARG4 "r4"
+
+/*
+ * Issue hypercall num without arguments.
+ * num is passed in JAILHOUSE_CALL_NUM_RESULT; the value left in that register
+ * by the hypercall is returned.
+ */
+static inline __u32 jailhouse_call0(__u32 num)
+{
+ /* pin the variable to the register the hypercall interface uses */
+ register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+
+ /* NOTE(review): __asmeq (defined elsewhere) presumably verifies at
+ * assembly time that the operand lives in the named register */
+ asm volatile(
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+ JAILHOUSE_CALL_INS
+ : "=r" (num_result)
+ : "r" (num_result)
+ : "memory");
+ return num_result;
+}
+
+/*
+ * Issue hypercall num with one argument.
+ * num goes into JAILHOUSE_CALL_NUM_RESULT, arg1 into JAILHOUSE_CALL_ARG1;
+ * the value left in JAILHOUSE_CALL_NUM_RESULT is returned.
+ */
+static inline __u32 jailhouse_call1(__u32 num, __u32 arg1)
+{
+ /* pin the variables to the registers the hypercall interface uses */
+ register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+ register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+
+ asm volatile(
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+ __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+ JAILHOUSE_CALL_INS
+ : "=r" (num_result)
+ : "r" (num_result), "r" (__arg1)
+ : "memory");
+ return num_result;
+}
+
+/*
+ * Issue hypercall num with two arguments.
+ * num goes into JAILHOUSE_CALL_NUM_RESULT, the arguments into
+ * JAILHOUSE_CALL_ARG1..2; the value left in JAILHOUSE_CALL_NUM_RESULT is
+ * returned.
+ */
+static inline __u32 jailhouse_call2(__u32 num, __u32 arg1, __u32 arg2)
+{
+ /* pin the variables to the registers the hypercall interface uses */
+ register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+ register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+ register __u32 __arg2 asm(JAILHOUSE_CALL_ARG2) = arg2;
+
+ asm volatile(
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+ __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+ __asmeq(JAILHOUSE_CALL_ARG2, "%3")
+ JAILHOUSE_CALL_INS
+ : "=r" (num_result)
+ : "r" (num_result), "r" (__arg1), "r" (__arg2)
+ : "memory");
+ return num_result;
+}
+
+/*
+ * Issue hypercall num with three arguments.
+ * num goes into JAILHOUSE_CALL_NUM_RESULT, the arguments into
+ * JAILHOUSE_CALL_ARG1..3; the value left in JAILHOUSE_CALL_NUM_RESULT is
+ * returned.
+ */
+static inline __u32 jailhouse_call3(__u32 num, __u32 arg1, __u32 arg2,
+ __u32 arg3)
+{
+ /* pin the variables to the registers the hypercall interface uses */
+ register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+ register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+ register __u32 __arg2 asm(JAILHOUSE_CALL_ARG2) = arg2;
+ register __u32 __arg3 asm(JAILHOUSE_CALL_ARG3) = arg3;
+
+ asm volatile(
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+ __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+ __asmeq(JAILHOUSE_CALL_ARG2, "%3")
+ __asmeq(JAILHOUSE_CALL_ARG3, "%4")
+ JAILHOUSE_CALL_INS
+ : "=r" (num_result)
+ : "r" (num_result), "r" (__arg1), "r" (__arg2), "r" (__arg3)
+ : "memory");
+ return num_result;
+}
+
+/*
+ * Issue hypercall num with four arguments.
+ * num goes into JAILHOUSE_CALL_NUM_RESULT, the arguments into
+ * JAILHOUSE_CALL_ARG1..4; the value left in JAILHOUSE_CALL_NUM_RESULT is
+ * returned.
+ */
+static inline __u32 jailhouse_call4(__u32 num, __u32 arg1, __u32 arg2,
+ __u32 arg3, __u32 arg4)
+{
+ /* pin the variables to the registers the hypercall interface uses */
+ register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+ register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+ register __u32 __arg2 asm(JAILHOUSE_CALL_ARG2) = arg2;
+ register __u32 __arg3 asm(JAILHOUSE_CALL_ARG3) = arg3;
+ register __u32 __arg4 asm(JAILHOUSE_CALL_ARG4) = arg4;
+
+ asm volatile(
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+ __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+ __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+ __asmeq(JAILHOUSE_CALL_ARG2, "%3")
+ __asmeq(JAILHOUSE_CALL_ARG3, "%4")
+ __asmeq(JAILHOUSE_CALL_ARG4, "%5")
+ JAILHOUSE_CALL_INS
+ : "=r" (num_result)
+ : "r" (num_result), "r" (__arg1), "r" (__arg2), "r" (__arg3),
+ "r" (__arg4)
+ : "memory");
+ return num_result;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PAGING_H
+#define _JAILHOUSE_ASM_PAGING_H
+
+#include <asm/types.h>
+#include <asm/processor.h>
+
+/* Size of one page in bytes and the mask selecting the page-aligned part */
+#define PAGE_SIZE 4096
+#define PAGE_MASK ~(PAGE_SIZE - 1)
+
+#define PAGE_DIR_LEVELS 4
+
+/* Masks splitting entries/addresses into frame address and offset parts */
+#define PAGE_TABLE_OFFS_MASK 0x00000ff8UL
+#define PAGE_ADDR_MASK 0xfffff000UL
+#define PAGE_OFFS_MASK 0x00000fffUL
+#define HUGEPAGE_ADDR_MASK 0xffe00000UL
+#define HUGEPAGE_OFFS_MASK 0x001fffffUL
+
+/* Flag bits OR-ed into page table entries by the set_*() helpers */
+#define PAGE_FLAG_PRESENT 0x01
+#define PAGE_FLAG_RW 0x02
+#define PAGE_FLAG_SUPERVISOR 0x04
+#define PAGE_FLAG_UNCACHED 0x10
+
+#define PAGE_DEFAULT_FLAGS (PAGE_FLAG_PRESENT | PAGE_FLAG_RW | \
+ PAGE_FLAG_SUPERVISOR)
+#define PAGE_READONLY_FLAGS (PAGE_FLAG_PRESENT | PAGE_FLAG_SUPERVISOR)
+
+/* All-ones marker for "no valid physical address" */
+#define INVALID_PHYS_ADDR (~0UL)
+
+#define REMAP_BASE_ADDR 0x00100000UL
+#define NUM_REMAP_BITMAP_PAGES 1
+
+/* The foreign mapping window starts right at the remap base */
+#define FOREIGN_MAPPING_BASE REMAP_BASE_ADDR
+#define NUM_FOREIGN_PAGES 16
+
+#ifndef __ASSEMBLY__
+
+/* Entry types of the four page table levels; all are plain unsigned long */
+typedef unsigned long pgd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pte_t;
+
+/* Report whether bit 0 of the PGD entry is set. */
+static inline bool pgd_valid(pgd_t *pgd)
+{
+	const pgd_t entry = *pgd;
+
+	return entry & 1;
+}
+
+/* Stub: no table lookup is performed, NULL is returned for every address. */
+static inline pgd_t *pgd_offset(pgd_t *page_table, unsigned long addr)
+{
+ return NULL;
+}
+
+/* Write a PGD entry: page-aligned part of addr combined with flags. */
+static inline void set_pgd(pgd_t *pgd, unsigned long addr, unsigned long flags)
+{
+	const unsigned long frame = addr & PAGE_ADDR_MASK;
+
+	*pgd = frame | flags;
+}
+
+/* Invalidate a PGD entry by zeroing it. */
+static inline void clear_pgd(pgd_t *pgd)
+{
+ *pgd = 0;
+}
+
+/* Report whether bit 0 of the PUD entry is set. */
+static inline bool pud_valid(pud_t *pud)
+{
+	const pud_t entry = *pud;
+
+	return entry & 1;
+}
+
+/* Stub: no table lookup is performed, NULL is returned for every address. */
+static inline pud_t *pud4l_offset(pgd_t *pgd, unsigned long page_table_offset,
+ unsigned long addr)
+{
+ return NULL;
+}
+
+/* Stub: no table lookup is performed, NULL is returned for every address. */
+static inline pud_t *pud3l_offset(pgd_t *page_table, unsigned long addr)
+{
+ return NULL;
+}
+
+/* Write a PUD entry: page-aligned part of addr combined with flags. */
+static inline void set_pud(pud_t *pud, unsigned long addr, unsigned long flags)
+{
+	const unsigned long frame = addr & PAGE_ADDR_MASK;
+
+	*pud = frame | flags;
+}
+
+/* Invalidate a PUD entry by zeroing it. */
+static inline void clear_pud(pud_t *pud)
+{
+ *pud = 0;
+}
+
+/* Report whether bit 0 of the PMD entry is set. */
+static inline bool pmd_valid(pmd_t *pmd)
+{
+	const pmd_t entry = *pmd;
+
+	return entry & 1;
+}
+
+/*
+ * Report whether bit 7 of the PMD entry is set, i.e. whether the entry maps
+ * a huge page instead of pointing to a page table.
+ *
+ * The result is normalized to true/false: bool is a plain enum in this code
+ * base, so returning the masked value directly would yield 0x80, which is
+ * neither true nor false.
+ */
+static inline bool pmd_is_hugepage(pmd_t *pmd)
+{
+	return (*pmd & (1 << 7)) != 0;
+}
+
+/* Stub: no table lookup is performed, NULL is returned for every address. */
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long page_table_offset,
+ unsigned long addr)
+{
+ return NULL;
+}
+
+/* Write a PMD entry: page-aligned part of addr combined with flags. */
+static inline void set_pmd(pmd_t *pmd, unsigned long addr, unsigned long flags)
+{
+	const unsigned long frame = addr & PAGE_ADDR_MASK;
+
+	*pmd = frame | flags;
+}
+
+/* Invalidate a PMD entry by zeroing it. */
+static inline void clear_pmd(pmd_t *pmd)
+{
+ *pmd = 0;
+}
+
+/* Report whether bit 0 of the PTE is set. */
+static inline bool pte_valid(pte_t *pte)
+{
+	const pte_t entry = *pte;
+
+	return entry & 1;
+}
+
+/* Stub: no table lookup is performed, NULL is returned for every address. */
+static inline pte_t *pte_offset(pmd_t *pmd, unsigned long page_table_offset,
+ unsigned long addr)
+{
+ return NULL;
+}
+
+/* Write a PTE: page-aligned part of addr combined with flags. */
+static inline void set_pte(pte_t *pte, unsigned long addr, unsigned long flags)
+{
+	const unsigned long frame = addr & PAGE_ADDR_MASK;
+
+	*pte = frame | flags;
+}
+
+/* Invalidate a PTE by zeroing it. */
+static inline void clear_pte(pte_t *pte)
+{
+ *pte = 0;
+}
+
+/*
+ * Translate addr through a PTE: frame address taken from the entry plus the
+ * offset of addr within its page.
+ */
+static inline unsigned long phys_address(pte_t *pte, unsigned long addr)
+{
+	const unsigned long frame = *pte & PAGE_ADDR_MASK;
+	const unsigned long offset = addr & PAGE_OFFS_MASK;
+
+	return frame + offset;
+}
+
+/*
+ * Translate addr through a huge-page PMD entry: huge frame address taken
+ * from the entry plus the offset of addr within the huge page.
+ */
+static inline unsigned long phys_address_hugepage(pmd_t *pmd,
+						  unsigned long addr)
+{
+	const unsigned long frame = *pmd & HUGEPAGE_ADDR_MASK;
+	const unsigned long offset = addr & HUGEPAGE_OFFS_MASK;
+
+	return frame + offset;
+}
+
+/*
+ * Check whether the PUD table referenced by *pgd holds no valid entry.
+ * page_table_offset is added to the address stored in the entry to obtain
+ * the pointer used for accessing the table.
+ */
+static inline bool pud_empty(pgd_t *pgd, unsigned long page_table_offset)
+{
+	pud_t *entry = (pud_t *)((*pgd & PAGE_ADDR_MASK) + page_table_offset);
+	unsigned int remaining = PAGE_SIZE / sizeof(pud_t);
+
+	while (remaining-- > 0) {
+		if (pud_valid(entry))
+			return false;
+		entry++;
+	}
+	return true;
+}
+
+/*
+ * Check whether the PMD table referenced by *pud holds no valid entry.
+ * page_table_offset is added to the address stored in the entry to obtain
+ * the pointer used for accessing the table.
+ */
+static inline bool pmd_empty(pud_t *pud, unsigned long page_table_offset)
+{
+	pmd_t *entry = (pmd_t *)((*pud & PAGE_ADDR_MASK) + page_table_offset);
+	unsigned int remaining = PAGE_SIZE / sizeof(pmd_t);
+
+	while (remaining-- > 0) {
+		if (pmd_valid(entry))
+			return false;
+		entry++;
+	}
+	return true;
+}
+
+/*
+ * Check whether the page table referenced by *pmd holds no valid PTE.
+ * page_table_offset is added to the address stored in the entry to obtain
+ * the pointer used for accessing the table.
+ */
+static inline bool pt_empty(pmd_t *pmd, unsigned long page_table_offset)
+{
+	pte_t *entry = (pte_t *)((*pmd & PAGE_ADDR_MASK) + page_table_offset);
+	unsigned int remaining = PAGE_SIZE / sizeof(pte_t);
+
+	while (remaining-- > 0) {
+		if (pte_valid(entry))
+			return false;
+		entry++;
+	}
+	return true;
+}
+
+/* Stub: no TLB maintenance is performed yet. */
+static inline void flush_tlb(void)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PAGING_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PERCPU_H
+#define _JAILHOUSE_ASM_PERCPU_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+/* Number of elements in per_cpu.linux_reg[] */
+#define NUM_ENTRY_REGS 6
+
+/* Keep in sync with struct per_cpu! */
+/* log2 of sizeof(struct per_cpu), i.e. 8 KB per CPU */
+#define PERCPU_SIZE_SHIFT 13
+/* Byte offsets of the fields following the stack; verified at compile time
+ * by __check_assumptions() */
+#define PERCPU_STACK_END PAGE_SIZE
+#define PERCPU_LINUX_SP PERCPU_STACK_END
+#define PERCPU_CPU_ID (PERCPU_LINUX_SP + 4)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cell.h>
+
+/*
+ * Per-CPU data block, page-aligned. The offsets of the first three fields
+ * are mirrored by the PERCPU_* defines.
+ */
+struct per_cpu {
+ /* Keep these three in sync with defines above! */
+ u8 stack[PAGE_SIZE];
+ unsigned long linux_sp;
+ unsigned int cpu_id;
+
+// u32 apic_id;
+ /* cell this CPU is assigned to */
+ struct cell *cell;
+
+ unsigned long linux_reg[NUM_ENTRY_REGS];
+// unsigned long linux_ip;
+ bool initialized;
+
+ /* NOTE(review): the flags below mirror the stop/INIT/SIPI handshake
+ * fields used by the x86 APIC code -- usage on ARM to be confirmed */
+ volatile bool stop_cpu;
+ volatile bool wait_for_sipi;
+ volatile bool cpu_stopped;
+ bool init_signaled;
+ int sipi_vector;
+ bool flush_caches;
+ bool shutdown_cpu;
+} __attribute__((aligned(PAGE_SIZE)));
+
+/*
+ * Return the per-CPU data block of the given CPU. The blocks are laid out
+ * back to back at the start of __page_pool, (1 << PERCPU_SIZE_SHIFT) bytes
+ * apart.
+ */
+static inline struct per_cpu *per_cpu(unsigned int cpu)
+{
+	extern u8 __page_pool[];
+	u8 *block = __page_pool + (cpu << PERCPU_SIZE_SHIFT);
+
+	return (struct per_cpu *)block;
+}
+
+/* Validate defines */
+/* Fails the build with a negative array size if the condition is false */
+#define CHECK_ASSUMPTION(assume) ((void)sizeof(char[1 - 2*!(assume)]))
+
+/*
+ * Compile-time verification that the PERCPU_* defines match the real layout
+ * of struct per_cpu. Generates no code.
+ */
+static inline void __check_assumptions(void)
+{
+	struct per_cpu layout;
+
+	CHECK_ASSUMPTION((1 << PERCPU_SIZE_SHIFT) == sizeof(struct per_cpu));
+	CHECK_ASSUMPTION(PERCPU_STACK_END == sizeof(layout.stack));
+	CHECK_ASSUMPTION(PERCPU_LINUX_SP ==
+			 __builtin_offsetof(struct per_cpu, linux_sp));
+	CHECK_ASSUMPTION(PERCPU_CPU_ID ==
+			 __builtin_offsetof(struct per_cpu, cpu_id));
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PERCPU_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PROCESSOR_H
+#define _JAILHOUSE_ASM_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/* Placeholder for the saved guest register set; no members defined yet. */
+struct registers {
+};
+
+/* Stub: intended as busy-wait hint, currently a no-op. */
+static inline void cpu_relax(void)
+{
+}
+
+/*
+ * Stub: emits nothing.
+ * NOTE(review): this provides neither a CPU nor a compiler barrier -- has to
+ * be implemented before callers can rely on any ordering.
+ */
+static inline void memory_barrier(void)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PROCESSOR_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/bitops.h>
+#include <asm/processor.h>
+
+/* Spinlock object; bit 0 of state is the lock bit per the commented-out
+ * implementation of spin_lock()/spin_unlock() */
+typedef struct {
+ unsigned long state;
+} spinlock_t;
+
+/* Define a spinlock variable; it is not explicitly initialized */
+#define DEFINE_SPINLOCK(name) spinlock_t (name)
+
+/*
+ * Stub: returns immediately without acquiring anything. The intended
+ * implementation is kept below, disabled.
+ */
+static inline void spin_lock(spinlock_t *lock)
+{
+// while (test_and_set_bit(0, &lock->state))
+// cpu_relax();
+}
+
+/*
+ * Stub: returns immediately without releasing anything. The intended
+ * implementation is kept below, disabled.
+ */
+static inline void spin_unlock(spinlock_t *lock)
+{
+// asm volatile("": : :"memory");
+// clear_bit(0, &lock->state);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_TYPES_H
+#define _JAILHOUSE_ASM_TYPES_H
+
+/* Null pointer constant; no standard headers are used here */
+#define NULL ((void *)0)
+
+/* Width of unsigned long on this architecture, used by the bitmap helpers */
+#define BITS_PER_LONG 32
+
+#ifndef __ASSEMBLY__
+
+/* Fixed-width integer types (sN = signed, uN = unsigned, N bits) */
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+/* Double-underscore aliases of the types above */
+typedef s8 __s8;
+typedef u8 __u8;
+
+typedef s16 __s16;
+typedef u16 __u16;
+
+typedef s32 __s32;
+typedef u32 __u32;
+
+typedef s64 __s64;
+typedef u64 __u64;
+
+/* Boolean as a plain enum: values are NOT normalized to 0/1 on conversion */
+typedef enum { true=1, false=0 } bool;
+
+/* Set of CPUs represented as a bitmap. */
+struct cpu_set {
+ unsigned long max_cpu_id;
+ /* Note: The bitmap is supposed to be extended by embedding this
+ * struct into a larger buffer. */
+ unsigned long bitmap[1];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_TYPES_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+
+/* Stub: early architecture setup is not implemented, returns -ENOSYS. */
+int arch_init_early(struct cell *linux_cell,
+ struct jailhouse_cell_desc *config)
+{
+ return -ENOSYS;
+}
+
+/* Stub: per-CPU setup is not implemented, returns -ENOSYS. */
+int arch_cpu_init(struct per_cpu *cpu_data)
+{
+ return -ENOSYS;
+}
+
+/* Stub: late architecture setup is not implemented, returns -ENOSYS. */
+int arch_init_late(struct cell *linux_cell,
+ struct jailhouse_cell_desc *config)
+{
+ return -ENOSYS;
+}
+
+/* Stub: never returns, the calling CPU spins forever. */
+void arch_cpu_activate_vmm(struct per_cpu *cpu_data)
+{
+	for (;;)
+		;
+}
+
+/* Stub: nothing is restored. */
+void arch_cpu_restore(struct per_cpu *cpu_data)
+{
+}
+
+// catch missing symbols
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/control.h>
+#include <jailhouse/string.h>
+/* Empty placeholders that only exist to satisfy the linker */
+void arch_dbg_write_init(void) {}
+/* always reports CPU 0 */
+int phys_processor_id(void) { return 0; }
+void arch_suspend_cpu(unsigned int cpu_id) {}
+void arch_resume_cpu(unsigned int cpu_id) {}
+void arch_reset_cpu(unsigned int cpu_id) {}
+void arch_shutdown_cpu(unsigned int cpu_id) {}
+/* cell creation is not implemented, returns -ENOSYS */
+int arch_cell_create(struct per_cpu *cpu_data, struct cell *new_cell,
+ struct jailhouse_cell_desc *config) { return -ENOSYS; }
+/*
+ * Copy n bytes from src to dest (the areas must not overlap) and return dest.
+ * Plain byte-wise copy; replaces the former stub that copied nothing and
+ * returned NULL, which breaks every caller, including copies the compiler
+ * emits implicitly.
+ */
+void *memcpy(void *dest, const void *src, unsigned long n)
+{
+	const unsigned char *from = src;
+	unsigned char *to = dest;
+	unsigned long i;
+
+	for (i = 0; i < n; i++)
+		to[i] = from[i];
+
+	return dest;
+}
+/* Stub: debug output is discarded. */
+void arch_dbg_write(const char *msg) {}
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+always := built-in.o
+
+# x86 objects; acpi.o is pulled in from the generic hypervisor directory
+obj-y := apic.o dbg-write.o entry.o setup.o fault.o vmx.o control.o mmio.o \
+ ../../acpi.o
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/processor.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/control.h>
+#include <jailhouse/mmio.h>
+#include <asm/apic.h>
+#include <asm/bitops.h>
+#include <asm/fault.h>
+#include <asm/spinlock.h>
+#include <asm/vmx.h>
+
+/* true once apic_init() found the APIC in x2APIC mode */
+bool using_x2apic;
+
+/* Maps physical APIC IDs to CPU IDs; APIC_INVALID_ID marks unused slots */
+static u8 apic_to_cpu_id[] = { [0 ... APIC_MAX_PHYS_ID] = APIC_INVALID_ID };
+/* Protects the stop/INIT/SIPI handshake flags in struct per_cpu */
+static DEFINE_SPINLOCK(wait_lock);
+/* Hypervisor mapping of the xAPIC register page (xAPIC mode only) */
+static void *xapic_page;
+
+/* Access methods for the APIC mode in use, filled in by apic_init() */
+static struct {
+ u32 (*read)(unsigned int reg);
+ u32 (*read_id)(void);
+ void (*write)(unsigned int reg, u32 val);
+ void (*send_ipi)(u32 apic_id, u32 icr_lo);
+} apic_ops;
+
+/* Read an xAPIC register; registers are 16 bytes apart in the MMIO page. */
+static u32 read_xapic(unsigned int reg)
+{
+	volatile u32 *reg_addr = xapic_page + (reg << 4);
+
+	return *reg_addr;
+}
+
+/* Return the xAPIC ID, which is stored in bits 31:24 of the ID register. */
+static u32 read_xapic_id(void)
+{
+	volatile u32 *id_reg = xapic_page + (APIC_REG_ID << 4);
+	u32 raw = *id_reg;
+
+	return raw >> 24;
+}
+
+/* Write an xAPIC register; registers are 16 bytes apart in the MMIO page. */
+static void write_xapic(unsigned int reg, u32 val)
+{
+	volatile u32 *reg_addr = xapic_page + (reg << 4);
+
+	*reg_addr = val;
+}
+
+/*
+ * Send an IPI in xAPIC mode: wait until a previous IPI is no longer pending,
+ * then program the destination (ICR high) followed by the command (ICR low).
+ * The write to the low half triggers the IPI.
+ */
+static void send_xapic_ipi(u32 apic_id, u32 icr_lo)
+{
+	volatile u32 *icr_hi_reg = xapic_page + (APIC_REG_ICR_HI << 4);
+	volatile u32 *icr_lo_reg = xapic_page + (APIC_REG_ICR << 4);
+
+	while (read_xapic(APIC_REG_ICR) & APIC_ICR_DS_PENDING)
+		cpu_relax();
+
+	*icr_hi_reg = apic_id << 24;
+	*icr_lo_reg = icr_lo;
+}
+
+/* Read an x2APIC register via its MSR (base + register index). */
+static u32 read_x2apic(unsigned int reg)
+{
+	const unsigned int msr = MSR_X2APIC_BASE + reg;
+
+	return read_msr(msr);
+}
+
+/* Return the x2APIC ID as read from the ID register MSR. */
+static u32 read_x2apic_id(void)
+{
+	const unsigned int id_msr = MSR_X2APIC_BASE + APIC_REG_ID;
+
+	return read_msr(id_msr);
+}
+
+/* Write an x2APIC register via its MSR (base + register index). */
+static void write_x2apic(unsigned int reg, u32 val)
+{
+	const unsigned int msr = MSR_X2APIC_BASE + reg;
+
+	write_msr(msr, val);
+}
+
+/*
+ * Send an IPI in x2APIC mode: a single MSR write carrying the destination in
+ * the upper and the command in the lower 32 bits.
+ */
+static void send_x2apic_ipi(u32 apic_id, u32 icr_lo)
+{
+	unsigned long icr = (unsigned long)apic_id << 32;
+
+	icr |= icr_lo;
+	write_msr(MSR_X2APIC_BASE + APIC_REG_ICR, icr);
+}
+
+/*
+ * Return the APIC ID of the calling CPU, read via the access method selected
+ * by apic_init(); must not be called before that.
+ */
+int phys_processor_id(void)
+{
+ return apic_ops.read_id();
+}
+
+/*
+ * Register the calling CPU's APIC.
+ *
+ * Records the APIC ID <-> CPU ID association. Returns 0 on success, -ERANGE
+ * if the APIC ID exceeds APIC_MAX_PHYS_ID, -EBUSY if that APIC ID is already
+ * registered and -EINVAL if, in xAPIC mode, DFR/LDR are not set up for flat
+ * mode with the logical ID matching the CPU ID.
+ */
+int apic_cpu_init(struct per_cpu *cpu_data)
+{
+	unsigned int apic_id = phys_processor_id();
+	unsigned int cpu_id = cpu_data->cpu_id;
+
+	printk("(APIC ID %d) ", apic_id);
+
+	if (apic_id > APIC_MAX_PHYS_ID)
+		return -ERANGE;
+	if (apic_to_cpu_id[apic_id] != APIC_INVALID_ID)
+		return -EBUSY;
+
+	/* only flat mode with LDR corresponding to logical ID supported */
+	if (!using_x2apic) {
+		if (apic_ops.read(APIC_REG_DFR) != 0xffffffff)
+			return -EINVAL;
+		if (apic_ops.read(APIC_REG_LDR) != 1UL << (cpu_id + 24))
+			return -EINVAL;
+	}
+
+	apic_to_cpu_id[apic_id] = cpu_id;
+	cpu_data->apic_id = apic_id;
+	return 0;
+}
+
+/*
+ * Detect the APIC mode from MSR_IA32_APICBASE and install the matching
+ * access methods in apic_ops. In xAPIC mode the register page is mapped
+ * uncached into the hypervisor first.
+ *
+ * Returns 0 on success, -EIO if the APIC is disabled, -ENOMEM if no remap
+ * page is available, or the error reported by page_map_create().
+ */
+int apic_init(void)
+{
+	unsigned long apicbase = read_msr(MSR_IA32_APICBASE);
+	int err;
+
+	if (!(apicbase & (APIC_BASE_EXTD | APIC_BASE_EN)))
+		return -EIO;
+
+	if (apicbase & APIC_BASE_EXTD) {
+		/* set programmatically to enable address fixup */
+		apic_ops.read = read_x2apic;
+		apic_ops.read_id = read_x2apic_id;
+		apic_ops.write = write_x2apic;
+		apic_ops.send_ipi = send_x2apic_ipi;
+		using_x2apic = true;
+	} else {
+		xapic_page = page_alloc(&remap_pool, 1);
+		if (!xapic_page)
+			return -ENOMEM;
+
+		err = page_map_create(hv_page_table, XAPIC_BASE, PAGE_SIZE,
+				      (unsigned long)xapic_page,
+				      PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
+				      PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
+		if (err)
+			return err;
+
+		apic_ops.read = read_xapic;
+		apic_ops.read_id = read_xapic_id;
+		apic_ops.write = write_xapic;
+		apic_ops.send_ipi = send_xapic_ipi;
+	}
+
+	printk("Using x%sAPIC\n", using_x2apic ? "2" : "");
+
+	return 0;
+}
+
+/*
+ * Request CPU cpu_id to park and wait until it reports being stopped.
+ * If the target is not parked already, it is kicked with an NMI IPI.
+ */
+void arch_suspend_cpu(unsigned int cpu_id)
+{
+ struct per_cpu *target_data = per_cpu(cpu_id);
+ bool target_stopped;
+
+ spin_lock(&wait_lock);
+
+ /* set the request and sample the current state under the lock */
+ target_data->stop_cpu = true;
+ target_stopped = target_data->cpu_stopped;
+
+ spin_unlock(&wait_lock);
+
+ if (!target_stopped) {
+ apic_ops.send_ipi(target_data->apic_id,
+ APIC_ICR_DLVR_NMI |
+ APIC_ICR_DEST_PHYSICAL |
+ APIC_ICR_LV_ASSERT |
+ APIC_ICR_TM_EDGE |
+ APIC_ICR_SH_NONE);
+
+ /* cpu_stopped is set by the target in apic_handle_events() */
+ while (!target_data->cpu_stopped)
+ cpu_relax();
+ }
+}
+
+/*
+ * Release a CPU parked via arch_suspend_cpu() by clearing its stop_cpu
+ * request.
+ */
+void arch_resume_cpu(unsigned int cpu_id)
+{
+ /* make any state changes visible before releasing the CPU */
+ memory_barrier();
+
+ per_cpu(cpu_id)->stop_cpu = false;
+}
+
+/* target cpu has to be stopped */
+/*
+ * Resume a stopped CPU with sipi_vector preset to APIC_BSP_PSEUDO_SIPI, the
+ * value apic_handle_events() then returns on the target.
+ */
+void arch_reset_cpu(unsigned int cpu_id)
+{
+ per_cpu(cpu_id)->sipi_vector = APIC_BSP_PSEUDO_SIPI;
+
+ arch_resume_cpu(cpu_id);
+}
+
+/*
+ * Park CPU cpu_id, flag it for shutdown and release it again so that it
+ * takes the shutdown path in apic_handle_events(). Does not wait for the
+ * shutdown to complete.
+ */
+void arch_shutdown_cpu(unsigned int cpu_id)
+{
+ arch_suspend_cpu(cpu_id);
+ per_cpu(cpu_id)->shutdown_cpu = true;
+ arch_resume_cpu(cpu_id);
+ /*
+ * Note: The caller has to ensure that the target CPU has enough time
+ * to reach the shutdown position before destroying the code path it
+ * has to take to get there. This can be ensured by bringing the CPU
+ * online again under Linux before cleaning up the hypervisor.
+ */
+}
+
+/* NMI hook: hands over to vmx_schedule_vmexit() for this CPU. */
+void apic_nmi_handler(struct per_cpu *cpu_data)
+{
+ vmx_schedule_vmexit(cpu_data);
+}
+
+/*
+ * Park the calling CPU until it is released and process pending requests.
+ *
+ * The CPU marks itself stopped and spins while wait_for_sipi or stop_cpu is
+ * set. Afterwards it executes a requested shutdown or TLB/EPT flush.
+ * Returns cpu_data->sipi_vector, which is preset to -1 if no INIT was
+ * pending and may be overwritten by other CPUs while this one is parked.
+ */
+int apic_handle_events(struct per_cpu *cpu_data)
+{
+ spin_lock(&wait_lock);
+
+ do {
+ /* a pending INIT turns into waiting for the SIPI */
+ if (cpu_data->init_signaled) {
+ cpu_data->init_signaled = false;
+ cpu_data->wait_for_sipi = true;
+ } else
+ cpu_data->sipi_vector = -1;
+
+ cpu_data->cpu_stopped = true;
+
+ spin_unlock(&wait_lock);
+
+ /* both flags are cleared by other CPUs */
+ while (cpu_data->wait_for_sipi || cpu_data->stop_cpu)
+ cpu_relax();
+
+ if (cpu_data->shutdown_cpu) {
+ /* disable APIC */
+ apic_ops.write(APIC_REG_SPIV, 0);
+ vmx_cpu_exit(cpu_data);
+ asm volatile("hlt");
+ }
+
+ spin_lock(&wait_lock);
+
+ cpu_data->cpu_stopped = false;
+ /* another INIT may have arrived while we were parked */
+ } while (cpu_data->init_signaled);
+
+ if (cpu_data->flush_caches) {
+ cpu_data->flush_caches = false;
+ flush_tlb();
+ vmx_invept();
+ }
+
+ spin_unlock(&wait_lock);
+
+ return cpu_data->sipi_vector;
+}
+
+/*
+ * Check that the delivery mode and the destination shorthand encoded in the
+ * low ICR word are supported. Unsupported values are reported via
+ * panic_printk() followed by panic_stop().
+ */
+static void apic_validate_ipi_mode(struct per_cpu *cpu_data, u32 lo_val)
+{
+	const u32 delivery_mode = lo_val & APIC_ICR_DLVR_MASK;
+	const u32 shorthand = lo_val & APIC_ICR_SH_MASK;
+
+	switch (delivery_mode) {
+	case APIC_ICR_DLVR_FIXED:
+	case APIC_ICR_DLVR_LOWPRI:
+	case APIC_ICR_DLVR_NMI:
+	case APIC_ICR_DLVR_INIT:
+	case APIC_ICR_DLVR_SIPI:
+		break;
+	default:
+		panic_printk("FATAL: Unsupported APIC delivery mode, "
+			     "ICR.lo=%x\n", lo_val);
+		panic_stop(cpu_data);
+	}
+
+	switch (shorthand) {
+	case APIC_ICR_SH_SELF:
+	case APIC_ICR_SH_NONE:
+		break;
+	default:
+		panic_printk("FATAL: Unsupported shorthand, ICR.lo=%x\n",
+			     lo_val);
+		panic_stop(cpu_data);
+	}
+}
+
+/*
+ * Deliver an IPI requested by cpu_data's cell to target_cpu_id.
+ *
+ * Targets that are invalid or not part of the sender's cell are rejected
+ * with a warning. NMI requests are dropped, INIT and SIPI are emulated via
+ * the per-CPU handshake flags, all other modes are forwarded to the
+ * physical APIC of the target unmodified.
+ */
+static void apic_deliver_ipi(struct per_cpu *cpu_data,
+ unsigned int target_cpu_id,
+ u32 orig_icr_hi, u32 icr_lo)
+{
+ struct per_cpu *target_data;
+
+ if (target_cpu_id == APIC_INVALID_ID ||
+ !test_bit(target_cpu_id, cpu_data->cell->cpu_set->bitmap)) {
+ printk("WARNING: CPU %d specified IPI destination outside "
+ "cell boundaries, ICR.hi=%x\n",
+ cpu_data->cpu_id, orig_icr_hi);
+ return;
+ }
+
+ target_data = per_cpu(target_cpu_id);
+
+ switch (icr_lo & APIC_ICR_DLVR_MASK) {
+ case APIC_ICR_DLVR_NMI:
+ /* TODO: must be sent via hypervisor */
+ printk("Ignoring NMI IPI\n");
+ return;
+ case APIC_ICR_DLVR_INIT:
+ spin_lock(&wait_lock);
+
+ /* an INIT to a CPU already waiting for SIPI changes nothing */
+ if (!target_data->wait_for_sipi)
+ target_data->init_signaled = true;
+
+ spin_unlock(&wait_lock);
+
+ /* the NMI makes the target process the request */
+ apic_ops.send_ipi(target_data->apic_id,
+ APIC_ICR_DLVR_NMI |
+ APIC_ICR_DEST_PHYSICAL |
+ APIC_ICR_LV_ASSERT |
+ APIC_ICR_TM_EDGE |
+ APIC_ICR_SH_NONE);
+ return;
+ case APIC_ICR_DLVR_SIPI:
+ /* same value as assigned before the switch */
+ target_data = per_cpu(target_cpu_id);
+
+ spin_lock(&wait_lock);
+
+ /* only a CPU waiting for SIPI takes the vector and is released */
+ if (target_data->wait_for_sipi) {
+ target_data->wait_for_sipi = false;
+ target_data->sipi_vector =
+ icr_lo & APIC_ICR_VECTOR_MASK;
+ }
+
+ spin_unlock(&wait_lock);
+ return;
+ }
+
+ apic_ops.send_ipi(target_data->apic_id, icr_lo);
+}
+
+/*
+ * Deliver an IPI that uses logical destination mode by sending it to every
+ * CPU selected in dest individually.
+ *
+ * @cpu_data: sending CPU
+ * @dest:     logical destination (x2APIC: cluster ID plus logical ID bitmap,
+ *            xAPIC flat mode: bitmap of CPU IDs)
+ * @lo_val:   low ICR word, logical destination flag already removed
+ * @hi_val:   original high ICR word, only used for diagnostics
+ *
+ * In x2APIC mode the cluster ID is under guest control, so the derived APIC
+ * ID is range-checked before it is used as index into apic_to_cpu_id[].
+ * Out-of-range IDs are passed on as APIC_INVALID_ID, which makes
+ * apic_deliver_ipi() reject them with its usual warning.
+ */
+static void apic_deliver_logical_dest_ipi(struct per_cpu *cpu_data,
+					  unsigned long dest, u32 lo_val,
+					  u32 hi_val)
+{
+	unsigned int target_cpu_id;
+	unsigned int logical_id;
+	unsigned int cluster_id;
+	unsigned long dest_mask;
+	unsigned int apic_id;
+
+	if (using_x2apic) {
+		cluster_id = (dest & X2APIC_DEST_CLUSTER_ID_MASK) >>
+			X2APIC_DEST_CLUSTER_ID_SHIFT;
+		/* inverted bitmap: ffz() then yields the selected IDs */
+		dest_mask = ~(dest & X2APIC_DEST_LOGICAL_ID_MASK);
+		while (dest_mask != ~0UL) {
+			logical_id = ffz(dest_mask);
+			dest_mask |= 1UL << logical_id;
+			apic_id = logical_id |
+				(cluster_id << X2APIC_CLUSTER_ID_SHIFT);
+			/* never index the table with an unchecked guest value */
+			target_cpu_id = APIC_INVALID_ID;
+			if (apic_id <= APIC_MAX_PHYS_ID)
+				target_cpu_id = apic_to_cpu_id[apic_id];
+			apic_deliver_ipi(cpu_data, target_cpu_id, hi_val,
+					 lo_val);
+		}
+	} else {
+		/* flat mode: bit n of dest addresses CPU n */
+		dest_mask = ~dest;
+		while (dest_mask != ~0UL) {
+			target_cpu_id = ffz(dest_mask);
+			dest_mask |= 1UL << target_cpu_id;
+			apic_deliver_ipi(cpu_data, target_cpu_id, hi_val,
+					 lo_val);
+		}
+	}
+}
+
+/*
+ * Handle a guest write to the APIC interrupt command register.
+ *
+ * @cpu_data:	per-CPU data of the writing CPU
+ * @lo_val:	value written to the ICR low word
+ * @hi_val:	ICR high word carrying the destination
+ *
+ * The request is validated first, then dispatched either to the logical
+ * destination expansion or, for physical mode, directly to the CPU that the
+ * APIC ID maps to.
+ */
+void apic_handle_icr_write(struct per_cpu *cpu_data, u32 lo_val, u32 hi_val)
+{
+	unsigned int target_cpu_id = APIC_INVALID_ID;
+	unsigned long dest;
+
+	apic_validate_ipi_mode(cpu_data, lo_val);
+
+	/* xAPIC keeps the destination in the top byte of ICR.hi. */
+	dest = using_x2apic ? hi_val : hi_val >> 24;
+
+	if (lo_val & APIC_ICR_DEST_LOGICAL) {
+		apic_deliver_logical_dest_ipi(cpu_data, dest,
+					      lo_val & ~APIC_ICR_DEST_LOGICAL,
+					      hi_val);
+		return;
+	}
+
+	/* IDs beyond the APIC ID map stay invalid and get rejected below. */
+	if (dest <= APIC_MAX_PHYS_ID)
+		target_cpu_id = apic_to_cpu_id[dest];
+	apic_deliver_ipi(cpu_data, target_cpu_id, hi_val, lo_val);
+}
+
+/*
+ * Emulate a guest MMIO access to the xAPIC register page.
+ *
+ * @guest_regs:		saved guest registers, indexed by the parsed access
+ * @cpu_data:		per-CPU data of the accessing CPU
+ * @rip:		guest address of the accessing instruction
+ * @page_table_addr:	guest page table root used to parse the instruction
+ * @reg:		index of the accessed APIC register
+ * @is_write:		true for a write access, false for a read
+ *
+ * Returns the length of the emulated instruction, or 0 if the instruction
+ * could not be parsed or the access is not supported.
+ */
+unsigned int apic_mmio_access(struct registers *guest_regs,
+			      struct per_cpu *cpu_data, unsigned long rip,
+			      unsigned long page_table_addr, unsigned int reg,
+			      bool is_write)
+{
+	struct mmio_access access;
+	/*
+	 * Only 32-bit accesses are accepted, so only the low half of the
+	 * guest register is relevant. Keeping the full 64 bits would make the
+	 * LDR/DFR checks fail on stale upper register bits.
+	 */
+	u32 val;
+
+	access = mmio_parse(cpu_data, rip, page_table_addr, is_write);
+	if (access.inst_len == 0)
+		return 0;
+	if (access.size != 4) {
+		panic_printk("FATAL: Unsupported APIC access width %d\n",
+			     access.size);
+		return 0;
+	}
+	if (is_write) {
+		val = ((unsigned long *)guest_regs)[access.reg];
+		if (reg == APIC_REG_ICR) {
+			apic_handle_icr_write(cpu_data, val,
+					      apic_ops.read(APIC_REG_ICR_HI));
+		} else if (reg == APIC_REG_LDR &&
+			   val != 1UL << (cpu_data->cpu_id + 24)) {
+			panic_printk("FATAL: Unsupported change to LDR: %x\n",
+				     val);
+			return 0;
+		} else if (reg == APIC_REG_DFR && val != 0xffffffff) {
+			panic_printk("FATAL: Unsupported change to DFR: %x\n",
+				     val);
+			return 0;
+		} else {
+			apic_ops.write(reg, val);
+		}
+	} else {
+		val = apic_ops.read(reg);
+		/* Store zero-extended, as a 32-bit register load would. */
+		((unsigned long *)guest_regs)[access.reg] = val;
+	}
+	return access.inst_len;
+}
+
+/*
+ * Handle a guest write to an x2APIC MSR. The MSR number is taken from the
+ * guest's rcx and the value from rax. Self-IPI writes are not emulated yet;
+ * all other registers are passed through to the APIC.
+ */
+void x2apic_handle_write(struct registers *guest_regs)
+{
+	u32 msr = guest_regs->rcx;
+
+	if (msr == MSR_X2APIC_SELF_IPI) {
+		/* TODO: emulate */
+		printk("Unhandled x2APIC self IPI write\n");
+		return;
+	}
+
+	apic_ops.write(msr - MSR_X2APIC_BASE, guest_regs->rax);
+}
+
+/*
+ * Handle a guest read of an x2APIC MSR. The MSR number is taken from the
+ * guest's rcx; the result replaces the low 32 bits of rax and rdx. Only the
+ * ICR provides a non-zero upper half, read from the following APIC register.
+ */
+void x2apic_handle_read(struct registers *guest_regs)
+{
+	u32 msr = guest_regs->rcx;
+	u32 apic_reg = msr - MSR_X2APIC_BASE;
+
+	guest_regs->rax &= ~0xffffffffUL;
+	guest_regs->rax |= apic_ops.read(apic_reg);
+
+	guest_regs->rdx &= ~0xffffffffUL;
+	if (msr != MSR_X2APIC_ICR)
+		return;
+	guest_regs->rdx |= apic_ops.read(apic_reg + 1);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/control.h>
+#include <asm/vmx.h>
+
+/*
+ * Architecture-specific part of cell creation.
+ *
+ * Shrinks the caller's cell according to @config, requests a cache flush on
+ * every other CPU of the caller's cell and finally initializes @new_cell.
+ * Returns the result of vmx_cell_init().
+ */
+int arch_cell_create(struct per_cpu *cpu_data, struct cell *new_cell,
+		     struct jailhouse_cell_desc *config)
+{
+	unsigned int other_cpu;
+
+	vmx_cell_shrink(cpu_data->cell, config);
+
+	for_each_cpu_except(other_cpu, cpu_data->cell->cpu_set,
+			    cpu_data->cpu_id) {
+		per_cpu(other_cpu)->flush_caches = true;
+	}
+
+	return vmx_cell_init(new_cell, config);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_UART_OXPCIE952
+#define UART_BASE 0xe010
+#else
+#define UART_BASE 0x3f8
+#endif
+#define UART_TX 0x0
+#define UART_DLL 0x0
+#define UART_DLM 0x1
+#define UART_LCR 0x3
+#define UART_LCR_8N1 0x03
+#define UART_LCR_DLAB 0x80
+#define UART_LSR 0x5
+#define UART_LSR_THRE 0x20
+
+/*
+ * Set up the debug UART: open the divisor latch, program the divisor
+ * (low byte depends on the configured UART, high byte is 0) and switch the
+ * line control register to 8N1, which also closes the latch again.
+ */
+void arch_dbg_write_init(void)
+{
+#ifdef CONFIG_UART_OXPCIE952
+	const u8 divisor_low = 0x22;
+#else
+	const u8 divisor_low = 1;
+#endif
+
+	outb(UART_LCR_DLAB, UART_BASE + UART_LCR);
+	outb(divisor_low, UART_BASE + UART_DLL);
+	outb(0, UART_BASE + UART_DLM);
+	outb(UART_LCR_8N1, UART_BASE + UART_LCR);
+}
+
+/*
+ * Write the NUL-terminated string @msg to the debug UART, busy-waiting for
+ * the transmitter before every character. Output is abandoned as soon as a
+ * panic is in progress on a different CPU.
+ */
+void arch_dbg_write(const char *msg)
+{
+	char ch;
+
+	for (ch = *msg; ch; ch = *++msg) {
+		while (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))
+			cpu_relax();
+		if (panic_in_progress && panic_cpu != phys_processor_id())
+			return;
+		outb(ch, UART_BASE + UART_TX);
+	}
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/*
+ * Entry point for Linux loader module on JAILHOUSE_ENABLE
+ *
+ * Takes the CPU ID in %rdi. Saves the callee-saved registers, locates this
+ * CPU's per-CPU area inside __page_pool, switches to the stack contained in
+ * it and calls entry() with the per-CPU area as argument. On return, the
+ * Linux stack and registers are restored; %rax is left as set by entry().
+ */
+ .text
+ .globl arch_entry
+arch_entry:
+ cli
+
+ push %rbp
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ /* %rdi = __page_pool + (cpu_id << PERCPU_SIZE_SHIFT), %rdx = cpu_id */
+ mov %rdi,%rdx
+ shl $PERCPU_SIZE_SHIFT,%rdi
+ lea __page_pool(%rip),%rax
+ add %rax,%rdi
+
+ /* remember the Linux stack pointer and the CPU ID in the per-CPU area */
+ mov %rsp,PERCPU_LINUX_SP(%rdi)
+ mov %edx,PERCPU_CPU_ID(%rdi)
+
+ /* switch to the top of the per-CPU stack */
+ lea PERCPU_STACK_END-8(%rdi),%rsp
+
+ /* keep the per-CPU pointer across the call */
+ push %rdi
+
+ call entry
+
+ pop %rdi
+
+ mov PERCPU_LINUX_SP(%rdi),%rsp
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ pop %rbp
+
+ ret
+
+
+/*
+ * Fix up Global Offset Table with absolute hypervisor address
+ *
+ * Adds the runtime address of hypervisor_header to every 8-byte entry
+ * between __got_start and __got_end. Clobbers %rax, %rcx and %rdx.
+ */
+ .globl got_init
+got_init:
+ lea __got_start(%rip),%rdx
+ lea __got_end(%rip),%rcx
+ lea hypervisor_header(%rip),%rax
+
+got_loop:
+ /* stop once the cursor in %rdx has reached the end of the table */
+ cmp %rdx,%rcx
+ je got_done
+
+ add %rax,(%rdx)
+ add $8,%rdx
+ jmp got_loop
+
+got_done:
+ ret
+
+
+/* Exception/interrupt entry points */
+/*
+ * Stub for a vector without an error code on the stack: pushes -1 as
+ * placeholder so the frame has the same layout as for error_entry, then the
+ * vector number, and passes the frame address to exception_handler. The
+ * handler is not expected to return; the trailing loop catches that case.
+ */
+.macro no_error_entry vector
+ .balign 16
+ pushq $-1
+ pushq $\vector
+ mov %rsp,%rdi
+ call exception_handler
+1: jmp 1b
+.endm
+
+/*
+ * Stub for a vector that already has an error code on the stack: only the
+ * vector number is pushed before the frame address is passed to
+ * exception_handler. The trailing loop catches an unexpected return.
+ */
+.macro error_entry vector
+ .balign 16
+ pushq $\vector
+ mov %rsp,%rdi
+ call exception_handler
+1: jmp 1b
+.endm
+
+/*
+ * Table of exception stubs, each aligned to 16 bytes. It covers vectors
+ * 0-1, 3-14 and 16-19; vector 2 (NMI_VECTOR) and vector 15 have no stub
+ * here. Vectors 8, 10-14 and 17 use error_entry, all others
+ * no_error_entry.
+ */
+ .global exception_entries
+ .balign 16
+exception_entries:
+ no_error_entry 0
+ no_error_entry 1
+vector=3
+.rept 5
+ no_error_entry vector
+ vector=vector+1
+.endr
+ error_entry 8
+ no_error_entry 9
+vector=10
+.rept 5
+ error_entry vector
+ vector=vector+1
+.endr
+ no_error_entry 16
+ error_entry 17
+ no_error_entry 18
+ no_error_entry 19
+
+
+/*
+ * NMI entry: saves %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11, calls
+ * apic_nmi_handler and returns to the interrupted context via iretq.
+ */
+ .global nmi_entry
+ .balign 16
+nmi_entry:
+ push %rax
+ push %rcx
+ push %rdx
+ push %rsi
+ push %rdi
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+
+ /*
+ * The handler argument is the page-aligned stack address. NOTE(review):
+ * this presumably yields the per-CPU area, relying on the stack being
+ * its first page - confirm.
+ */
+ mov %rsp,%rdi
+ and $PAGE_MASK,%rdi
+ call apic_nmi_handler
+
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rdi
+ pop %rsi
+ pop %rdx
+ pop %rcx
+ pop %rax
+
+ iretq
+
+
+/*
+ * VM-exit handling
+ *
+ * Saves all guest general-purpose registers on the stack (16 slots,
+ * including a placeholder for rsp), calls vmx_handle_exit with the address
+ * of that register block in %rdi and a second pointer in %rsi, restores the
+ * registers and resumes the guest. If vmresume falls through,
+ * vmx_entry_failure is invoked.
+ */
+ .globl vm_exit
+vm_exit:
+ push %rax
+ push %rcx
+ push %rdx
+ push %rbx
+ sub $8,%rsp /* placeholder for rsp */
+ push %rbp
+ push %rsi
+ push %rdi
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ /*
+ * %rsi = (%rsp + 16 slots) - PERCPU_STACK_END. NOTE(review): this is
+ * the per-CPU area if the host stack started at its stack end - confirm.
+ */
+ mov %rsp,%rdi
+ lea -PERCPU_STACK_END+16*8(%rsp),%rsi
+ call vmx_handle_exit
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rdi
+ pop %rsi
+ pop %rbp
+ /* skip the rsp placeholder */
+ add $8,%rsp
+ pop %rbx
+ pop %rdx
+ pop %rcx
+ pop %rax
+
+ vmresume
+
+ /* only reached if vmresume did not enter the guest */
+ lea -PERCPU_STACK_END(%rsp),%rdi
+ jmp vmx_entry_failure
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <asm/types.h>
+#include <asm/fault.h>
+#include <asm/vmx.h>
+
+/*
+ * Stack layout handed to exception_handler(). The first two fields are
+ * pushed by the entry stubs (the error field is -1 for vectors entered via
+ * no_error_entry); the remaining fields are presumably the frame pushed by
+ * the CPU on exception entry.
+ */
+struct exception_frame {
+ u64 vector;
+ u64 error;
+ u64 rip;
+ u64 cs;
+ u64 flags;
+ u64 rsp;
+ u64 ss;
+};
+
+/*
+ * Report an exception raised while running hypervisor code and stop the
+ * CPU. The error code is only printed when the entry stub did not store
+ * the -1 placeholder. Never returns.
+ */
+void exception_handler(struct exception_frame *frame)
+{
+	const bool has_error_code = frame->error != (u64)-1;
+
+	panic_printk("FATAL: Jailhouse triggered exception #%d\n",
+		     frame->vector);
+	if (has_error_code)
+		panic_printk("Error code: %x\n", frame->error);
+	panic_printk("Physical CPU ID: %d\n", phys_processor_id());
+	panic_printk("RIP: %p RSP: %p FLAGS: %x\n",
+		     frame->rip, frame->rsp, frame->flags);
+
+	panic_stop(NULL);
+}
+
+/*
+ * Print the final "Stopping CPU" message and halt the calling CPU forever.
+ *
+ * @cpu_data: per-CPU data of the CPU to stop, or NULL if it is not
+ *            available. If given, the CPU is marked as stopped and
+ *            vmx_cpu_exit() is called for it before halting.
+ *
+ * Never returns.
+ */
+void panic_stop(struct per_cpu *cpu_data)
+{
+ panic_printk("Stopping CPU");
+ if (cpu_data) {
+ panic_printk(" %d", cpu_data->cpu_id);
+ cpu_data->cpu_stopped = true;
+ vmx_cpu_exit(cpu_data);
+ }
+ panic_printk("\n");
+
+ /* the CPU that owns the panic output ends the panic state */
+ if (phys_processor_id() == panic_cpu)
+ panic_in_progress = 0;
+
+ asm volatile("1: hlt; jmp 1b");
+ __builtin_unreachable();
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/* currently our limit due to the fixed-size APIC ID map */
+#define APIC_MAX_PHYS_ID 254
+#define APIC_INVALID_ID 255
+
+#define XAPIC_BASE 0xfee00000
+
+#define APIC_BASE_EXTD (1 << 10)
+#define APIC_BASE_EN (1 << 11)
+
+#define APIC_REG_ID 0x02
+#define APIC_REG_LDR 0x0d
+#define APIC_REG_DFR 0x0e
+#define APIC_REG_SPIV 0x0f
+#define APIC_REG_ICR 0x30
+#define APIC_REG_ICR_HI 0x31
+
+#define APIC_ICR_VECTOR_MASK 0x000000ff
+#define APIC_ICR_DLVR_MASK 0x00000700
+#define APIC_ICR_DLVR_FIXED 0x00000000
+#define APIC_ICR_DLVR_LOWPRI 0x00000100
+#define APIC_ICR_DLVR_SMI 0x00000200
+#define APIC_ICR_DLVR_NMI 0x00000400
+#define APIC_ICR_DLVR_INIT 0x00000500
+#define APIC_ICR_DLVR_SIPI 0x00000600
+#define APIC_ICR_DEST_PHYSICAL 0x00000000
+#define APIC_ICR_DEST_LOGICAL 0x00000800
+#define APIC_ICR_DS_PENDING 0x00001000
+#define APIC_ICR_LV_DEASSERT 0x00000000
+#define APIC_ICR_LV_ASSERT 0x00004000
+#define APIC_ICR_TM_EDGE 0x00000000
+#define APIC_ICR_TM_LEVEL 0x00008000
+#define APIC_ICR_SH_MASK 0x000c0000
+#define APIC_ICR_SH_NONE 0x00000000
+#define APIC_ICR_SH_SELF 0x00040000
+#define APIC_ICR_SH_ALL 0x00080000
+#define APIC_ICR_SH_ALLOTHER 0x000c0000
+
+#define X2APIC_DEST_LOGICAL_ID_MASK 0x0000ffff
+#define X2APIC_DEST_CLUSTER_ID_MASK 0xffff0000
+#define X2APIC_DEST_CLUSTER_ID_SHIFT 16
+
+#define X2APIC_CLUSTER_ID_SHIFT 4
+
+#define APIC_BSP_PSEUDO_SIPI 0x100
+
+extern bool using_x2apic;
+
+int apic_init(void);
+int apic_cpu_init(struct per_cpu *cpu_data);
+
+void apic_nmi_handler(struct per_cpu *cpu_data);
+int apic_handle_events(struct per_cpu *cpu_data);
+
+void apic_handle_icr_write(struct per_cpu *cpu_data, u32 lo_val, u32 hi_val);
+
+unsigned int apic_mmio_access(struct registers *guest_regs,
+ struct per_cpu *cpu_data, unsigned long rip,
+ unsigned long page_table_addr, unsigned int reg,
+ bool is_write);
+
+void x2apic_handle_write(struct registers *guest_regs);
+void x2apic_handle_read(struct registers *guest_regs);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_BITOPS_H
+#define _JAILHOUSE_ASM_BITOPS_H
+
+#include <asm/types.h>
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+ versions. */
+#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
+#else
+#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#endif
+
+#define CONST_MASK_ADDR(nr, addr) BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK(nr) (1 << ((nr) & 7))
+
+/*
+ * Atomically clear bit @nr in the bitmap starting at @addr.
+ *
+ * For a compile-time constant @nr a locked byte-wide AND on the byte
+ * containing the bit is used, otherwise a locked BTR. Unlike set_bit(), no
+ * "memory" clobber is declared here.
+ */
+static inline __attribute__((always_inline)) void
+clear_bit(int nr, volatile unsigned long *addr)
+{
+ if (__builtin_constant_p(nr)) {
+ asm volatile("lock andb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((u8)~CONST_MASK(nr)));
+ } else {
+ asm volatile("lock btr %1,%0"
+ : BITOP_ADDR(addr)
+ : "Ir" (nr));
+ }
+}
+
+/*
+ * Atomically set bit @nr in the bitmap starting at @addr.
+ *
+ * For a compile-time constant @nr a locked byte-wide OR on the byte
+ * containing the bit is used, otherwise a locked BTS. Both variants declare
+ * a "memory" clobber and thus also act as compiler barrier.
+ */
+static inline __attribute__((always_inline)) void
+set_bit(unsigned int nr, volatile unsigned long *addr)
+{
+ if (__builtin_constant_p(nr)) {
+ asm volatile("lock orb %1,%0"
+ : CONST_MASK_ADDR(nr, addr)
+ : "iq" ((u8)CONST_MASK(nr))
+ : "memory");
+ } else {
+ asm volatile("lock bts %1,%0"
+ : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
+ }
+}
+
+/*
+ * Test bit @nr of the bitmap at @addr in plain C. Used by test_bit() when
+ * @nr is a compile-time constant. Returns 1 if the bit is set, else 0.
+ */
+static inline __attribute__((always_inline)) int
+constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+{
+	const unsigned long bit_mask = 1UL << (nr % BITS_PER_LONG);
+
+	return (addr[nr / BITS_PER_LONG] & bit_mask) != 0;
+}
+
+/*
+ * Test bit @nr of the bitmap at @addr using the BT instruction. Used by
+ * test_bit() when @nr is not a compile-time constant.
+ *
+ * BT copies the bit into the carry flag and "sbb %0,%0" turns that into the
+ * result, so the return value is -1 (not 1) if the bit is set and 0
+ * otherwise.
+ */
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+{
+ int oldbit;
+
+ asm volatile("bt %2,%1\n\t"
+ "sbb %0,%0"
+ : "=r" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+ return oldbit;
+}
+
+/*
+ * Test bit @nr of the bitmap at @addr. Evaluates to non-zero if the bit is
+ * set; the exact non-zero value differs between the two implementations
+ * (1 for constant @nr, -1 otherwise), so only test the result for truth.
+ */
+#define test_bit(nr, addr) \
+ (__builtin_constant_p((nr)) \
+ ? constant_test_bit((nr), (addr)) \
+ : variable_test_bit((nr), (addr)))
+
+/*
+ * Atomically set bit @nr of the bitmap at @addr and return its previous
+ * state: -1 if the bit was already set, 0 if it was clear (the carry flag
+ * left by the locked BTS is expanded via "sbb %0,%0"). Declares a "memory"
+ * clobber.
+ */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ int oldbit;
+
+ asm volatile("lock bts %2,%1\n\t"
+ "sbb %0,%0" : "=r" (oldbit), BITOP_ADDR(addr)
+ : "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+
+/*
+ * Find the first (least significant) zero bit in @word by bit-scanning the
+ * inverted value. Returns the zero-based bit index.
+ *
+ * NOTE(review): callers are expected to make sure that @word is not all
+ * ones; the result for ~0UL should not be relied upon.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+ asm("rep; bsf %1,%0"
+ : "=r" (word)
+ : "r" (~word));
+ return word;
+}
+
+#endif /* !_JAILHOUSE_ASM_BITOPS_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_CELL_H
+#define _JAILHOUSE_ASM_CELL_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#include <jailhouse/cell-config.h>
+
+/* Descriptor of a cell; cells are chained via @next (see cell_list). */
+struct cell {
+ /* VMX-specific state of the cell */
+ struct {
+ /* should be first as it requires page alignment */
+ u8 __attribute__((aligned(PAGE_SIZE))) io_bitmap[2*PAGE_SIZE];
+ pgd_t *ept;
+ } vmx;
+
+ char name[JAILHOUSE_CELL_NAME_MAXLEN+1];
+ unsigned int id;
+
+ /* CPUs of this cell; presumably points to small_cpu_set if that is
+  * large enough - TODO confirm */
+ struct cpu_set *cpu_set;
+ struct cpu_set small_cpu_set;
+
+ unsigned long page_offset;
+
+ struct cell *next;
+};
+
+extern struct cell *cell_list;
+
+#endif /* !_JAILHOUSE_ASM_CELL_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+struct exception_frame;
+
+void __attribute__((noreturn))
+exception_handler(struct exception_frame *frame);
+
+void __attribute__((noreturn)) panic_stop(struct per_cpu *cpu_data);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+
+/* Write the byte @v to I/O port @port. */
+static inline void outb(u8 v, u16 port)
+{
+ asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
+}
+
+/* Read and return one byte from I/O port @port. */
+static inline u8 inb(u16 port)
+{
+ u8 v;
+ asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
+ return v;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#define JAILHOUSE_CALL_INS "vmcall"
+#define JAILHOUSE_CALL_RESULT "=a" (result)
+#define JAILHOUSE_CALL_NUM "a" (num)
+#define JAILHOUSE_CALL_ARG1 "D" (arg1)
+#define JAILHOUSE_CALL_ARG2 "S" (arg2)
+#define JAILHOUSE_CALL_ARG3 "d" (arg3)
+#define JAILHOUSE_CALL_ARG4 "c" (arg4)
+
+/*
+ * Issue hypercall @num without arguments. The number is passed in the "a"
+ * register and the result is returned from it.
+ */
+static inline __u32 jailhouse_call0(__u32 num)
+{
+ __u32 result;
+
+ asm volatile(JAILHOUSE_CALL_INS
+ : JAILHOUSE_CALL_RESULT
+ : JAILHOUSE_CALL_NUM
+ : "memory");
+ return result;
+}
+
+/*
+ * Issue hypercall @num with one argument. The number is passed in the "a"
+ * register, @arg1 in the "D" register; the result is returned from "a".
+ */
+static inline __u32 jailhouse_call1(__u32 num, __u32 arg1)
+{
+ __u32 result;
+
+ asm volatile(JAILHOUSE_CALL_INS
+ : JAILHOUSE_CALL_RESULT
+ : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1
+ : "memory");
+ return result;
+}
+
+/*
+ * Issue hypercall @num with two arguments. The number is passed in the "a"
+ * register, @arg1 and @arg2 in the "D" and "S" registers; the result is
+ * returned from "a".
+ */
+static inline __u32 jailhouse_call2(__u32 num, __u32 arg1, __u32 arg2)
+{
+ __u32 result;
+
+ asm volatile(JAILHOUSE_CALL_INS
+ : JAILHOUSE_CALL_RESULT
+ : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1, JAILHOUSE_CALL_ARG2
+ : "memory");
+ return result;
+}
+
+/*
+ * Issue hypercall @num with three arguments. The number is passed in the
+ * "a" register, @arg1 to @arg3 in the "D", "S" and "d" registers; the result
+ * is returned from "a".
+ */
+static inline __u32 jailhouse_call3(__u32 num, __u32 arg1, __u32 arg2,
+ __u32 arg3)
+{
+ __u32 result;
+
+ asm volatile(JAILHOUSE_CALL_INS
+ : JAILHOUSE_CALL_RESULT
+ : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1, JAILHOUSE_CALL_ARG2,
+ JAILHOUSE_CALL_ARG3
+ : "memory");
+ return result;
+}
+
+/*
+ * Issue hypercall @num with four arguments. The number is passed in the
+ * "a" register, @arg1 to @arg4 in the "D", "S", "d" and "c" registers; the
+ * result is returned from "a".
+ */
+static inline __u32 jailhouse_call4(__u32 num, __u32 arg1, __u32 arg2,
+ __u32 arg3, __u32 arg4)
+{
+ __u32 result;
+
+ asm volatile(JAILHOUSE_CALL_INS
+ : JAILHOUSE_CALL_RESULT
+ : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1, JAILHOUSE_CALL_ARG2,
+ JAILHOUSE_CALL_ARG3, JAILHOUSE_CALL_ARG4
+ : "memory");
+ return result;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PAGING_H
+#define _JAILHOUSE_ASM_PAGING_H
+
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define PAGE_SIZE 4096
+#define PAGE_MASK ~(PAGE_SIZE - 1)
+
+#define PAGE_DIR_LEVELS 4
+
+#define PAGE_TABLE_OFFS_MASK 0x0000000000000ff8UL
+#define PAGE_ADDR_MASK 0x000ffffffffff000UL
+#define PAGE_OFFS_MASK 0x0000000000000fffUL
+#define HUGEPAGE_ADDR_MASK 0x000fffffffe00000UL
+#define HUGEPAGE_OFFS_MASK 0x00000000001fffffUL
+
+#define PAGE_FLAG_PRESENT 0x01
+#define PAGE_FLAG_RW 0x02
+#define PAGE_FLAG_UNCACHED 0x10
+
+#define PAGE_DEFAULT_FLAGS (PAGE_FLAG_PRESENT | PAGE_FLAG_RW )
+#define PAGE_READONLY_FLAGS PAGE_FLAG_PRESENT
+
+#define INVALID_PHYS_ADDR (~0UL)
+
+#define REMAP_BASE_ADDR 0x0000000000100000UL
+#define NUM_REMAP_BITMAP_PAGES 1
+
+#define FOREIGN_MAPPING_BASE REMAP_BASE_ADDR
+#define NUM_FOREIGN_PAGES 16
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long pgd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pte_t;
+
+/* Return true if the present bit of the top-level entry @pgd is set. */
+static inline bool pgd_valid(pgd_t *pgd)
+{
+	return *pgd & PAGE_FLAG_PRESENT;
+}
+
+/*
+ * Return the top-level entry for @addr within the table @page_table. The
+ * entry is selected by address bits 39-47.
+ */
+static inline pgd_t *pgd_offset(pgd_t *page_table, unsigned long addr)
+{
+	unsigned long entry_offs = (addr >> 36) & PAGE_TABLE_OFFS_MASK;
+
+	return (pgd_t *)((unsigned long)page_table + entry_offs);
+}
+
+/* Make @pgd reference the page at @addr, combined with @flags. */
+static inline void set_pgd(pgd_t *pgd, unsigned long addr, unsigned long flags)
+{
+	unsigned long page = addr & PAGE_ADDR_MASK;
+
+	*pgd = page | flags;
+}
+
+/* Invalidate the top-level entry @pgd by zeroing it. */
+static inline void clear_pgd(pgd_t *pgd)
+{
+	*pgd = (pgd_t)0;
+}
+
+/* Return true if the present bit of the entry @pud is set. */
+static inline bool pud_valid(pud_t *pud)
+{
+	return *pud & PAGE_FLAG_PRESENT;
+}
+
+/*
+ * Return the second-level entry for @addr in the table referenced by @pgd
+ * (4-level layout). @page_table_offset is added to the table address taken
+ * from @pgd in order to obtain a usable pointer. The entry is selected by
+ * address bits 30-38.
+ */
+static inline pud_t *pud4l_offset(pgd_t *pgd, unsigned long page_table_offset,
+				  unsigned long addr)
+{
+	unsigned long table = *pgd & PAGE_ADDR_MASK;
+	unsigned long entry_offs = (addr >> 27) & PAGE_TABLE_OFFS_MASK;
+
+	return (pud_t *)(table + entry_offs + page_table_offset);
+}
+
+/*
+ * Return the entry for @addr in a table whose root @page_table is directly
+ * indexed by address bits 30-38 (3-level layout).
+ */
+static inline pud_t *pud3l_offset(pgd_t *page_table, unsigned long addr)
+{
+	unsigned long entry_offs = (addr >> 27) & PAGE_TABLE_OFFS_MASK;
+
+	return (pud_t *)((unsigned long)page_table + entry_offs);
+}
+
+/* Make @pud reference the page at @addr, combined with @flags. */
+static inline void set_pud(pud_t *pud, unsigned long addr, unsigned long flags)
+{
+	unsigned long page = addr & PAGE_ADDR_MASK;
+
+	*pud = page | flags;
+}
+
+/* Invalidate the entry @pud by zeroing it. */
+static inline void clear_pud(pud_t *pud)
+{
+	*pud = (pud_t)0;
+}
+
+/* Return true if the present bit of the entry @pmd is set. */
+static inline bool pmd_valid(pmd_t *pmd)
+{
+	return *pmd & PAGE_FLAG_PRESENT;
+}
+
+/*
+ * Return true if bit 7 of @pmd is set, i.e. the entry maps a huge page
+ * instead of referencing a page table.
+ *
+ * The result is normalized to 0/1: bool is an enum in this code base, so
+ * returning the masked value directly would yield 128 rather than true.
+ */
+static inline bool pmd_is_hugepage(pmd_t *pmd)
+{
+	return !!(*pmd & (1 << 7));
+}
+
+/*
+ * Return the entry for @addr in the table referenced by @pud.
+ * @page_table_offset is added to the table address taken from @pud in order
+ * to obtain a usable pointer. The entry is selected by address bits 21-29.
+ */
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long page_table_offset,
+				unsigned long addr)
+{
+	unsigned long table = *pud & PAGE_ADDR_MASK;
+	unsigned long entry_offs = (addr >> 18) & PAGE_TABLE_OFFS_MASK;
+
+	return (pmd_t *)(table + entry_offs + page_table_offset);
+}
+
+/* Make @pmd reference the page at @addr, combined with @flags. */
+static inline void set_pmd(pmd_t *pmd, unsigned long addr, unsigned long flags)
+{
+	unsigned long page = addr & PAGE_ADDR_MASK;
+
+	*pmd = page | flags;
+}
+
+/* Invalidate the entry @pmd by zeroing it. */
+static inline void clear_pmd(pmd_t *pmd)
+{
+	*pmd = (pmd_t)0;
+}
+
+/* Return true if the present bit of the entry @pte is set. */
+static inline bool pte_valid(pte_t *pte)
+{
+	return *pte & PAGE_FLAG_PRESENT;
+}
+
+/*
+ * Return the entry for @addr in the page table referenced by @pmd.
+ * @page_table_offset is added to the table address taken from @pmd in order
+ * to obtain a usable pointer. The entry is selected by address bits 12-20.
+ */
+static inline pte_t *pte_offset(pmd_t *pmd, unsigned long page_table_offset,
+				unsigned long addr)
+{
+	unsigned long table = *pmd & PAGE_ADDR_MASK;
+	unsigned long entry_offs = (addr >> 9) & PAGE_TABLE_OFFS_MASK;
+
+	return (pte_t *)(table + entry_offs + page_table_offset);
+}
+
+/* Make @pte map the page at @addr, combined with @flags. */
+static inline void set_pte(pte_t *pte, unsigned long addr, unsigned long flags)
+{
+	unsigned long page = addr & PAGE_ADDR_MASK;
+
+	*pte = page | flags;
+}
+
+/* Invalidate the entry @pte by zeroing it. */
+static inline void clear_pte(pte_t *pte)
+{
+	*pte = (pte_t)0;
+}
+
+/*
+ * Translate @addr via the 4K mapping in @pte: page address from the entry
+ * plus the offset of @addr within the page.
+ */
+static inline unsigned long phys_address(pte_t *pte, unsigned long addr)
+{
+	unsigned long page = *pte & PAGE_ADDR_MASK;
+	unsigned long offs = addr & PAGE_OFFS_MASK;
+
+	return page + offs;
+}
+
+/*
+ * Translate @addr via the huge page mapping in @pmd: huge page address from
+ * the entry plus the offset of @addr within the 2M page.
+ */
+static inline unsigned long phys_address_hugepage(pmd_t *pmd,
+						  unsigned long addr)
+{
+	unsigned long page = *pmd & HUGEPAGE_ADDR_MASK;
+	unsigned long offs = addr & HUGEPAGE_OFFS_MASK;
+
+	return page + offs;
+}
+
+/*
+ * Return true if the table referenced by @pgd contains no valid entry.
+ * @page_table_offset is added to the table address to access it.
+ */
+static inline bool pud_empty(pgd_t *pgd, unsigned long page_table_offset)
+{
+	pud_t *entry = (pud_t *)((*pgd & PAGE_ADDR_MASK) + page_table_offset);
+	pud_t *end = entry + PAGE_SIZE / sizeof(pud_t);
+
+	while (entry < end) {
+		if (pud_valid(entry))
+			return false;
+		entry++;
+	}
+	return true;
+}
+
+/*
+ * Return true if the table referenced by @pud contains no valid entry.
+ * @page_table_offset is added to the table address to access it.
+ */
+static inline bool pmd_empty(pud_t *pud, unsigned long page_table_offset)
+{
+	pmd_t *entry = (pmd_t *)((*pud & PAGE_ADDR_MASK) + page_table_offset);
+	pmd_t *end = entry + PAGE_SIZE / sizeof(pmd_t);
+
+	while (entry < end) {
+		if (pmd_valid(entry))
+			return false;
+		entry++;
+	}
+	return true;
+}
+
+/*
+ * Return true if the page table referenced by @pmd contains no valid entry.
+ * @page_table_offset is added to the table address to access it.
+ */
+static inline bool pt_empty(pmd_t *pmd, unsigned long page_table_offset)
+{
+	pte_t *entry = (pte_t *)((*pmd & PAGE_ADDR_MASK) + page_table_offset);
+	pte_t *end = entry + PAGE_SIZE / sizeof(pte_t);
+
+	while (entry < end) {
+		if (pte_valid(entry))
+			return false;
+		entry++;
+	}
+	return true;
+}
+
+/*
+ * Flush the TLB by writing CR4 with the PGE bit cleared and then restoring
+ * the original CR4 value.
+ */
+static inline void flush_tlb(void)
+{
+	const unsigned long saved_cr4 = read_cr4();
+
+	write_cr4(saved_cr4 & ~X86_CR4_PGE);
+	write_cr4(saved_cr4);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PAGING_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PERCPU_H
+#define _JAILHOUSE_ASM_PERCPU_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#define NUM_ENTRY_REGS 6
+
+/* Keep in sync with struct per_cpu! */
+#define PERCPU_SIZE_SHIFT 14
+#define PERCPU_STACK_END PAGE_SIZE
+#define PERCPU_LINUX_SP PERCPU_STACK_END
+#define PERCPU_CPU_ID (PERCPU_LINUX_SP + 8)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cell.h>
+
+/*
+ * Per-CPU data area. The layout of the first three fields is also used by
+ * assembly code via the PERCPU_* defines; the struct is page-aligned.
+ */
+struct per_cpu {
+ /* Keep these three in sync with defines above! */
+ u8 stack[PAGE_SIZE];
+ unsigned long linux_sp;
+ unsigned int cpu_id;
+
+ u32 apic_id;
+ struct cell *cell;
+
+ /* linux_* fields: presumably the Linux state saved when the
+  * hypervisor took over this CPU - TODO confirm */
+ struct desc_table_reg linux_gdtr;
+ struct desc_table_reg linux_idtr;
+ unsigned long linux_reg[NUM_ENTRY_REGS];
+ unsigned long linux_ip;
+ unsigned long linux_cr3;
+ unsigned long linux_cs;
+ unsigned long linux_tr;
+ unsigned long linux_tr_base;
+ u32 linux_tr_limit;
+ u32 linux_tr_ar_bytes;
+ unsigned long linux_efer;
+ unsigned long linux_fs_base;
+ unsigned long linux_gs_base;
+ unsigned long linux_sysenter_cs;
+ unsigned long linux_sysenter_eip;
+ unsigned long linux_sysenter_esp;
+ bool initialized;
+ enum { VMXOFF = 0, VMXON, VMCS_READY } vmx_state;
+
+ volatile bool stop_cpu;
+ volatile bool wait_for_sipi;
+ volatile bool cpu_stopped;
+ bool init_signaled;
+ int sipi_vector;
+ bool flush_caches;
+ bool shutdown_cpu;
+
+ /* both pages are explicitly page-aligned */
+ u8 vmxon_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+ u8 vmcs_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+} __attribute__((aligned(PAGE_SIZE)));
+
+/*
+ * Return the per-CPU data area of CPU @cpu. The areas are located at the
+ * start of __page_pool, one 2^PERCPU_SIZE_SHIFT sized slot per CPU.
+ */
+static inline struct per_cpu *per_cpu(unsigned int cpu)
+{
+	struct per_cpu *cpu_data;
+
+	/*
+	 * lea needs a register destination and add cannot take two memory
+	 * operands, so the output must be a register ("=&r"); only the
+	 * offset may live in memory.
+	 */
+	asm volatile(
+		"lea __page_pool(%%rip),%0\n\t"
+		"add %1,%0\n\t"
+		: "=&r" (cpu_data)
+		: "rm" ((unsigned long)cpu << PERCPU_SIZE_SHIFT));
+	return cpu_data;
+}
+
+/*
+ * Validate defines: CHECK_ASSUMPTION() breaks the build with a negative
+ * array size if the given condition is false.
+ */
+#define CHECK_ASSUMPTION(assume)	((void)sizeof(char[1 - 2*!(assume)]))
+
+/*
+ * Compile-time checks that the PERCPU_* defines used by assembly code
+ * match the layout of struct per_cpu. Has no runtime effect.
+ */
+static inline void __check_assumptions(void)
+{
+	CHECK_ASSUMPTION(sizeof(struct per_cpu) == (1 << PERCPU_SIZE_SHIFT));
+	CHECK_ASSUMPTION(sizeof(((struct per_cpu *)0)->stack) ==
+			 PERCPU_STACK_END);
+	CHECK_ASSUMPTION(__builtin_offsetof(struct per_cpu, linux_sp) ==
+			 PERCPU_LINUX_SP);
+	CHECK_ASSUMPTION(__builtin_offsetof(struct per_cpu, cpu_id) ==
+			 PERCPU_CPU_ID);
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PERCPU_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PROCESSOR_H
+#define _JAILHOUSE_ASM_PROCESSOR_H
+
+#include <asm/types.h>
+
+#define X86_FEATURE_VMX (1 << 5)
+
+#define X86_CR0_PE 0x00000001
+#define X86_CR0_ET 0x00000010
+#define X86_CR0_NW 0x20000000
+#define X86_CR0_CD 0x40000000
+#define X86_CR0_PG 0x80000000
+
+#define X86_CR4_PGE 0x00000080
+#define X86_CR4_VMXE 0x00002000
+
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+#define MSR_IA32_VMX_BASIC 0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
+#define MSR_X2APIC_BASE 0x00000800
+#define MSR_X2APIC_ICR 0x00000830
+#define MSR_X2APIC_SELF_IPI 0x0000083f
+#define MSR_X2APIC_END MSR_X2APIC_SELF_IPI
+#define MSR_EFER 0xc0000080
+#define MSR_FS_BASE 0xc0000100
+#define MSR_GS_BASE 0xc0000101
+
+#define FEATURE_CONTROL_LOCKED (1 << 0)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1 << 2)
+
+#define EFER_LME 0x00000100
+#define EFER_LMA 0x00000400
+
+#define GDT_DESC_NULL 0
+#define GDT_DESC_CODE 1
+#define GDT_DESC_TSS 2
+#define GDT_DESC_TSS_HI 3
+#define NUM_GDT_DESC 4
+
+#define X86_INST_LEN_CPUID 2
+#define X86_INST_LEN_RDMSR 2
+#define X86_INST_LEN_WRMSR 2
+#define X86_INST_LEN_VMCALL 3
+#define X86_INST_LEN_MOV_TO_CR 3
+
+#define X86_OP_REGR_PREFIX 0x44
+#define X86_OP_MOV_TO_MEM 0x89
+#define X86_OP_MOV_FROM_MEM 0x8b
+
+#define NMI_VECTOR 2
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Guest general-purpose registers as saved on the stack by vm_exit. The
+ * field order is the reverse of the push order there, so r15 (pushed last)
+ * comes first; "unused" is the placeholder slot reserved for rsp.
+ */
+struct registers {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rdi;
+ unsigned long rsi;
+ unsigned long rbp;
+ unsigned long unused;
+ unsigned long rbx;
+ unsigned long rdx;
+ unsigned long rcx;
+ unsigned long rax;
+};
+
+static unsigned long __force_order;
+
+/* Busy-wait hint for spin loops ("rep; nop" is the PAUSE encoding). */
+static inline void cpu_relax(void)
+{
+ asm volatile("rep; nop");
+}
+
+/* Full memory fence (mfence) that is also a compiler barrier. */
+static inline void memory_barrier(void)
+{
+ asm volatile("mfence" : : : "memory");
+}
+
+/*
+ * Execute CPUID. *eax and *ecx are used as input and, like *ebx and *edx,
+ * overwritten with the results.
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+/*
+ * Query CPUID leaf @op with ecx preset to 0 and store the four result
+ * registers in *eax, *ebx, *ecx and *edx.
+ */
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
+			 unsigned int *ecx, unsigned int *edx)
+{
+	*eax = op;
+	*ecx = 0;
+
+	__cpuid(eax, ebx, ecx, edx);
+}
+
+/* Return the ecx result of CPUID leaf @op. */
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+	unsigned int a, b, c, d;
+
+	cpuid(op, &a, &b, &c, &d);
+
+	return c;
+}
+
+/*
+ * Return the current value of CR0. The dummy __force_order operand creates
+ * a dependency between the control register accessors so that the compiler
+ * keeps them in program order.
+ */
+static inline unsigned long read_cr0(void)
+{
+ unsigned long cr0;
+
+ asm volatile("mov %%cr0,%0" : "=r" (cr0), "=m" (__force_order));
+ return cr0;
+}
+
+/* Load CR0 with @val; ordered against other CR accesses via __force_order. */
+static inline void write_cr0(unsigned long val)
+{
+ asm volatile("mov %0,%%cr0" : : "r" (val), "m" (__force_order));
+}
+
+/* Return the current value of CR3; ordered via __force_order. */
+static inline unsigned long read_cr3(void)
+{
+ unsigned long cr3;
+
+ asm volatile("mov %%cr3,%0" : "=r" (cr3), "=m" (__force_order));
+ return cr3;
+}
+
+/* Load CR3 with @val; ordered against other CR accesses via __force_order. */
+static inline void write_cr3(unsigned long val)
+{
+ asm volatile("mov %0,%%cr3" : : "r" (val), "m" (__force_order));
+}
+
+/* Return the current value of CR4; ordered via __force_order. */
+static inline unsigned long read_cr4(void)
+{
+ unsigned long cr4;
+
+ asm volatile("mov %%cr4,%0" : "=r" (cr4), "=m" (__force_order));
+ return cr4;
+}
+
+/* Load CR4 with @val; ordered against other CR accesses via __force_order. */
+static inline void write_cr4(unsigned long val)
+{
+ asm volatile("mov %0,%%cr4" : : "r" (val), "m" (__force_order));
+}
+
+/*
+ * Read the model-specific register @msr and return its 64-bit value,
+ * assembled from the two 32-bit halves delivered by RDMSR.
+ */
+static inline unsigned long read_msr(unsigned int msr)
+{
+ u32 low, high;
+
+ asm volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr));
+ return low | ((unsigned long)high << 32);
+}
+
+/*
+ * Write the 64-bit value @val to the model-specific register @msr. The low
+ * half is passed via the "a" register, the high half via "d".
+ */
+static inline void write_msr(unsigned int msr, unsigned long val)
+{
+ asm volatile("wrmsr"
+ : /* no output */
+ : "c" (msr), "a" (val), "d" (val >> 32)
+ : "memory");
+}
+
+/* Store the current GDTR (limit and base) into *val. */
+static inline void read_gdtr(struct desc_table_reg *val)
+{
+ asm volatile("sgdtq %0" : "=m" (*val));
+}
+
+/* Load the GDTR with the limit and base stored in *val. */
+static inline void write_gdtr(struct desc_table_reg *val)
+{
+	/*
+	 * lgdt only reads its operand. Declaring it as output would allow the
+	 * compiler to drop preceding stores to *val as dead.
+	 */
+	asm volatile("lgdtq %0" : : "m" (*val));
+}
+
+/* Store the current IDTR (limit and base) into *val. */
+static inline void read_idtr(struct desc_table_reg *val)
+{
+ asm volatile("sidtq %0" : "=m" (*val));
+}
+
+/* Load the IDTR with the limit and base stored in *val. */
+static inline void write_idtr(struct desc_table_reg *val)
+{
+	/*
+	 * lidt only reads its operand. Declaring it as output would allow the
+	 * compiler to drop preceding stores to *val as dead.
+	 */
+	asm volatile("lidtq %0" : : "m" (*val));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PROCESSOR_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/bitops.h>
+#include <asm/processor.h>
+
+typedef struct {
+ unsigned long state;
+} spinlock_t;
+
+#define DEFINE_SPINLOCK(name) spinlock_t (name)
+
+/*
+ * Acquire @lock, spinning until bit 0 of its state could be set by this
+ * caller.
+ */
+static inline void spin_lock(spinlock_t *lock)
+{
+	for (;;) {
+		if (!test_and_set_bit(0, &lock->state))
+			return;
+		cpu_relax();
+	}
+}
+
+/* Release @lock by clearing bit 0 of its state. */
+static inline void spin_unlock(spinlock_t *lock)
+{
+ /* compiler barrier: keep accesses of the critical section before
+  * the release */
+ asm volatile("": : :"memory");
+ clear_bit(0, &lock->state);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_TYPES_H
+#define _JAILHOUSE_ASM_TYPES_H
+
+#define NULL ((void *)0)
+
+#define BITS_PER_LONG 64
+
+#ifndef __ASSEMBLY__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long s64;
+typedef unsigned long u64;
+
+typedef s8 __s8;
+typedef u8 __u8;
+
+typedef s16 __s16;
+typedef u16 __u16;
+
+typedef s32 __s32;
+typedef u32 __u32;
+
+typedef s64 __s64;
+typedef u64 __u64;
+
+typedef enum { true=1, false=0 } bool;
+
+/*
+ * Memory operand of the sgdt/lgdt/sidt/lidt instructions: 16-bit limit
+ * directly followed by the 64-bit base, hence packed.
+ */
+struct desc_table_reg {
+ u16 limit;
+ u64 base;
+} __attribute__((packed));
+
+/* Set of CPUs, represented as a bitmap indexed by CPU ID. */
+struct cpu_set {
+ /* presumably the highest CPU ID the bitmap can hold - TODO confirm */
+ unsigned long max_cpu_id;
+ /* Note: The bitmap is supposed to be extended by embedding this
+ * struct into a larger buffer. */
+ unsigned long bitmap[1];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_TYPES_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+#include <asm/paging.h>
+#include <asm/processor.h>
+
+#include <jailhouse/cell-config.h>
+
+/*
+ * VMCS field encodings. These values are passed as the field argument to
+ * the vmcs_read and vmcs_write accessors. An entry with a _HIGH suffix
+ * carries the encoding of its base field plus one.
+ * NOTE(review): the numbers are expected to match the VMCS field encoding
+ * tables of the Intel SDM - check against the manual when adding entries.
+ */
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+/* Value for the GUEST_ACTIVITY_STATE field */
+#define GUEST_ACTIVITY_ACTIVE 0
+
+/* First-level indices of the MSR bitmap array: read/write maps for the
+ * MSR ranges named 0000 and C000 */
+#define VMX_MSR_BITMAP_0000_READ 0
+#define VMX_MSR_BITMAP_C000_READ 1
+#define VMX_MSR_BITMAP_0000_WRITE 2
+#define VMX_MSR_BITMAP_C000_WRITE 3
+
+/* Bits of PIN_BASED_VM_EXEC_CONTROL */
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+
+/* Bits of CPU_BASED_VM_EXEC_CONTROL */
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+/* Bits of SECONDARY_VM_EXEC_CONTROL */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+
+/* Bits of VM_EXIT_CONTROLS */
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+
+/* Bits of VM_ENTRY_CONTROLS */
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+/* NOTE(review): presumably a flag of the VM-exit interruption info - its
+ * user is not visible here */
+#define INTR_INFO_UNBLOCK_NMI 0x1000
+
+/* NOTE(review): presumably a flag in VM_EXIT_REASON marking a failed
+ * VM entry - confirm at the exit handler */
+#define EXIT_REASONS_FAILED_VMENTRY 0x80000000
+
+/*
+ * VM exit reason numbers.
+ * NOTE(review): presumably compared against the value read from
+ * VM_EXIT_REASON; the exit handler is not part of this header.
+ */
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_INIT_SIGNAL 3
+#define EXIT_REASON_SIPI 4
+#define EXIT_REASON_IO_SMI 5
+#define EXIT_REASON_OTHER_SMI 6
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_INVPCID 58
+
+/* Flags of EPT page table entries; EPT_FLAG_WB_TYPE equals
+ * EPT_TYPE_WRITEBACK shifted left by 3 */
+#define EPT_FLAG_READ 0x001
+#define EPT_FLAG_WRITE 0x002
+#define EPT_FLAG_EXECUTE 0x004
+#define EPT_FLAG_WB_TYPE 0x030
+
+/* Components OR-ed into the EPT_POINTER field: memory type and the page
+ * walk length, encoded as (levels - 1) shifted left by 3 */
+#define EPT_TYPE_UNCACHEABLE 0
+#define EPT_TYPE_WRITEBACK 6
+#define EPT_PAGE_WALK_LEN ((4-1) << 3)
+
+/* Capability bits tested in the value of MSR_IA32_VMX_EPT_VPID_CAP */
+#define EPT_PAGE_WALK_4 (1UL << 6)
+#define EPTP_WB (1UL << 14)
+#define EPT_INVEPT (1UL << 20)
+#define EPT_INVEPT_SINGLE (1UL << 25)
+#define EPT_INVEPT_GLOBAL (1UL << 26)
+#define EPT_MANDATORY_FEATURES (EPT_PAGE_WALK_4 | EPTP_WB | \
+ EPT_INVEPT)
+
+/* Invalidation types passed as register operand to the invept instruction */
+#define VMX_INVEPT_SINGLE 1
+#define VMX_INVEPT_GLOBAL 2
+
+/* NOTE(review): presumably decode the exit qualification of APIC access
+ * exits - users are not visible here. The spelling OFFET is part of the
+ * existing interface and therefore kept. */
+#define APIC_ACCESS_OFFET_MASK 0x00000fff
+#define APIC_ACCESS_TYPE_MASK 0x0000f000
+#define APIC_ACCESS_TYPE_LINEAR_READ 0x00000000
+#define APIC_ACCESS_TYPE_LINEAR_WRITE 0x00001000
+
+/* One-time global setup of the VMX code */
+void vmx_init(void);
+
+/* Set up (vmx_cell_init) or reduce (vmx_cell_shrink) the VMX-specific state
+ * of a cell according to the given cell configuration */
+int vmx_cell_init(struct cell *cell, struct jailhouse_cell_desc *config);
+void vmx_cell_shrink(struct cell *cell, struct jailhouse_cell_desc *config);
+
+/* Per-CPU VMX initialization and its counterpart */
+int vmx_cpu_init(struct per_cpu *cpu_data);
+void vmx_cpu_exit(struct per_cpu *cpu_data);
+
+/* vmx_cpu_activate_vmm does not return to its caller */
+void __attribute__((noreturn)) vmx_cpu_activate_vmm(struct per_cpu *cpu_data);
+void vmx_handle_exit(struct registers *guest_regs, struct per_cpu *cpu_data);
+void vmx_entry_failure(struct per_cpu *cpu_data);
+
+/* Invalidate EPT-derived translations */
+void vmx_invept(void);
+
+void vmx_schedule_vmexit(struct per_cpu *cpu_data);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/mmio.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <asm/spinlock.h>
+#include <asm/fault.h>
+
+/*
+ * Bit-field view of an x86 ModR/M byte; mmio_parse() reads the raw
+ * instruction byte through this type. Assumes the compiler allocates
+ * bit-fields starting at the least significant bit, so that rm covers
+ * bits 0-2, reg bits 3-5 and mod bits 6-7.
+ */
+struct modrm {
+ u8 rm:3;
+ u8 reg:3;
+ u8 mod:2;
+} __attribute__((packed));
+
+/*
+ * Bit-field view of an x86 SIB byte, read the same way as struct modrm.
+ * The member called reg holds the lowest three bits, i.e. the field the
+ * instruction set documentation calls base; index and ss (scale) follow.
+ */
+struct sib {
+ u8 reg:3;
+ u8 index:3;
+ u8 ss:2;
+} __attribute__((packed));
+
+/* Held by mmio_parse() for the whole duration of instruction decoding */
+static DEFINE_SPINLOCK(mmio_lock);
+
+/*
+ * Decode the guest instruction at pc that performed an MMIO access.
+ *
+ * @cpu_data:        per-CPU data of the calling CPU (provides cpu_id, cell)
+ * @pc:              guest address of the instruction
+ * @page_table_addr: guest page table root used to map the instruction bytes
+ * @is_write:        access direction reported for the trapped access
+ *
+ * Supported are the opcodes X86_OP_MOV_TO_MEM and X86_OP_MOV_FROM_MEM with
+ * an optional single X86_OP_REGR_PREFIX, addressed either as mod 0 with a
+ * displacement-only SIB byte or as mod 2 (32-bit displacement, with or
+ * without SIB byte).
+ *
+ * Returns the decoded access: inst_len is the instruction length in bytes,
+ * size the access width and reg the index of the data register. On any
+ * error a message is printed and inst_len is 0.
+ */
+struct mmio_access mmio_parse(struct per_cpu *cpu_data, unsigned long pc,
+			      unsigned long page_table_addr, bool is_write)
+{
+	struct mmio_access access = { .inst_len = 0 };
+	unsigned int cpu_id = cpu_data->cpu_id;
+	struct cell *cell = cpu_data->cell;
+	bool has_regr, has_modrm, does_write;
+	struct modrm modrm;
+	struct sib sib;
+	u8 *page;
+
+	spin_lock(&mmio_lock);
+
+	has_regr = false;
+
+restart:
+	/* the instruction may cross a page boundary, so map per byte */
+	page = page_map_get_foreign_page(cpu_id, page_table_addr,
+					 cell->page_offset, pc,
+					 PAGE_DEFAULT_FLAGS);
+	if (!page)
+		goto error_nopage;
+
+	has_modrm = false;
+	switch (page[pc & PAGE_OFFS_MASK]) {
+	case X86_OP_REGR_PREFIX:
+		/* accept the prefix only once */
+		if (has_regr)
+			goto error_unsupported;
+		has_regr = true;
+		pc++;
+		access.inst_len++;
+		goto restart;
+	case X86_OP_MOV_TO_MEM:
+		/* opcode + ModR/M byte */
+		access.inst_len += 2;
+		access.size = 4;
+		has_modrm = true;
+		does_write = true;
+		break;
+	case X86_OP_MOV_FROM_MEM:
+		access.inst_len += 2;
+		access.size = 4;
+		has_modrm = true;
+		does_write = false;
+		break;
+	default:
+		goto error_unsupported;
+	}
+
+	if (has_modrm) {
+		pc++;
+		page = page_map_get_foreign_page(cpu_id, page_table_addr,
+						 cell->page_offset, pc,
+						 PAGE_DEFAULT_FLAGS);
+		if (!page)
+			goto error_nopage;
+
+		modrm = *(struct modrm *)&page[pc & PAGE_OFFS_MASK];
+		switch (modrm.mod) {
+		case 0:
+			/* only the SIB form is handled for mod 0 */
+			if (modrm.rm != 4)
+				goto error_unsupported;
+
+			pc++;
+			page = page_map_get_foreign_page(cpu_id,
+							 page_table_addr,
+							 cell->page_offset, pc,
+							 PAGE_DEFAULT_FLAGS);
+			if (!page)
+				goto error_nopage;
+
+			sib = *(struct sib *)&page[pc & PAGE_OFFS_MASK];
+			/* no scale, no index, no base: plain disp32 */
+			if (sib.ss != 0 || sib.index != 4 || sib.reg != 5)
+				goto error_unsupported;
+			/* SIB byte + 32-bit displacement */
+			access.inst_len += 5;
+			break;
+		case 2:
+			/*
+			 * rm == 4 selects a SIB byte between ModR/M and the
+			 * displacement. Its content is irrelevant here but
+			 * it has to be counted, otherwise the instruction
+			 * length comes out one byte short.
+			 */
+			if (modrm.rm == 4)
+				access.inst_len++;
+			/* 32-bit displacement */
+			access.inst_len += 4;
+			break;
+		default:
+			goto error_unsupported;
+		}
+		/*
+		 * Translate the register number into the index expected in
+		 * access.reg; the prefix selects the upper register bank.
+		 * Register number 4 without prefix is rejected.
+		 */
+		if (has_regr)
+			access.reg = 7 - modrm.reg;
+		else if (modrm.reg == 4)
+			goto error_unsupported;
+		else
+			access.reg = 15 - modrm.reg;
+	}
+
+	if (does_write != is_write)
+		goto error_inconsistent;
+
+unmap_out:
+	page_map_release_foreign_page(cpu_id);
+
+	spin_unlock(&mmio_lock);
+	return access;
+
+error_nopage:
+	panic_printk("FATAL: unable to map MMIO instruction page\n");
+	goto error;
+
+error_unsupported:
+	panic_printk("FATAL: unsupported instruction\n");
+	goto error;
+
+error_inconsistent:
+	panic_printk("FATAL: inconsistent access, expected %s instruction\n",
+		     is_write ? "write" : "read");
+error:
+	/* inst_len == 0 signals the failure to the caller */
+	access.inst_len = 0;
+	goto unmap_out;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/control.h>
+#include <asm/vmx.h>
+#include <asm/apic.h>
+#include <asm/bitops.h>
+
+/* Bit in the first quadword of a TSS descriptor that is cleared before
+ * the descriptor is loaded with ltr */
+#define TSS_BUSY_FLAG (1UL << (9 + 32))
+
+/* Number of 16-byte descriptors in the hypervisor IDT */
+#define NUM_IDT_DESC 20
+
+/* Memory operand of the indirect far jump in set_cs(): 64-bit offset
+ * directly followed by the 16-bit segment selector */
+struct farptr {
+ u64 offs;
+ u16 seg;
+} __attribute__((packed));
+
+/* Hypervisor GDT: null, code and a TSS descriptor occupying two slots.
+ * arch_cpu_init() loads it and modifies the TSS slot (busy flag). */
+static u64 gdt[] = {
+ [GDT_DESC_NULL] = 0,
+ [GDT_DESC_CODE] = 0x00af9b000000ffff,
+ [GDT_DESC_TSS] = 0x0000890000000000,
+ [GDT_DESC_TSS_HI] = 0x0000000000000000,
+};
+
+/* Entry stubs defined outside this file; arch_init_early() assumes the
+ * exception stubs are spaced 16 bytes apart */
+extern u8 exception_entries[];
+extern u8 nmi_entry[];
+
+/* Hypervisor IDT, four 32-bit words per descriptor */
+static u32 idt[NUM_IDT_DESC * 4];
+
+/* Write the first three words of the IDT descriptor of vector so that it
+ * refers to handler via the hypervisor code segment */
+static void idt_set_gate(unsigned int vector, unsigned long handler)
+{
+	u32 *desc = &idt[vector * 4];
+
+	desc[0] = (handler & 0xffff) | ((GDT_DESC_CODE * 8) << 16);
+	desc[1] = 0x8e00 | (handler & 0xffff0000);
+	desc[2] = handler >> 32;
+}
+
+/*
+ * Early architecture setup: initialize the APIC code, populate the IDT,
+ * run the global VMX setup and prepare the VMX state of the Linux cell.
+ * Returns 0 on success or the error code of the failing step.
+ */
+int arch_init_early(struct cell *linux_cell,
+		    struct jailhouse_cell_desc *config)
+{
+	unsigned long handler;
+	unsigned int vector;
+	int err;
+
+	err = apic_init();
+	if (err)
+		return err;
+
+	/*
+	 * Consecutive 16-byte stubs serve all vectors except NMI and 15;
+	 * the skipped vectors do not consume a stub.
+	 */
+	handler = (unsigned long)exception_entries;
+	for (vector = 0; vector < NUM_IDT_DESC; vector++) {
+		if (vector != NMI_VECTOR && vector != 15) {
+			idt_set_gate(vector, handler);
+			handler += 16;
+		}
+	}
+
+	/* NMI has a dedicated entry point */
+	idt_set_gate(NMI_VECTOR, (unsigned long)nmi_entry);
+
+	vmx_init();
+
+	return vmx_cell_init(linux_cell, config);
+}
+
+/*
+ * Reload CS with the selector cs by far-jumping to the instruction that
+ * follows the jump.
+ *
+ * The jump target is assembled in jmp_target: the offset is computed and
+ * stored by the asm statement itself, the segment is set up in C.
+ */
+static void set_cs(u16 cs)
+{
+	struct farptr jmp_target;
+	unsigned long tmp;
+
+	jmp_target.seg = cs;
+	/*
+	 * jmp_target.offs is written by the asm and therefore has to be an
+	 * output operand; declaring it as input would allow the compiler to
+	 * assume the memory is unchanged. tmp is written before the memory
+	 * operands are consumed, hence the early-clobber, which keeps it
+	 * out of any register used to address them. jmp_target.seg is
+	 * listed as input so that its store is completed before the jump.
+	 * %1 is the start of jmp_target and thus also the far pointer.
+	 */
+	asm volatile(
+		"lea 1f(%%rip),%0\n\t"
+		"mov %0,%1\n\t"
+		"rex64/ljmp *%1\n\t"
+		"1:"
+		: "=&r" (tmp), "=m" (jmp_target.offs)
+		: "m" (jmp_target.seg));
+}
+
+/*
+ * Per-CPU architecture initialization: save the Linux state that has to be
+ * restored or handed to the guest later (GDTR, TR, entry registers, CR3,
+ * CS, IDTR, EFER, FS/GS base, SYSENTER MSRs), switch the CPU to the
+ * hypervisor's page table, GDT, code segment, TSS and IDT, then initialize
+ * APIC and VMX for this CPU.
+ *
+ * Returns 0 on success. If APIC or VMX initialization fails, the Linux
+ * state is restored via arch_cpu_restore() and the error is returned.
+ */
+int arch_cpu_init(struct per_cpu *cpu_data)
+{
+	struct desc_table_reg dtr;
+	u64 *linux_tr_desc;
+	int err, n;
+
+	/* read GDTR */
+	read_gdtr(&cpu_data->linux_gdtr);
+
+	/* read TR and TSS descriptor */
+	asm volatile("str %0" : "=m" (cpu_data->linux_tr));
+	linux_tr_desc = (u64 *)(cpu_data->linux_gdtr.base +
+				(cpu_data->linux_tr & 0xfff8));
+	/* base: descriptor bits 16-39, 56-63 and the second quadword */
+	cpu_data->linux_tr_base = ((linux_tr_desc[0] >> 16) & 0xffffff) |
+		((linux_tr_desc[0] >> 32) & 0xff000000) |
+		(linux_tr_desc[1] << 32);
+	/*
+	 * limit: descriptor bits 0-15 and 48-51. Bits 52-55 are flags
+	 * (AVL..G) and must not end up in the limit.
+	 * NOTE(review): the limit is not scaled for descriptors with the
+	 * granularity flag set.
+	 */
+	cpu_data->linux_tr_limit = (linux_tr_desc[0] & 0xffff) |
+		((linux_tr_desc[0] >> 32) & 0xf0000);
+	/*
+	 * access rights in VMCS layout: descriptor bits 40-47 and 52-55.
+	 * Bits 48-51 carry limit[19:16] and map to reserved bits of the
+	 * VMCS access rights field, so they are masked out.
+	 */
+	cpu_data->linux_tr_ar_bytes = (linux_tr_desc[0] >> 40) & 0xf0ff;
+
+	/* read registers to restore on first VM-entry */
+	for (n = 0; n < NUM_ENTRY_REGS; n++)
+		cpu_data->linux_reg[n] =
+			((unsigned long *)cpu_data->linux_sp)[n];
+	cpu_data->linux_ip = ((unsigned long *)cpu_data->linux_sp)[6];
+
+	/* swap CR3 */
+	cpu_data->linux_cr3 = read_cr3();
+	write_cr3(page_map_hvirt2phys(hv_page_table));
+
+	/* set GDTR */
+	dtr.limit = NUM_GDT_DESC * 8 - 1;
+	dtr.base = (u64)&gdt;
+	write_gdtr(&dtr);
+
+	/* set CS */
+	asm volatile("mov %%cs,%0": "=m" (cpu_data->linux_cs));
+	set_cs(GDT_DESC_CODE * 8);
+
+	/* paranoid clearing of segment registers */
+	asm volatile(
+		"mov %0,%%es\n\t"
+		"mov %0,%%ds\n\t"
+		"mov %0,%%ss"
+		: : "r" (0));
+
+	/* clear TSS busy flag set by previous loading, then set TR */
+	gdt[GDT_DESC_TSS] &= ~TSS_BUSY_FLAG;
+	asm volatile("ltr %%ax" : : "a" (GDT_DESC_TSS * 8));
+
+	/* swap IDTR */
+	read_idtr(&cpu_data->linux_idtr);
+	dtr.limit = NUM_IDT_DESC * 16 - 1;
+	dtr.base = (u64)&idt;
+	write_idtr(&dtr);
+
+	cpu_data->linux_efer = read_msr(MSR_EFER);
+	cpu_data->linux_fs_base = read_msr(MSR_FS_BASE);
+	cpu_data->linux_gs_base = read_msr(MSR_GS_BASE);
+
+	cpu_data->linux_sysenter_cs = read_msr(MSR_IA32_SYSENTER_CS);
+	cpu_data->linux_sysenter_eip = read_msr(MSR_IA32_SYSENTER_EIP);
+	cpu_data->linux_sysenter_esp = read_msr(MSR_IA32_SYSENTER_ESP);
+
+	/* from here on arch_cpu_restore() has state to roll back */
+	cpu_data->initialized = true;
+
+	err = apic_cpu_init(cpu_data);
+	if (err)
+		goto error_out;
+
+	err = vmx_cpu_init(cpu_data);
+	if (err)
+		goto error_out;
+
+	return 0;
+
+error_out:
+	arch_cpu_restore(cpu_data);
+	return err;
+}
+
+/* Late architecture setup hook; nothing to do here, always returns 0 */
+int arch_init_late(struct cell *linux_cell,
+ struct jailhouse_cell_desc *config)
+{
+ return 0;
+}
+
+/* Architecture entry point that forwards to vmx_cpu_activate_vmm(), which
+ * is declared noreturn */
+void arch_cpu_activate_vmm(struct per_cpu *cpu_data)
+{
+ vmx_cpu_activate_vmm(cpu_data);
+}
+
+/*
+ * Undo arch_cpu_init() on this CPU: leave VMX and put back the Linux values
+ * of EFER, CR3, GDTR, IDTR, CS, TR, FS/GS base and the SYSENTER MSRs.
+ * Does nothing if the CPU was never marked as initialized.
+ */
+void arch_cpu_restore(struct per_cpu *cpu_data)
+{
+	u64 *linux_gdt;
+
+	if (!cpu_data->initialized)
+		return;
+
+	vmx_cpu_exit(cpu_data);
+
+	write_msr(MSR_EFER, cpu_data->linux_efer);
+	write_cr3(cpu_data->linux_cr3);
+
+	asm volatile("lgdtq %0" : : "m" (cpu_data->linux_gdtr));
+	asm volatile("lidtq %0" : : "m" (cpu_data->linux_idtr));
+
+	set_cs(cpu_data->linux_cs);
+
+	/* the Linux TSS descriptor is still marked busy; clear that flag
+	 * first so that loading TR is accepted */
+	linux_gdt = (u64 *)cpu_data->linux_gdtr.base;
+	linux_gdt[cpu_data->linux_tr / 8] &= ~TSS_BUSY_FLAG;
+	asm volatile("ltr %%ax" : : "a" (cpu_data->linux_tr));
+
+	write_msr(MSR_FS_BASE, cpu_data->linux_fs_base);
+	write_msr(MSR_GS_BASE, cpu_data->linux_gs_base);
+
+	write_msr(MSR_IA32_SYSENTER_CS, cpu_data->linux_sysenter_cs);
+	write_msr(MSR_IA32_SYSENTER_EIP, cpu_data->linux_sysenter_eip);
+	write_msr(MSR_IA32_SYSENTER_ESP, cpu_data->linux_sysenter_esp);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/string.h>
+#include <jailhouse/control.h>
+#include <jailhouse/hypercall.h>
+#include <asm/apic.h>
+#include <asm/fault.h>
+#include <asm/vmx.h>
+
+/*
+ * MSR bitmap handed to the CPU via the MSR_BITMAP VMCS field. It consists
+ * of four maps of 0x2000 bits each, indexed by the VMX_MSR_BITMAP_*
+ * constants. Each byte covers eight consecutive MSR numbers; the per-line
+ * comments list the MSRs whose bit is set. vmx_init() rewrites the
+ * 0x800 range at runtime when x2APIC is in use.
+ */
+static u8 __attribute__((aligned(PAGE_SIZE))) msr_bitmap[][0x2000/8] = {
+ [ VMX_MSR_BITMAP_0000_READ ] = {
+ [ 0/8 ... 0x7ff/8 ] = 0,
+ [ 0x800/8 ... 0x807/8 ] = 0x0c, /* 0x802, 0x803 */
+ [ 0x808/8 ... 0x80f/8 ] = 0xa5, /* 0x808, 0x80a, 0x80d */
+ [ 0x810/8 ... 0x817/8 ] = 0xff, /* 0x810 - 0x817 */
+ [ 0x818/8 ... 0x81f/8 ] = 0xff, /* 0x818 - 0x81f */
+ [ 0x820/8 ... 0x827/8 ] = 0xff, /* 0x820 - 0x827 */
+ [ 0x828/8 ... 0x82f/8 ] = 0x81, /* 0x828, 0x82f */
+ [ 0x830/8 ... 0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
+ [ 0x838/8 ... 0x83f/8 ] = 0x43, /* 0x838, 0x839, 0x83e */
+ [ 0x840/8 ... 0x1fff/8 ] = 0,
+ },
+ [ VMX_MSR_BITMAP_C000_READ ] = {
+ [ 0/8 ... 0x1fff/8 ] = 0,
+ },
+ [ VMX_MSR_BITMAP_0000_WRITE ] = {
+ [ 0/8 ... 0x807/8 ] = 0,
+ [ 0x808/8 ... 0x80f/8 ] = 0x89, /* 0x808, 0x80b, 0x80f */
+ [ 0x810/8 ... 0x827/8 ] = 0,
+ [ 0x828/8 ... 0x82f/8 ] = 0x81, /* 0x828, 0x82f */
+ [ 0x830/8 ... 0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
+ [ 0x838/8 ... 0x83f/8 ] = 0xc1, /* 0x838, 0x83e, 0x83f */
+ [ 0x840/8 ... 0x1fff/8 ] = 0,
+ },
+ [ VMX_MSR_BITMAP_C000_WRITE ] = {
+ [ 0/8 ... 0x1fff/8 ] = 0,
+ },
+};
+/* Page whose physical address is written to APIC_ACCESS_ADDR and which is
+ * mapped at XAPIC_BASE in the cell's EPT */
+static u8 __attribute__((aligned(PAGE_SIZE))) apic_access_page[PAGE_SIZE];
+
+/* Offset added to the VMX capability MSR numbers; set by vmx_cpu_init() to
+ * select the TRUE_* variants when the CPU reports them */
+static unsigned int vmx_true_msr_offs;
+
+/*
+ * Execute vmxon with the physical address of this CPU's VMXON page.
+ * Returns true if the instruction left both CF and ZF clear.
+ */
+static bool vmxon(struct per_cpu *cpu_data)
+{
+	unsigned long region_phys = page_map_hvirt2phys(cpu_data->vmxon_page);
+	u8 success;
+
+	/* the instruction takes a memory operand holding the address */
+	asm volatile(
+		"vmxon %1\n\t"
+		"seta %0"
+		: "=rm" (success)
+		: "m" (region_phys)
+		: "memory", "cc");
+	return success;
+}
+
+/*
+ * Execute vmclear on this CPU's VMCS page.
+ * Returns true if the instruction left both CF and ZF clear.
+ */
+static bool vmcs_clear(struct per_cpu *cpu_data)
+{
+	unsigned long vmcs_phys = page_map_hvirt2phys(cpu_data->vmcs_page);
+	u8 success;
+
+	/* the instruction takes a memory operand holding the address */
+	asm volatile(
+		"vmclear %1\n\t"
+		"seta %0"
+		: "=qm" (success)
+		: "m" (vmcs_phys)
+		: "memory", "cc");
+	return success;
+}
+
+/*
+ * Execute vmptrld on this CPU's VMCS page.
+ * Returns true if the instruction left both CF and ZF clear.
+ */
+static bool vmcs_load(struct per_cpu *cpu_data)
+{
+	unsigned long vmcs_phys = page_map_hvirt2phys(cpu_data->vmcs_page);
+	u8 success;
+
+	/* the instruction takes a memory operand holding the address */
+	asm volatile(
+		"vmptrld %1\n\t"
+		"seta %0"
+		: "=qm" (success)
+		: "m" (vmcs_phys)
+		: "memory", "cc");
+	return success;
+}
+
+/*
+ * Read a VMCS field with vmread and return its value. The flags set by
+ * the instruction are not evaluated, so a failing vmread goes unnoticed
+ * and value is returned as the instruction left it.
+ */
+static inline unsigned long vmcs_read64(unsigned long field)
+{
+ unsigned long value;
+
+ asm volatile("vmread %1,%0" : "=r" (value) : "r" (field) : "cc");
+ return value;
+}
+
+/* Read a VMCS field via vmcs_read64() and truncate the result to 16 bits */
+static inline u16 vmcs_read16(unsigned long field)
+{
+ return vmcs_read64(field);
+}
+
+/* Read a VMCS field via vmcs_read64() and truncate the result to 32 bits */
+static inline u32 vmcs_read32(unsigned long field)
+{
+ return vmcs_read64(field);
+}
+
+/*
+ * Write val to the VMCS field with vmwrite.
+ *
+ * Returns true on success. On failure a message with the VM instruction
+ * error and the caller's return address is printed and false is returned.
+ */
+static bool vmcs_write64(unsigned long field, unsigned long val)
+{
+	u8 ok;
+
+	/*
+	 * VMX instructions report failure through CF (no valid current
+	 * VMCS) or ZF (error number available). seta covers both cases,
+	 * like in the other VMX instruction wrappers of this file; testing
+	 * ZF alone would take a CF failure for success.
+	 */
+	asm volatile(
+		"vmwrite %1,%2\n\t"
+		"seta %0"
+		: "=qm" (ok)
+		: "r" (val), "r" (field)
+		: "cc");
+	if (!ok)
+		/* the error number is only meaningful for ZF failures */
+		printk("FATAL: vmwrite failed, error %d, caller %p\n",
+		       vmcs_read32(VM_INSTRUCTION_ERROR),
+		       __builtin_return_address(0));
+	return ok;
+}
+
+/* Write a 16-bit value to a VMCS field; returns what vmcs_write64() returns */
+static bool vmcs_write16(unsigned long field, u16 value)
+{
+ return vmcs_write64(field, value);
+}
+
+/* Write a 32-bit value to a VMCS field; returns what vmcs_write64() returns */
+static bool vmcs_write32(unsigned long field, u32 value)
+{
+ return vmcs_write64(field, value);
+}
+
+/*
+ * Global VMX setup. When x2APIC is in use, the x2APIC MSR range is cleared
+ * in both the read and the write bitmap and only the bit for the ICR stays
+ * set in the write bitmap. Without x2APIC the bitmap is left untouched.
+ */
+void vmx_init(void)
+{
+	u8 *rd_map = msr_bitmap[VMX_MSR_BITMAP_0000_READ];
+	u8 *wr_map = msr_bitmap[VMX_MSR_BITMAP_0000_WRITE];
+
+	if (using_x2apic) {
+		/* allow direct x2APIC access except for ICR writes */
+		memset(&rd_map[MSR_X2APIC_BASE/8], 0,
+		       (MSR_X2APIC_END - MSR_X2APIC_BASE + 1)/8);
+		memset(&wr_map[MSR_X2APIC_BASE/8], 0,
+		       (MSR_X2APIC_END - MSR_X2APIC_BASE + 1)/8);
+		wr_map[MSR_X2APIC_ICR/8] = 0x01;
+	}
+}
+
+/*
+ * Set up the VMX state of a cell: allocate the EPT root, map all memory
+ * regions of the configuration plus the APIC access page into the EPT and
+ * fill the I/O bitmap from the configuration.
+ *
+ * The configuration is walked as a blob: the memory region array follows
+ * the descriptor and the CPU set, the PIO bitmap follows the memory
+ * regions and the IRQ line array.
+ *
+ * Returns 0 on success, -ENOMEM if the EPT root cannot be allocated or the
+ * error reported by page_map_create().
+ */
+int vmx_cell_init(struct cell *cell, struct jailhouse_cell_desc *config)
+{
+ struct jailhouse_memory *mem;
+ u32 page_flags, table_flags;
+ u32 pio_bitmap_size, size;
+ u8 *pio_bitmap;
+ int n, err;
+
+ /* build root cell EPT */
+ cell->vmx.ept = page_alloc(&mem_pool, 1);
+ if (!cell->vmx.ept)
+ return -ENOMEM;
+
+ mem = (void *)config + sizeof(struct jailhouse_cell_desc) +
+ config->cpu_set_size;
+
+ for (n = 0; n < config->num_memory_regions; n++, mem++) {
+ /* translate the region's access flags into EPT flags; table
+ * entries get the same permissions without the memory type */
+ page_flags = EPT_FLAG_WB_TYPE;
+ if (mem->access_flags & JAILHOUSE_MEM_READ)
+ page_flags |= EPT_FLAG_READ;
+ if (mem->access_flags & JAILHOUSE_MEM_WRITE)
+ page_flags |= EPT_FLAG_WRITE;
+ if (mem->access_flags & JAILHOUSE_MEM_EXECUTE)
+ page_flags |= EPT_FLAG_EXECUTE;
+ table_flags = page_flags & ~EPT_FLAG_WB_TYPE;
+
+ err = page_map_create(cell->vmx.ept, mem->phys_start,
+ mem->size, mem->virt_start, page_flags,
+ table_flags, PAGE_DIR_LEVELS);
+ if (err)
+ /* FIXME: release vmx.ept */
+ return err;
+ }
+
+ /* map the APIC access page at XAPIC_BASE, readable and writable */
+ page_flags = EPT_FLAG_READ | EPT_FLAG_WRITE | EPT_FLAG_WB_TYPE;
+ table_flags = EPT_FLAG_READ | EPT_FLAG_WRITE;
+ err = page_map_create(cell->vmx.ept,
+ page_map_hvirt2phys(apic_access_page),
+ PAGE_SIZE, XAPIC_BASE, page_flags, table_flags,
+ PAGE_DIR_LEVELS);
+ if (err)
+ /* FIXME: release vmx.ept */
+ return err;
+
+ /* mem now points behind the last memory region */
+ pio_bitmap = (void *)mem +
+ config->num_irq_lines * sizeof(struct jailhouse_irq_line);
+ pio_bitmap_size = config->pio_bitmap_size;
+
+ /* start with all bits set, then overwrite with the configured part */
+ memset(cell->vmx.io_bitmap, -1, sizeof(cell->vmx.io_bitmap));
+
+ /* copy at most one page per iteration, i.e. two pages in total */
+ for (n = 0; n < 2; n++) {
+ size = pio_bitmap_size <= PAGE_SIZE ?
+ pio_bitmap_size : PAGE_SIZE;
+ memcpy(cell->vmx.io_bitmap + n * PAGE_SIZE, pio_bitmap, size);
+ pio_bitmap += size;
+ pio_bitmap_size -= size;
+ }
+
+ return 0;
+}
+
+/*
+ * Remove the resources described by config from a cell: unmap the listed
+ * memory regions from the cell's EPT, set the I/O bitmap bit of every port
+ * whose bit is clear in the configuration's PIO bitmap, and invalidate EPT
+ * translations afterwards.
+ */
+void vmx_cell_shrink(struct cell *cell, struct jailhouse_cell_desc *config)
+{
+	struct jailhouse_memory *mem;
+	u32 pio_bitmap_size;
+	u8 *pio_bitmap, *b;
+	int n;
+
+	/* the memory region array follows the descriptor and the CPU set */
+	mem = (void *)config + sizeof(struct jailhouse_cell_desc) +
+		config->cpu_set_size;
+
+	for (n = 0; n < config->num_memory_regions; n++, mem++)
+		/* FIXME: phys_start only works for the Linux cell. We need
+		 * the original memory region, match phys_start and use
+		 * virt_start from there. */
+		page_map_destroy(cell->vmx.ept, mem->phys_start, mem->size,
+				 PAGE_DIR_LEVELS);
+
+	/* the PIO bitmap follows the memory regions and the IRQ lines */
+	pio_bitmap = (void *)mem +
+		config->num_irq_lines * sizeof(struct jailhouse_irq_line);
+	pio_bitmap_size = config->pio_bitmap_size;
+	/*
+	 * Never touch memory behind the cell's I/O bitmap, even if the
+	 * configuration declares a larger bitmap; vmx_cell_init() applies
+	 * the same limit when copying.
+	 */
+	if (pio_bitmap_size > sizeof(cell->vmx.io_bitmap))
+		pio_bitmap_size = sizeof(cell->vmx.io_bitmap);
+
+	for (b = cell->vmx.io_bitmap; pio_bitmap_size > 0;
+	     b++, pio_bitmap++, pio_bitmap_size--)
+		*b |= ~*pio_bitmap;
+
+	vmx_invept();
+}
+
+/*
+ * Invalidate EPT-derived translations. Single-context invalidation for the
+ * current EPT pointer is used if MSR_IA32_VMX_EPT_VPID_CAP reports it,
+ * global invalidation otherwise. A failing invept is fatal: a message is
+ * printed and panic_stop() is called.
+ */
+void vmx_invept(void)
+{
+	struct {
+		u64 eptp;
+		u64 reserved;
+	} desc = { .eptp = 0, .reserved = 0 };
+	u64 inv_type = VMX_INVEPT_GLOBAL;
+	u8 success;
+
+	if (read_msr(MSR_IA32_VMX_EPT_VPID_CAP) & EPT_INVEPT_SINGLE) {
+		inv_type = VMX_INVEPT_SINGLE;
+		desc.eptp = vmcs_read64(EPT_POINTER);
+	}
+
+	/* success means neither CF nor ZF is set afterwards */
+	asm volatile(
+		"invept (%1),%2\n\t"
+		"seta %0\n\t"
+		: "=qm" (success)
+		: "r" (&desc), "r" (inv_type)
+		: "memory", "cc");
+
+	if (success)
+		return;
+
+	panic_printk("FATAL: invept failed, error %d\n",
+		     vmcs_read32(VM_INSTRUCTION_ERROR));
+	panic_stop(NULL);
+}
+
+/*
+ * Program guest CR0 (cr == 0) or CR4 (any other cr) together with the
+ * matching read shadow and guest/host mask.
+ *
+ * The value is adjusted to the CPU's FIXED0/FIXED1 capability MSRs. For
+ * CR0, NW and CD are always cleared, PE and PG are not forced on and ET is
+ * forced on. For CR4, VMXE is added to the requested value.
+ *
+ * Returns true if all VMCS writes succeeded.
+ */
+static bool vmx_set_guest_cr(int cr, unsigned long val)
+{
+	const bool is_cr4 = cr != 0;
+	unsigned long must_be1, may_be1, forced1;
+	bool ok = true;
+
+	if (is_cr4) {
+		must_be1 = read_msr(MSR_IA32_VMX_CR4_FIXED0);
+		may_be1 = read_msr(MSR_IA32_VMX_CR4_FIXED1);
+		forced1 = must_be1 & may_be1;
+		/* keeps the hypervisor visible */
+		val |= X86_CR4_VMXE;
+	} else {
+		must_be1 = read_msr(MSR_IA32_VMX_CR0_FIXED0);
+		may_be1 = read_msr(MSR_IA32_VMX_CR0_FIXED1);
+		forced1 = must_be1 & may_be1;
+		may_be1 &= ~(X86_CR0_NW | X86_CR0_CD);
+		forced1 &= ~(X86_CR0_PE | X86_CR0_PG);
+		forced1 |= X86_CR0_ET;
+	}
+
+	/* effective value, value the guest reads, bits owned by the host */
+	ok &= vmcs_write64(is_cr4 ? GUEST_CR4 : GUEST_CR0,
+			   (val & may_be1) | forced1);
+	ok &= vmcs_write64(is_cr4 ? CR4_READ_SHADOW : CR0_READ_SHADOW, val);
+	ok &= vmcs_write64(is_cr4 ? CR4_GUEST_HOST_MASK : CR0_GUEST_HOST_MASK,
+			   forced1 | ~may_be1);
+
+	return ok;
+}
+
+/*
+ * Write the cell-specific VMCS fields: both I/O bitmap pages and the EPT
+ * pointer (write-back memory type, 4-level page walk).
+ * Returns true if all VMCS writes succeeded.
+ */
+static bool vmx_set_cell_config(struct cell *cell)
+{
+	u8 *bitmap_a = cell->vmx.io_bitmap;
+	u8 *bitmap_b = bitmap_a + PAGE_SIZE;
+	bool ok = true;
+
+	ok &= vmcs_write64(IO_BITMAP_A, page_map_hvirt2phys(bitmap_a));
+	ok &= vmcs_write64(IO_BITMAP_B, page_map_hvirt2phys(bitmap_b));
+
+	ok &= vmcs_write64(EPT_POINTER,
+			   page_map_hvirt2phys(cell->vmx.ept) |
+			   EPT_TYPE_WRITEBACK | EPT_PAGE_WALK_LEN);
+
+	return ok;
+}
+
+/*
+ * Fill the current VMCS for this CPU: host state describing the
+ * hypervisor, guest state taken from the saved Linux state in cpu_data,
+ * and the execution, exit and entry controls.
+ *
+ * The results of all writes are accumulated; returns true only if every
+ * VMCS write succeeded.
+ */
+static bool vmcs_setup(struct per_cpu *cpu_data)
+{
+ struct desc_table_reg dtr;
+ unsigned long val;
+ bool ok = true;
+
+ /* host state: taken from the currently active hypervisor setup */
+ ok &= vmcs_write64(HOST_CR0, read_cr0());
+ ok &= vmcs_write64(HOST_CR3, read_cr3());
+ ok &= vmcs_write64(HOST_CR4, read_cr4());
+
+ ok &= vmcs_write16(HOST_CS_SELECTOR, GDT_DESC_CODE * 8);
+ ok &= vmcs_write16(HOST_DS_SELECTOR, 0);
+ ok &= vmcs_write16(HOST_ES_SELECTOR, 0);
+ ok &= vmcs_write16(HOST_SS_SELECTOR, 0);
+ ok &= vmcs_write16(HOST_FS_SELECTOR, 0);
+ ok &= vmcs_write16(HOST_GS_SELECTOR, 0);
+ ok &= vmcs_write16(HOST_TR_SELECTOR, GDT_DESC_TSS * 8);
+
+ ok &= vmcs_write64(HOST_FS_BASE, 0);
+ ok &= vmcs_write64(HOST_GS_BASE, 0);
+ ok &= vmcs_write64(HOST_TR_BASE, 0);
+
+ read_gdtr(&dtr);
+ ok &= vmcs_write64(HOST_GDTR_BASE, dtr.base);
+ read_idtr(&dtr);
+ ok &= vmcs_write64(HOST_IDTR_BASE, dtr.base);
+
+ ok &= vmcs_write64(HOST_IA32_EFER, EFER_LMA | EFER_LME);
+
+ ok &= vmcs_write32(HOST_IA32_SYSENTER_CS, 0);
+ ok &= vmcs_write64(HOST_IA32_SYSENTER_EIP, 0);
+ ok &= vmcs_write64(HOST_IA32_SYSENTER_ESP, 0);
+
+ /* on VM exit, start at vm_exit with the stack pointer at the end of
+ * the per-CPU stack */
+ ok &= vmcs_write64(HOST_RSP, (unsigned long)cpu_data->stack +
+ sizeof(cpu_data->stack));
+ ok &= vmcs_write64(HOST_RIP, (unsigned long)vm_exit);
+
+ /* guest state: control registers derived from the current values */
+ ok &= vmx_set_guest_cr(0, read_cr0());
+ ok &= vmx_set_guest_cr(4, read_cr4());
+
+ ok &= vmcs_write64(GUEST_CR3, cpu_data->linux_cr3);
+
+ ok &= vmcs_write16(GUEST_CS_SELECTOR, cpu_data->linux_cs);
+ ok &= vmcs_write64(GUEST_CS_BASE, 0);
+ ok &= vmcs_write32(GUEST_CS_LIMIT, 0xffffffff);
+ ok &= vmcs_write32(GUEST_CS_AR_BYTES, 0x0a09b);
+
+ /* data segments get a null selector and access rights 0x10000 */
+ ok &= vmcs_write16(GUEST_DS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_DS_BASE, 0);
+ ok &= vmcs_write32(GUEST_DS_LIMIT, 0);
+ ok &= vmcs_write32(GUEST_DS_AR_BYTES, 0x10000);
+
+ ok &= vmcs_write16(GUEST_ES_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_ES_BASE, 0);
+ ok &= vmcs_write32(GUEST_ES_LIMIT, 0);
+ ok &= vmcs_write32(GUEST_ES_AR_BYTES, 0x10000);
+
+ ok &= vmcs_write16(GUEST_FS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_FS_BASE, cpu_data->linux_fs_base);
+ ok &= vmcs_write32(GUEST_FS_LIMIT, 0);
+ ok &= vmcs_write32(GUEST_FS_AR_BYTES, 0x10000);
+
+ ok &= vmcs_write16(GUEST_GS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_GS_BASE, cpu_data->linux_gs_base);
+ ok &= vmcs_write32(GUEST_GS_LIMIT, 0);
+ ok &= vmcs_write32(GUEST_GS_AR_BYTES, 0x10000);
+
+ ok &= vmcs_write16(GUEST_SS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_SS_BASE, 0);
+ ok &= vmcs_write32(GUEST_SS_LIMIT, 0);
+ ok &= vmcs_write32(GUEST_SS_AR_BYTES, 0x10000);
+
+ /* task register as saved from Linux */
+ ok &= vmcs_write16(GUEST_TR_SELECTOR, cpu_data->linux_tr);
+ ok &= vmcs_write64(GUEST_TR_BASE, cpu_data->linux_tr_base);
+ ok &= vmcs_write32(GUEST_TR_LIMIT, cpu_data->linux_tr_limit);
+ ok &= vmcs_write32(GUEST_TR_AR_BYTES, cpu_data->linux_tr_ar_bytes);
+
+ ok &= vmcs_write16(GUEST_LDTR_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_LDTR_BASE, 0);
+ ok &= vmcs_write32(GUEST_LDTR_LIMIT, 0);
+ ok &= vmcs_write32(GUEST_LDTR_AR_BYTES, 0x10000);
+
+ ok &= vmcs_write64(GUEST_GDTR_BASE, cpu_data->linux_gdtr.base);
+ ok &= vmcs_write32(GUEST_GDTR_LIMIT, cpu_data->linux_gdtr.limit);
+ ok &= vmcs_write64(GUEST_IDTR_BASE, cpu_data->linux_idtr.base);
+ ok &= vmcs_write32(GUEST_IDTR_LIMIT, cpu_data->linux_idtr.limit);
+
+ /* resume Linux at the saved instruction pointer, with the stack
+ * pointer moved past the saved entry registers plus one slot */
+ ok &= vmcs_write64(GUEST_RFLAGS, 0x02);
+ ok &= vmcs_write64(GUEST_RSP, cpu_data->linux_sp +
+ (NUM_ENTRY_REGS + 1) * sizeof(unsigned long));
+ ok &= vmcs_write64(GUEST_RIP, cpu_data->linux_ip);
+
+ ok &= vmcs_write32(GUEST_SYSENTER_CS,
+ read_msr(MSR_IA32_SYSENTER_CS));
+ ok &= vmcs_write64(GUEST_SYSENTER_EIP,
+ read_msr(MSR_IA32_SYSENTER_EIP));
+ ok &= vmcs_write64(GUEST_SYSENTER_ESP,
+ read_msr(MSR_IA32_SYSENTER_ESP));
+
+ ok &= vmcs_write64(GUEST_DR7, 0x00000400);
+
+ ok &= vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+ ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+ ok &= vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+
+ ok &= vmcs_write64(GUEST_IA32_EFER, cpu_data->linux_efer);
+
+ /* TODO: switch PAT, PERF */
+
+ ok &= vmcs_write64(VMCS_LINK_POINTER, -1UL);
+ ok &= vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
+
+ /* controls: start from the value of the capability MSR (truncated to
+ * 32 bits by vmcs_write32) and add the features in use */
+ val = read_msr(MSR_IA32_VMX_PINBASED_CTLS + vmx_true_msr_offs);
+ val |= PIN_BASED_NMI_EXITING;
+ ok &= vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, val);
+
+ ok &= vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ val = read_msr(MSR_IA32_VMX_PROCBASED_CTLS + vmx_true_msr_offs);
+ val |= CPU_BASED_USE_IO_BITMAPS | CPU_BASED_USE_MSR_BITMAPS |
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ ok &= vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, val);
+
+ ok &= vmcs_write64(MSR_BITMAP, page_map_hvirt2phys(msr_bitmap));
+
+ val = read_msr(MSR_IA32_VMX_PROCBASED_CTLS2);
+ val |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ ok &= vmcs_write32(SECONDARY_VM_EXEC_CONTROL, val);
+
+ ok &= vmcs_write64(APIC_ACCESS_ADDR,
+ page_map_hvirt2phys(apic_access_page));
+
+ /* I/O bitmaps and EPT pointer of the CPU's cell */
+ ok &= vmx_set_cell_config(cpu_data->cell);
+
+ ok &= vmcs_write32(EXCEPTION_BITMAP, 0);
+
+ val = read_msr(MSR_IA32_VMX_EXIT_CTLS + vmx_true_msr_offs);
+ val |= VM_EXIT_HOST_ADDR_SPACE_SIZE | VM_EXIT_SAVE_IA32_EFER |
+ VM_EXIT_LOAD_IA32_EFER;
+ ok &= vmcs_write32(VM_EXIT_CONTROLS, val);
+
+ ok &= vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ ok &= vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+ ok &= vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
+
+ val = read_msr(MSR_IA32_VMX_ENTRY_CTLS + vmx_true_msr_offs);
+ val |= VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER;
+ ok &= vmcs_write32(VM_ENTRY_CONTROLS, val);
+
+ /* NOTE(review): this replaces the CR4 guest/host mask computed by
+ * vmx_set_guest_cr(4, ...) above with 0 - confirm this is intended */
+ ok &= vmcs_write64(CR4_GUEST_HOST_MASK, 0);
+
+ ok &= vmcs_write32(CR3_TARGET_COUNT, 0);
+
+ return ok;
+}
+
+/*
+ * Verify that this CPU offers every VMX feature the hypervisor depends on,
+ * enter VMX operation and prepare the per-CPU VMCS.
+ *
+ * Returns 0 on success, -ENODEV if VMX is not reported by CPUID or is locked
+ * off in IA32_FEATURE_CONTROL, -EBUSY if CR4.VMXE is already set, and -EIO
+ * if a required capability is missing or a VMX instruction fails.
+ *
+ * NOTE(review): if vmcs_clear/vmcs_load/vmcs_setup fail, this returns -EIO
+ * with vmx_state left at VMXON and CR4.VMXE still set; presumably the caller
+ * is expected to run vmx_cpu_exit() afterwards - confirm.
+ */
+int vmx_cpu_init(struct per_cpu *cpu_data)
+{
+ unsigned long vmx_proc_ctrl, vmx_proc_ctrl2, ept_cap;
+ unsigned long vmx_pin_ctrl, feature_ctrl, mask;
+ unsigned long vmx_basic;
+ unsigned long cr4;
+ u32 revision_id;
+
+ if (!(cpuid_ecx(1) & X86_FEATURE_VMX))
+ return -ENODEV;
+
+ /* VMXE already set: refuse to take over, VMX appears to be in use */
+ cr4 = read_cr4();
+ if (cr4 & X86_CR4_VMXE)
+ return -EBUSY;
+
+ vmx_basic = read_msr(MSR_IA32_VMX_BASIC);
+
+ /* require VMCS size <= PAGE_SIZE */
+ if (((vmx_basic >> 32) & 0x1fff) > PAGE_SIZE)
+ return -EIO;
+
+ /* require VMCS memory access type == write back */
+ if (((vmx_basic >> 50) & 0xf) != 6)
+ return -EIO;
+
+ /*
+  * Bit 55 of IA32_VMX_BASIC: switch all later capability reads to the
+  * TRUE_* variants of the control MSRs by adding a fixed index offset.
+  */
+ if (vmx_basic & (1UL << 55))
+ vmx_true_msr_offs = MSR_IA32_VMX_TRUE_PINBASED_CTLS -
+ MSR_IA32_VMX_PINBASED_CTLS;
+
+ /*
+  * The checks below test the upper 32 bits of each capability MSR,
+  * which (per the Intel SDM) report the control bits that may be set.
+  */
+
+ /* require NMI exiting and preemption timer support */
+ vmx_pin_ctrl = read_msr(MSR_IA32_VMX_PINBASED_CTLS +
+ vmx_true_msr_offs) >> 32;
+ if (!(vmx_pin_ctrl & PIN_BASED_NMI_EXITING) ||
+ !(vmx_pin_ctrl & PIN_BASED_VMX_PREEMPTION_TIMER))
+ return -EIO;
+
+ /* require I/O and MSR bitmap as well as secondary controls support */
+ vmx_proc_ctrl = read_msr(MSR_IA32_VMX_PROCBASED_CTLS +
+ vmx_true_msr_offs) >> 32;
+ if (!(vmx_proc_ctrl & CPU_BASED_USE_IO_BITMAPS) ||
+ !(vmx_proc_ctrl & CPU_BASED_USE_MSR_BITMAPS) ||
+ !(vmx_proc_ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ return -EIO;
+
+ /* require APIC access, EPT and unrestricted guest mode support */
+ vmx_proc_ctrl2 = read_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+ ept_cap = read_msr(MSR_IA32_VMX_EPT_VPID_CAP);
+ if (!(vmx_proc_ctrl2 & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) ||
+ !(vmx_proc_ctrl2 & SECONDARY_EXEC_ENABLE_EPT) ||
+ (ept_cap & EPT_MANDATORY_FEATURES) != EPT_MANDATORY_FEATURES ||
+ !(ept_cap & (EPT_INVEPT_SINGLE | EPT_INVEPT_GLOBAL)) ||
+ !(vmx_proc_ctrl2 & SECONDARY_EXEC_UNRESTRICTED_GUEST))
+ return -EIO;
+
+ /* the low 32 bits of IA32_VMX_BASIC go into the first word of both
+  * the VMXON region and the VMCS */
+ revision_id = (u32)vmx_basic;
+ *(u32 *)cpu_data->vmxon_page = revision_id;
+ *(u32 *)cpu_data->vmcs_page = revision_id;
+
+ // TODO: validate CR0
+
+ /* Note: We assume that TXT is off */
+ feature_ctrl = read_msr(MSR_IA32_FEATURE_CONTROL);
+ mask = FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+
+ if ((feature_ctrl & mask) != mask) {
+ /* locked without the VMXON enable bit: cannot be changed here */
+ if (feature_ctrl & FEATURE_CONTROL_LOCKED)
+ return -ENODEV;
+
+ /* still unlocked: enable VMXON and set the lock bit ourselves */
+ feature_ctrl |= mask;
+ write_msr(MSR_IA32_FEATURE_CONTROL, feature_ctrl);
+ }
+
+ write_cr4(cr4 | X86_CR4_VMXE);
+ // TODO: validate CR4
+
+ /* on vmxon failure, restore the CR4 value read at function entry */
+ if (!vmxon(cpu_data)) {
+ write_cr4(cr4);
+ return -EIO;
+ }
+
+ cpu_data->vmx_state = VMXON;
+
+ if (!vmcs_clear(cpu_data) ||
+ !vmcs_load(cpu_data) ||
+ !vmcs_setup(cpu_data))
+ return -EIO;
+
+ cpu_data->vmx_state = VMCS_READY;
+
+ return 0;
+}
+
+/*
+ * Leave VMX operation on this CPU.
+ *
+ * Nothing happens if the CPU is already marked VMXOFF. Otherwise the state
+ * is set to VMXOFF, the VMCS is cleared, vmxoff is executed and the VMXE
+ * bit is removed from CR4 again.
+ */
+void vmx_cpu_exit(struct per_cpu *cpu_data)
+{
+	unsigned long cr4;
+
+	if (cpu_data->vmx_state != VMXOFF) {
+		cpu_data->vmx_state = VMXOFF;
+		vmcs_clear(cpu_data);
+		asm volatile("vmxoff" : : : "cc");
+
+		cr4 = read_cr4();
+		write_cr4(cr4 & ~X86_CR4_VMXE);
+	}
+}
+
+/*
+ * Start guest execution on this CPU with vmlaunch.
+ *
+ * Before the launch, r15, r14, r13, r12, rbx and rbp are loaded from the
+ * six consecutive 8-byte slots starting at cpu_data->linux_reg, and rax is
+ * set to 0. The statements after the asm block are the failure path: they
+ * print VM_INSTRUCTION_ERROR and stop the CPU.
+ *
+ * NOTE(review): the "pop %rbp" after vmlaunch is only reached when the
+ * launch fails; its purpose is not evident from this file - confirm.
+ */
+void vmx_cpu_activate_vmm(struct per_cpu *cpu_data)
+{
+ /* We enter Linux at the point arch_entry would return to as well.
+ * rax is cleared to signal success to the caller. */
+ asm volatile(
+ "mov (%%rdi),%%r15\n\t"
+ "mov 0x8(%%rdi),%%r14\n\t"
+ "mov 0x10(%%rdi),%%r13\n\t"
+ "mov 0x18(%%rdi),%%r12\n\t"
+ "mov 0x20(%%rdi),%%rbx\n\t"
+ "mov 0x28(%%rdi),%%rbp\n\t"
+ "vmlaunch\n\t"
+ "pop %%rbp"
+ : /* no output */
+ : "a" (0), "D" (cpu_data->linux_reg)
+ : "memory", "r15", "r14", "r13", "r12", "rbx", "rbp", "cc");
+
+ panic_printk("FATAL: vmlaunch failed, error %d\n",
+ vmcs_read32(VM_INSTRUCTION_ERROR));
+ panic_stop(cpu_data);
+}
+
+/*
+ * Hand the CPU back to Linux when the hypervisor is disabled; never
+ * returns to its caller.
+ *
+ * The guest state that Linux needs (CR3, GDTR/IDTR, CS and TR selectors,
+ * EFER, FS/GS base, SYSENTER MSRs) is copied from the VMCS into cpu_data
+ * and then handed to arch_cpu_restore(). The guest RIP is pushed onto the
+ * guest stack so that the final "ret" continues at the guest's next
+ * instruction with rax == 0.
+ */
+static void __attribute__((noreturn))
+vmx_cpu_deactivate_vmm(struct registers *guest_regs, struct per_cpu *cpu_data)
+{
+ unsigned long *stack = (unsigned long *)vmcs_read64(GUEST_RSP);
+ unsigned long linux_ip = vmcs_read64(GUEST_RIP);
+
+ cpu_data->linux_cr3 = vmcs_read64(GUEST_CR3);
+
+ cpu_data->linux_gdtr.base = vmcs_read64(GUEST_GDTR_BASE);
+ cpu_data->linux_gdtr.limit = vmcs_read64(GUEST_GDTR_LIMIT);
+ cpu_data->linux_idtr.base = vmcs_read64(GUEST_IDTR_BASE);
+ cpu_data->linux_idtr.limit = vmcs_read64(GUEST_IDTR_LIMIT);
+
+ cpu_data->linux_cs = vmcs_read32(GUEST_CS_SELECTOR);
+
+ cpu_data->linux_tr = vmcs_read32(GUEST_TR_SELECTOR);
+
+ cpu_data->linux_efer = vmcs_read64(GUEST_IA32_EFER);
+ cpu_data->linux_fs_base = vmcs_read64(GUEST_FS_BASE);
+ cpu_data->linux_gs_base = vmcs_read64(GUEST_GS_BASE);
+
+ cpu_data->linux_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
+ cpu_data->linux_sysenter_eip = vmcs_read64(GUEST_SYSENTER_EIP);
+ cpu_data->linux_sysenter_esp = vmcs_read64(GUEST_SYSENTER_ESP);
+
+ arch_cpu_restore(cpu_data);
+
+ /* push the guest RIP as return address onto the guest stack */
+ stack--;
+ *stack = linux_ip;
+
+ /*
+  * rbx = guest_regs, rax = prepared guest stack pointer.
+  * Point rsp at the saved register block and pop the registers in the
+  * order r15..r8, rdi, rsi, rbp, (one slot skipped), rbx, rdx, rcx.
+  * Then switch to the guest stack, clear rax and "ret" to linux_ip.
+  * NOTE(review): the skipped slot presumably holds the saved rsp and
+  * the unpopped last slot rax - confirm against struct registers.
+  */
+ asm volatile (
+ "mov %%rbx,%%rsp\n\t"
+ "pop %%r15\n\t"
+ "pop %%r14\n\t"
+ "pop %%r13\n\t"
+ "pop %%r12\n\t"
+ "pop %%r11\n\t"
+ "pop %%r10\n\t"
+ "pop %%r9\n\t"
+ "pop %%r8\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rsi\n\t"
+ "pop %%rbp\n\t"
+ "add $8,%%rsp\n\t"
+ "pop %%rbx\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rcx\n\t"
+ "mov %%rax,%%rsp\n\t"
+ "xor %%rax,%%rax\n\t"
+ "ret"
+ : : "a" (stack), "b" (guest_regs));
+ __builtin_unreachable();
+}
+
+/*
+ * Reprogram the guest state in the current VMCS to a start-up state and
+ * zero the guest's general-purpose registers.
+ *
+ * @guest_regs:  saved guest registers, cleared by this function
+ * @cpu_data:    per-CPU data of the CPU being reset
+ * @sipi_vector: start-up vector; CS selector becomes vector << 8 and CS
+ *               base vector << 12 with RIP 0. APIC_BSP_PSEUDO_SIPI is
+ *               special-cased to vector 0xf0 with RIP 0xfff0.
+ *
+ * Any failing VMCS write is fatal: the CPU is stopped via panic_stop().
+ */
+static void vmx_cpu_reset(struct registers *guest_regs,
+ struct per_cpu *cpu_data, unsigned int sipi_vector)
+{
+ unsigned long val;
+ bool ok = true;
+
+ /* control registers: only NW, CD and ET set in CR0, CR4 and CR3 zero */
+ ok &= vmx_set_guest_cr(0, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+ ok &= vmx_set_guest_cr(4, 0);
+
+ ok &= vmcs_write64(GUEST_CR3, 0);
+
+ ok &= vmcs_write64(GUEST_RFLAGS, 0x02);
+ ok &= vmcs_write64(GUEST_RSP, 0);
+
+ /* entry point: RIP 0 for a normal SIPI, 0xfff0 with vector 0xf0 for
+  * the BSP pseudo-SIPI */
+ val = 0;
+ if (sipi_vector == APIC_BSP_PSEUDO_SIPI) {
+ val = 0xfff0;
+ sipi_vector = 0xf0;
+ }
+ ok &= vmcs_write64(GUEST_RIP, val);
+
+ ok &= vmcs_write16(GUEST_CS_SELECTOR, sipi_vector << 8);
+ ok &= vmcs_write64(GUEST_CS_BASE, sipi_vector << 12);
+ ok &= vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_CS_AR_BYTES, 0x0009b);
+
+ /* data and stack segments: selector 0, base 0, 64K limit */
+ ok &= vmcs_write16(GUEST_DS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_DS_BASE, 0);
+ ok &= vmcs_write32(GUEST_DS_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_DS_AR_BYTES, 0x00093);
+
+ ok &= vmcs_write16(GUEST_ES_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_ES_BASE, 0);
+ ok &= vmcs_write32(GUEST_ES_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_ES_AR_BYTES, 0x00093);
+
+ ok &= vmcs_write16(GUEST_FS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_FS_BASE, 0);
+ ok &= vmcs_write32(GUEST_FS_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_FS_AR_BYTES, 0x00093);
+
+ ok &= vmcs_write16(GUEST_GS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_GS_BASE, 0);
+ ok &= vmcs_write32(GUEST_GS_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_GS_AR_BYTES, 0x00093);
+
+ ok &= vmcs_write16(GUEST_SS_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_SS_BASE, 0);
+ ok &= vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_SS_AR_BYTES, 0x00093);
+
+ /* task register and LDTR */
+ ok &= vmcs_write16(GUEST_TR_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_TR_BASE, 0);
+ ok &= vmcs_write32(GUEST_TR_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_TR_AR_BYTES, 0x0008b);
+
+ ok &= vmcs_write16(GUEST_LDTR_SELECTOR, 0);
+ ok &= vmcs_write64(GUEST_LDTR_BASE, 0);
+ ok &= vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
+ ok &= vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
+
+ /* descriptor tables */
+ ok &= vmcs_write64(GUEST_GDTR_BASE, 0);
+ ok &= vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
+ ok &= vmcs_write64(GUEST_IDTR_BASE, 0);
+ ok &= vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+
+ ok &= vmcs_write64(GUEST_IA32_EFER, 0);
+
+ ok &= vmcs_write32(GUEST_SYSENTER_CS, 0);
+ ok &= vmcs_write64(GUEST_SYSENTER_EIP, 0);
+ ok &= vmcs_write64(GUEST_SYSENTER_ESP, 0);
+
+ ok &= vmcs_write64(GUEST_DR7, 0x00000400);
+
+ ok &= vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+ ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+ ok &= vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+
+ /* EFER was cleared above, so the next VM entry must not be in
+  * IA-32e mode */
+ val = vmcs_read32(VM_ENTRY_CONTROLS);
+ val &= ~VM_ENTRY_IA32E_MODE;
+ ok &= vmcs_write32(VM_ENTRY_CONTROLS, val);
+
+ /* re-apply the configuration of the cell this CPU now belongs to */
+ ok &= vmx_set_cell_config(cpu_data->cell);
+
+ memset(guest_regs, 0, sizeof(*guest_regs));
+
+ if (!ok) {
+ panic_printk("FATAL: CPU reset failed\n");
+ panic_stop(cpu_data);
+ }
+}
+
+/*
+ * Force a VM exit on this CPU by switching on the VMX preemption timer in
+ * the pin-based execution controls. The VMCS setup code in this file writes
+ * a timer value of 0, and vmx_handle_exit() switches the timer off again
+ * when it handles EXIT_REASON_PREEMPTION_TIMER.
+ *
+ * Does nothing unless the VMCS of this CPU is ready
+ * (vmx_state == VMCS_READY), since the VMCS must not be accessed before.
+ */
+void vmx_schedule_vmexit(struct per_cpu *cpu_data)
+{
+	u32 pin_based_ctrl;
+
+	/*
+	 * The previous test "!cpu_data->vmx_state == VMCS_READY" negated the
+	 * state first and compared the resulting 0/1 against VMCS_READY,
+	 * which is not the intended "state is not VMCS_READY" check.
+	 */
+	if (cpu_data->vmx_state != VMCS_READY)
+		return;
+
+	pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+	pin_based_ctrl |= PIN_BASED_VMX_PREEMPTION_TIMER;
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_ctrl);
+}
+
+/* Clear the preemption timer bit in the pin-based execution controls. */
+static void vmx_disable_preemption_timer(void)
+{
+	u32 ctrl;
+
+	ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+	ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+	vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, ctrl);
+}
+
+/* Advance the guest RIP by inst_len bytes, i.e. past the instruction that
+ * has just been emulated. */
+static void skip_emulated_instruction(unsigned int inst_len)
+{
+	unsigned long next_rip = vmcs_read64(GUEST_RIP) + inst_len;
+
+	vmcs_write64(GUEST_RIP, next_rip);
+}
+
+/*
+ * If the guest EFER has LME set but LMA still clear, set LMA in the guest
+ * EFER and enable IA-32e mode in the VM-entry controls. In every other
+ * case nothing is changed.
+ */
+static void update_efer(void)
+{
+	unsigned long efer = vmcs_read64(GUEST_IA32_EFER);
+	unsigned long long_mode_bits = efer & (EFER_LME | EFER_LMA);
+
+	if (long_mode_bits == EFER_LME) {
+		vmcs_write64(GUEST_IA32_EFER, efer | EFER_LMA);
+		vmcs_write32(VM_ENTRY_CONTROLS,
+			     vmcs_read32(VM_ENTRY_CONTROLS) |
+			     VM_ENTRY_IA32E_MODE);
+	}
+}
+
+/*
+ * Emulate an intercepted control register access.
+ *
+ * Only "mov to CR0" and "mov to CR4" are handled: the instruction is
+ * skipped, the new value is applied through vmx_set_guest_cr() and, when
+ * paging gets enabled via CR0, update_efer() is invoked.
+ *
+ * Returns true if the access was handled, false (after printing a fatal
+ * message) otherwise.
+ */
+static bool vmx_handle_cr(struct registers *guest_regs,
+			  struct per_cpu *cpu_data)
+{
+	u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	unsigned long cr = exit_qualification & 0xf;
+	unsigned long access_type = (exit_qualification >> 4) & 3;
+	unsigned long reg = (exit_qualification >> 8) & 0xf;
+	unsigned long val;
+
+	/* access type 0: move to cr */
+	if (access_type == 0) {
+		/* register 4 is rsp, which lives in the VMCS instead of the
+		 * saved register block */
+		val = (reg == 4) ? vmcs_read64(GUEST_RSP) :
+			((unsigned long *)guest_regs)[15 - reg];
+
+		if (cr == 0 || cr == 4) {
+			skip_emulated_instruction(X86_INST_LEN_MOV_TO_CR);
+			/* TODO: check for #GP reasons */
+			vmx_set_guest_cr(cr, val);
+			if (cr == 0 && (val & X86_CR0_PG))
+				update_efer();
+			return true;
+		}
+	}
+
+	panic_printk("FATAL: Unhandled CR access, qualification %x\n",
+		     exit_qualification);
+	return false;
+}
+
+/*
+ * Handle an intercepted access to the APIC access page.
+ *
+ * Linear reads and writes at 16-byte aligned offsets are forwarded to
+ * apic_mmio_access(); when that returns a non-zero instruction length, the
+ * instruction is skipped. Everything else is reported as fatal.
+ *
+ * Returns true if the access was emulated, false otherwise.
+ */
+static bool vmx_handle_apic_access(struct registers *guest_regs,
+				   struct per_cpu *cpu_data)
+{
+	u64 qualification = vmcs_read64(EXIT_QUALIFICATION);
+	u64 access_type = qualification & APIC_ACCESS_TYPE_MASK;
+	unsigned long page_table_addr;
+	unsigned int inst_len, offset;
+	bool is_write;
+
+	if (access_type == APIC_ACCESS_TYPE_LINEAR_READ ||
+	    access_type == APIC_ACCESS_TYPE_LINEAR_WRITE) {
+		is_write = !!(qualification & APIC_ACCESS_TYPE_LINEAR_WRITE);
+		offset = qualification & APIC_ACCESS_OFFET_MASK;
+
+		/* only 16-byte aligned offsets are passed on */
+		if (!(offset & 0x00f)) {
+			page_table_addr =
+				vmcs_read64(GUEST_CR3) & PAGE_ADDR_MASK;
+
+			inst_len = apic_mmio_access(guest_regs, cpu_data,
+						    vmcs_read64(GUEST_RIP),
+						    page_table_addr,
+						    offset >> 4, is_write);
+			if (!inst_len)
+				return false;
+
+			skip_emulated_instruction(inst_len);
+			return true;
+		}
+	}
+
+	panic_printk("FATAL: Unhandled APIC access, "
+		     "qualification %x\n", qualification);
+	return false;
+}
+
+/*
+ * Print the exit qualification, the IDT vectoring info and the exit
+ * interruption info of the current VM exit. For EPT violations and EPT
+ * misconfigurations, the guest physical and linear addresses are printed
+ * as well.
+ */
+static void dump_vm_exit_details(u32 reason)
+{
+	panic_printk("qualification %x\n", vmcs_read64(EXIT_QUALIFICATION));
+	panic_printk("vectoring info: %x interrupt info: %x\n",
+		     vmcs_read32(IDT_VECTORING_INFO_FIELD),
+		     vmcs_read32(VM_EXIT_INTR_INFO));
+
+	switch (reason) {
+	case EXIT_REASON_EPT_VIOLATION:
+	case EXIT_REASON_EPT_MISCONFIG:
+		panic_printk("guest phys addr %p guest linear addr: %p\n",
+			     vmcs_read64(GUEST_PHYSICAL_ADDRESS),
+			     vmcs_read64(GUEST_LINEAR_ADDRESS));
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Print the guest state for a fatal error report: RIP, RSP and RFLAGS,
+ * the general-purpose registers rax, rbx, rcx, rdx, rsi and rdi from
+ * guest_regs, the CS selector/base/access rights, CR0/CR3/CR4 and EFER.
+ *
+ * NOTE(review): the value printed as "EFER.LMA" is the IA-32e mode bit of
+ * the VM-entry controls, not the LMA bit of the guest EFER field.
+ * NOTE(review): GUEST_CS_SELECTOR is read with vmcs_read64() here while
+ * other code in this file uses vmcs_read32() for it - confirm intent.
+ */
+static void dump_guest_regs(struct registers *guest_regs)
+{
+ panic_printk("RIP: %p RSP: %p FLAGS: %x\n", vmcs_read64(GUEST_RIP),
+ vmcs_read64(GUEST_RSP), vmcs_read64(GUEST_RFLAGS));
+ panic_printk("RAX: %p RBX: %p RCX: %p\n", guest_regs->rax,
+ guest_regs->rbx, guest_regs->rcx);
+ panic_printk("RDX: %p RSI: %p RDI: %p\n", guest_regs->rdx,
+ guest_regs->rsi, guest_regs->rdi);
+ panic_printk("CS: %x BASE: %p AR-BYTES: %x EFER.LMA %d\n",
+ vmcs_read64(GUEST_CS_SELECTOR),
+ vmcs_read64(GUEST_CS_BASE),
+ vmcs_read32(GUEST_CS_AR_BYTES),
+ !!(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE));
+ panic_printk("CR0: %p CR3: %p CR4: %p\n", vmcs_read64(GUEST_CR0),
+ vmcs_read64(GUEST_CR3), vmcs_read64(GUEST_CR4));
+ panic_printk("EFER: %p\n", vmcs_read64(GUEST_IA32_EFER));
+}
+
+/*
+ * Central VM-exit dispatcher.
+ *
+ * Reads the exit reason from the VMCS and emulates the intercepted event.
+ * Handled exits return to the caller; every unhandled exit or failed VM
+ * entry ends in dump_guest_regs() and panic_stop().
+ *
+ * @guest_regs: saved general-purpose registers of the guest; modified to
+ *              pass results back (cpuid output, hypercall return value)
+ * @cpu_data:   per-CPU data of the CPU that took the exit
+ */
+void vmx_handle_exit(struct registers *guest_regs, struct per_cpu *cpu_data)
+{
+ u32 reason = vmcs_read32(VM_EXIT_REASON);
+ int sipi_vector;
+
+ if (reason & EXIT_REASONS_FAILED_VMENTRY) {
+ panic_printk("FATAL: VM-Entry failure, reason %d\n",
+ (u16)reason);
+ goto dump_and_stop;
+ }
+
+ switch (reason) {
+ case EXIT_REASON_EXCEPTION_NMI:
+ /* raise NMI_VECTOR with a software interrupt, then process
+  * pending APIC events like in the timer case */
+ asm volatile("int %0" : : "i" (NMI_VECTOR));
+ /* fall through */
+ case EXIT_REASON_PREEMPTION_TIMER:
+ vmx_disable_preemption_timer();
+ /* a non-negative result is a received SIPI vector */
+ sipi_vector = apic_handle_events(cpu_data);
+ if (sipi_vector >= 0) {
+ printk("CPU %d received SIPI, vector %x\n",
+ cpu_data->cpu_id, sipi_vector);
+ vmx_cpu_reset(guest_regs, cpu_data, sipi_vector);
+ }
+ return;
+ case EXIT_REASON_CPUID:
+ skip_emulated_instruction(X86_INST_LEN_CPUID);
+ /* clear the upper halves first: __cpuid only gets pointers to
+  * the low 32 bits of each register */
+ guest_regs->rax &= 0xffffffff;
+ guest_regs->rbx &= 0xffffffff;
+ guest_regs->rcx &= 0xffffffff;
+ guest_regs->rdx &= 0xffffffff;
+ __cpuid((u32 *)&guest_regs->rax, (u32 *)&guest_regs->rbx,
+ (u32 *)&guest_regs->rcx, (u32 *)&guest_regs->rdx);
+ return;
+ case EXIT_REASON_VMCALL:
+ /* hypercall: number in rax, result returned in rax */
+ skip_emulated_instruction(X86_INST_LEN_VMCALL);
+ switch (guest_regs->rax) {
+ case JAILHOUSE_HC_DISABLE:
+ guest_regs->rax = shutdown(cpu_data);
+ /* on success this call does not return */
+ if (guest_regs->rax == 0)
+ vmx_cpu_deactivate_vmm(guest_regs, cpu_data);
+ break;
+ case JAILHOUSE_HC_CELL_CREATE:
+ /* rdi carries the address of the cell configuration */
+ guest_regs->rax = cell_create(cpu_data,
+ guest_regs->rdi);
+ break;
+ default:
+ printk("CPU %d: Unknown vmcall %d, RIP: %p\n",
+ cpu_data->cpu_id, guest_regs->rax,
+ vmcs_read64(GUEST_RIP) - X86_INST_LEN_VMCALL);
+ guest_regs->rax = -ENOSYS;
+ break;
+ }
+ return;
+ case EXIT_REASON_CR_ACCESS:
+ if (vmx_handle_cr(guest_regs, cpu_data))
+ return;
+ break;
+ case EXIT_REASON_MSR_READ:
+ /* NOTE: RIP is advanced before the MSR is checked, so the
+  * register dump of an unhandled read shows the next RIP */
+ skip_emulated_instruction(X86_INST_LEN_RDMSR);
+ if (guest_regs->rcx >= MSR_X2APIC_BASE &&
+ guest_regs->rcx <= MSR_X2APIC_END) {
+ x2apic_handle_read(guest_regs);
+ return;
+ }
+ panic_printk("FATAL: Unhandled MSR read: %08x\n",
+ guest_regs->rcx);
+ break;
+ case EXIT_REASON_MSR_WRITE:
+ skip_emulated_instruction(X86_INST_LEN_WRMSR);
+ /* ICR writes get their own handler, checked before the
+  * generic x2APIC range */
+ if (guest_regs->rcx == MSR_X2APIC_ICR) {
+ apic_handle_icr_write(cpu_data, guest_regs->rax,
+ guest_regs->rdx);
+ return;
+ }
+ if (guest_regs->rcx >= MSR_X2APIC_BASE &&
+ guest_regs->rcx <= MSR_X2APIC_END) {
+ x2apic_handle_write(guest_regs);
+ return;
+ }
+ panic_printk("FATAL: Unhandled MSR write: %08x\n",
+ guest_regs->rcx);
+ break;
+ case EXIT_REASON_APIC_ACCESS:
+ if (vmx_handle_apic_access(guest_regs, cpu_data))
+ return;
+ break;
+ default:
+ panic_printk("FATAL: Unhandled VM-Exit, reason %d, ",
+ (u16)reason);
+ dump_vm_exit_details(reason);
+ break;
+ }
+dump_and_stop:
+ dump_guest_regs(guest_regs);
+ panic_stop(cpu_data);
+}
+
+/*
+ * Report a failed vmresume: print the VM_INSTRUCTION_ERROR field of the
+ * VMCS and stop this CPU via panic_stop().
+ */
+void vmx_entry_failure(struct per_cpu *cpu_data)
+{
+ panic_printk("FATAL: vmresume failed, error %d\n",
+ vmcs_read32(VM_INSTRUCTION_ERROR));
+ panic_stop(cpu_data);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <jailhouse/control.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/string.h>
+#include <asm/bitops.h>
+#include <asm/spinlock.h>
+
+struct jailhouse_system *system_config;
+struct cell *cell_list;
+
+static DEFINE_SPINLOCK(shutdown_lock);
+
+/*
+ * Find the next CPU after @cpu that is set in @cpu_set and differs from
+ * @exception.
+ *
+ * Returns the ID of that CPU, or a value larger than cpu_set->max_cpu_id
+ * when there is none. Passing -1 as @exception excludes no CPU.
+ */
+unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set, int exception)
+{
+	for (cpu++; cpu <= cpu_set->max_cpu_id; cpu++)
+		if (cpu != exception && test_bit(cpu, cpu_set->bitmap))
+			break;
+
+	return cpu;
+}
+
+/* Suspend all CPUs of the caller's cell except the calling CPU itself. */
+static void cell_suspend(struct per_cpu *cpu_data)
+{
+	struct cell *own_cell = cpu_data->cell;
+	unsigned int other;
+
+	for_each_cpu_except(other, own_cell->cpu_set, cpu_data->cpu_id)
+		arch_suspend_cpu(other);
+
+	printk("Suspended cell \"%s\"\n", own_cell->name);
+}
+
+/* Resume all CPUs of the caller's cell except the calling CPU itself. */
+static void cell_resume(struct per_cpu *cpu_data)
+{
+	unsigned int other;
+
+	for_each_cpu_except(other, cpu_data->cell->cpu_set,
+			    cpu_data->cpu_id)
+		arch_resume_cpu(other);
+}
+
+/*
+ * Return the smallest ID that no cell in cell_list uses. Whenever the
+ * candidate collides with an existing cell, it is incremented and the list
+ * is scanned again from its head.
+ */
+static unsigned int get_free_cell_id(void)
+{
+	unsigned int candidate = 0;
+	struct cell *cell = cell_list;
+
+	while (cell) {
+		if (cell->id == candidate) {
+			/* taken: try the next ID against the whole list */
+			candidate++;
+			cell = cell_list;
+		} else {
+			cell = cell->next;
+		}
+	}
+
+	return candidate;
+}
+
+/*
+ * Initialize a cell structure from its configuration.
+ *
+ * @cell:         cell to set up (name, id, cpu_set and page_offset are
+ *                written)
+ * @config:       cell descriptor, directly followed in memory by the CPU
+ *                set bitmap and then by the memory region array
+ * @copy_cpu_set: if true, copy the CPU bitmap from the configuration
+ *
+ * CPU sets that fit into cell->small_cpu_set are stored there; larger ones
+ * get a page from mem_pool. That page also holds the leading max_cpu_id
+ * word, so only PAGE_SIZE - sizeof(unsigned long) bytes are available for
+ * the bitmap.
+ *
+ * Returns 0 on success, -EINVAL if the configured CPU set is too large and
+ * -ENOMEM if no page could be allocated.
+ *
+ * NOTE(review): bitmap bytes beyond cpu_set_size are not initialized here;
+ * presumably the backing memory is already zeroed - confirm.
+ */
+int cell_init(struct cell *cell, struct jailhouse_cell_desc *config,
+	      bool copy_cpu_set)
+{
+	unsigned long *config_cpu_set =
+		(unsigned long *)(((void *)config) +
+				  sizeof(struct jailhouse_cell_desc));
+	unsigned long cpu_set_size = config->cpu_set_size;
+	struct jailhouse_memory *config_ram =
+		(struct jailhouse_memory *)(((void *)config_cpu_set) +
+					    cpu_set_size);
+	/* bitmap bytes that fit into one page next to max_cpu_id */
+	unsigned long page_bitmap_size = PAGE_SIZE - sizeof(unsigned long);
+	struct cpu_set *cpu_set;
+
+	memcpy(cell->name, config->name, sizeof(cell->name));
+	cell->id = get_free_cell_id();
+
+	/*
+	 * Reject anything the page-backed bitmap cannot hold. Checking
+	 * against PAGE_SIZE alone would let the memcpy below write past the
+	 * end of the allocated page.
+	 */
+	if (cpu_set_size > page_bitmap_size)
+		return -EINVAL;
+
+	if (cpu_set_size > sizeof(cell->small_cpu_set.bitmap)) {
+		cpu_set = page_alloc(&mem_pool, 1);
+		if (!cpu_set)
+			return -ENOMEM;
+		cpu_set->max_cpu_id = (page_bitmap_size * 8) - 1;
+	} else {
+		cpu_set = &cell->small_cpu_set;
+		cpu_set->max_cpu_id =
+			(sizeof(cell->small_cpu_set.bitmap) * 8) - 1;
+	}
+	cell->cpu_set = cpu_set;
+	if (copy_cpu_set)
+		memcpy(cell->cpu_set->bitmap, config_cpu_set, cpu_set_size);
+
+	/* the first memory region defines the cell's page offset */
+	cell->page_offset = config_ram->phys_start;
+
+	return 0;
+}
+
+/* Release the CPU set of a cell if it was allocated from mem_pool; the
+ * embedded small_cpu_set needs no cleanup. */
+static void destroy_cpu_set(struct cell *cell)
+{
+	if (cell->cpu_set == &cell->small_cpu_set)
+		return;
+
+	page_free(&mem_pool, cell->cpu_set, 1);
+}
+
+/*
+ * Validate the memory regions of a cell configuration.
+ *
+ * Each region must have page-aligned phys_start, virt_start and size, and
+ * may only use access flags from JAILHOUSE_MEM_VALID_FLAGS.
+ *
+ * Returns 0 if all regions are valid, -EINVAL (after reporting the first
+ * offending region) otherwise.
+ */
+int check_mem_regions(struct jailhouse_cell_desc *config)
+{
+	struct jailhouse_memory *regions;
+	unsigned int idx;
+
+	/* the region array follows the descriptor and the CPU set */
+	regions = (void *)config + sizeof(struct jailhouse_cell_desc) +
+		config->cpu_set_size;
+
+	for (idx = 0; idx < config->num_memory_regions; idx++) {
+		struct jailhouse_memory *mem = &regions[idx];
+		bool misaligned = ((mem->phys_start | mem->virt_start |
+				    mem->size) & ~PAGE_MASK) != 0;
+		bool bad_flags =
+			(mem->access_flags & ~JAILHOUSE_MEM_VALID_FLAGS) != 0;
+
+		if (!misaligned && !bad_flags)
+			continue;
+
+		printk("FATAL: Invalid memory bar (%p, %p, %p, %x)\n",
+		       mem->phys_start, mem->virt_start, mem->size,
+		       mem->access_flags);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Create a new cell from a configuration located at @config_address.
+ *
+ * The other CPUs of the caller's cell are suspended while the function
+ * runs. The configuration is mapped read-only at FOREIGN_MAPPING_BASE:
+ * first only the descriptor header to learn the total size, then the
+ * complete configuration. The CPUs listed in the new cell are removed from
+ * the caller's cell, the cell is appended to cell_list and its CPUs are
+ * reset.
+ *
+ * @cpu_data:       per-CPU data of the calling CPU
+ * @config_address: address of the cell configuration; it does not have to
+ *                  be page-aligned
+ *
+ * Returns 0 on success or a negative error code:
+ *   -ENOMEM  configuration too large or out of hypervisor memory
+ *   -EBUSY   the calling CPU is part of the new cell
+ *   -EINVAL  invalid memory regions, or CPUs not owned by the caller's cell
+ *   others   as returned by page_map_create() or arch_cell_create()
+ */
+int cell_create(struct per_cpu *cpu_data, unsigned long config_address)
+{
+	unsigned long page_offs = config_address & ~PAGE_MASK;
+	unsigned long header_size, total_size;
+	struct jailhouse_cell_desc *cfg;
+	struct cpu_set *shrinking_set;
+	unsigned int cell_pages, cpu;
+	struct cell *cell, *last;
+	int err;
+
+	cell_suspend(cpu_data);
+
+	/* the mapping starts at the page boundary below config_address, so
+	 * every mapped size has to include the offset into that page */
+	header_size = page_offs + sizeof(struct jailhouse_cell_desc);
+
+	err = page_map_create(hv_page_table, config_address & PAGE_MASK,
+			      header_size, FOREIGN_MAPPING_BASE,
+			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+			      PAGE_DIR_LEVELS);
+	if (err)
+		goto resume_out;
+
+	cfg = (struct jailhouse_cell_desc *)(FOREIGN_MAPPING_BASE +
+					     page_offs);
+
+	/*
+	 * Add the page offset here as well. Without it, the tail of a
+	 * configuration that does not start on a page boundary could be
+	 * left unmapped.
+	 */
+	total_size = page_offs + jailhouse_cell_config_size(cfg);
+	if (total_size >
+	    hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES * PAGE_SIZE) {
+		/* only the header is mapped so far: unmap exactly that */
+		total_size = header_size;
+		err = -ENOMEM;
+		goto unmap_out;
+	}
+
+	err = page_map_create(hv_page_table, config_address & PAGE_MASK,
+			      total_size, FOREIGN_MAPPING_BASE,
+			      PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+			      PAGE_DIR_LEVELS);
+	if (err)
+		goto unmap_out;
+
+	err = check_mem_regions(cfg);
+	if (err)
+		goto unmap_out;
+
+	cell_pages = PAGE_ALIGN(sizeof(*cell)) / PAGE_SIZE;
+	cell = page_alloc(&mem_pool, cell_pages);
+	if (!cell) {
+		err = -ENOMEM;
+		goto unmap_out;
+	}
+
+	err = cell_init(cell, cfg, true);
+	if (err)
+		goto err_free_cell;
+
+	/* don't assign the CPU we are currently running on */
+	if (cpu_data->cpu_id <= cell->cpu_set->max_cpu_id &&
+	    test_bit(cpu_data->cpu_id, cell->cpu_set->bitmap)) {
+		err = -EBUSY;
+		goto err_free_cpu_set;
+	}
+
+	shrinking_set = cpu_data->cell->cpu_set;
+
+	/* shrinking set must be super-set of new cell's cpu set */
+	if (shrinking_set->max_cpu_id < cell->cpu_set->max_cpu_id) {
+		err = -EINVAL;
+		goto err_free_cpu_set;
+	}
+	for_each_cpu(cpu, cell->cpu_set)
+		if (!test_bit(cpu, shrinking_set->bitmap)) {
+			err = -EINVAL;
+			goto err_free_cpu_set;
+		}
+
+	/* move the CPUs out of the caller's cell */
+	for_each_cpu(cpu, cell->cpu_set)
+		clear_bit(cpu, shrinking_set->bitmap);
+
+	err = arch_cell_create(cpu_data, cell, cfg);
+	if (err)
+		goto err_restore_cpu_set;
+
+	/* append the new cell to the end of the cell list */
+	last = cell_list;
+	while (last->next)
+		last = last->next;
+	last->next = cell;
+
+	/* update cell references and clean up before releasing the cpus of
+	 * the new cell */
+	for_each_cpu(cpu, cell->cpu_set)
+		per_cpu(cpu)->cell = cell;
+
+	printk("Created cell \"%s\"\n", cell->name);
+
+	page_map_destroy(hv_page_table, FOREIGN_MAPPING_BASE, total_size,
+			 PAGE_DIR_LEVELS);
+	page_map_dump_stats("after cell creation");
+
+	for_each_cpu(cpu, cell->cpu_set)
+		arch_reset_cpu(cpu);
+
+resume_out:
+	cell_resume(cpu_data);
+
+	return err;
+
+err_restore_cpu_set:
+	for_each_cpu(cpu, cell->cpu_set)
+		set_bit(cpu, shrinking_set->bitmap);
+err_free_cpu_set:
+	destroy_cpu_set(cell);
+err_free_cell:
+	page_free(&mem_pool, cell, cell_pages);
+unmap_out:
+	page_map_destroy(hv_page_table, FOREIGN_MAPPING_BASE, total_size,
+			 PAGE_DIR_LEVELS);
+	goto resume_out;
+}
+
+/*
+ * Handle a hypervisor shutdown request from one CPU.
+ *
+ * Under shutdown_lock, the first caller shuts down the CPUs of all cells
+ * that follow the first entry of cell_list; every caller then logs the
+ * release of its own CPU. Always returns 0.
+ */
+int shutdown(struct per_cpu *cpu_data)
+{
+	static bool shutdown_started;
+	struct cell *cell = cell_list->next;
+	unsigned int this_cpu = cpu_data->cpu_id;
+	unsigned int cpu;
+
+	// TODO: access control
+
+	spin_lock(&shutdown_lock);
+
+	if (!shutdown_started) {
+		shutdown_started = true;
+
+		printk("Shutting down hypervisor\n");
+
+		for (; cell; cell = cell->next) {
+			printk(" Closing cell \"%s\"\n", cell->name);
+
+			for_each_cpu(cpu, cell->cpu_set) {
+				printk(" Releasing CPU %d\n", cpu);
+				arch_shutdown_cpu(cpu);
+			}
+		}
+
+		printk(" Closing Linux cell \"%s\"\n", cell_list->name);
+	}
+	printk(" Releasing CPU %d\n", this_cpu);
+
+	spin_unlock(&shutdown_lock);
+
+	return 0;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/paging.h>
+
+/*
+ * Layout of the hypervisor image, linked at address 0: header, text,
+ * rodata, data, GOT and bss, followed by the page-aligned start of the
+ * page pool.
+ */
+SECTIONS
+{
+ . = 0;
+ /* first address of the image */
+ __start = .;
+ .header : { *(.header) }
+
+ . = ALIGN(16);
+ .text : { *(.text) }
+
+ . = ALIGN(16);
+ .rodata : { *(.rodata) }
+
+ . = ALIGN(16);
+ .data : { *(.data) }
+
+ /* __got_start/__got_end bracket all GOT entries */
+ .got : {
+ __got_start = .;
+ *(.got*)
+ __got_end = .;
+ }
+
+ . = ALIGN(16);
+ /* __bss_start/__bss_end bracket the bss section */
+ .bss : {
+ __bss_start = .;
+ *(.bss)
+ __bss_end = .;
+ }
+
+ /* first page-aligned address behind the image */
+ . = ALIGN(PAGE_SIZE);
+ __page_pool = .;
+
+ /* exception frame information is dropped from the image */
+ /DISCARD/ : {
+ *(.eh_frame*)
+ }
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+
+/*
+ * Header found at the start of an ACPI table. The fields add up to 36
+ * bytes without padding, matching the system description table header of
+ * the ACPI specification.
+ */
+struct acpi_table_header {
+ u32 signature; /* table identifier, 4 bytes */
+ u32 length; /* presumably the size of the whole table in bytes */
+ u8 revision;
+ u8 checksum;
+ char oem_id[6];
+ char oem_table_id[8];
+ u32 oem_revision;
+ char asl_compiler_id[4];
+ u32 asl_compiler_revision;
+};
+
+const struct acpi_table_header *
+acpi_find_table(char name[4], const struct acpi_table_header *start);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_CELL_CONFIG_H
+#define _JAILHOUSE_CELL_CONFIG_H
+
+#define JAILHOUSE_CELL_NAME_MAXLEN 31
+
+/*
+ * Fixed-size descriptor at the start of a cell configuration.
+ *
+ * The variable-sized parts follow it directly in memory: first the CPU set
+ * bitmap (cpu_set_size bytes), then the array of struct jailhouse_memory.
+ * jailhouse_cell_config_size() additionally accounts for IRQ lines, the
+ * PIO bitmap and PCI devices; presumably they follow in that order.
+ */
+struct jailhouse_cell_desc {
+ /* cell name, JAILHOUSE_CELL_NAME_MAXLEN characters plus one byte */
+ char name[JAILHOUSE_CELL_NAME_MAXLEN+1];
+
+ __u32 cpu_set_size; /* size of the CPU bitmap in bytes */
+ __u32 num_memory_regions; /* entries of struct jailhouse_memory */
+ __u32 num_irq_lines; /* entries of struct jailhouse_irq_line */
+ __u32 pio_bitmap_size; /* size of the PIO bitmap in bytes */
+
+ __u32 num_pci_devices; /* entries of struct jailhouse_pci_device */
+
+ __u32 padding[3];
+};
+
+#define JAILHOUSE_MEM_READ 0x0001
+#define JAILHOUSE_MEM_WRITE 0x0002
+#define JAILHOUSE_MEM_EXECUTE 0x0004
+#define JAILHOUSE_MEM_DMA 0x0008
+
+#define JAILHOUSE_MEM_VALID_FLAGS (JAILHOUSE_MEM_READ | \
+ JAILHOUSE_MEM_WRITE | \
+ JAILHOUSE_MEM_EXECUTE | \
+ JAILHOUSE_MEM_DMA)
+
+/*
+ * One memory region of a cell. check_mem_regions() requires phys_start,
+ * virt_start and size to be page-aligned and access_flags to contain only
+ * JAILHOUSE_MEM_* bits from JAILHOUSE_MEM_VALID_FLAGS.
+ */
+struct jailhouse_memory {
+ __u64 phys_start;
+ __u64 virt_start;
+ __u64 size;
+ __u64 access_flags;
+};
+
+struct jailhouse_irq_line {
+ __u32 num;
+ __u32 irqchip;
+};
+
+
+struct jailhouse_pci_bridge {
+ // TODO
+ __u32 num_device;
+};
+
+#define JAILHOUSE_PCI_TYPE_DEVICE 0x01
+#define JAILHOUSE_PCI_TYPE_BRIDGE 0x02
+
+/*
+ * PCI device entry of a cell configuration (packed, 8 bytes). The type
+ * field presumably takes one of the JAILHOUSE_PCI_TYPE_* values - confirm.
+ */
+struct jailhouse_pci_device {
+ // TODO
+ __u32 type;
+ __u16 domain;
+ __u8 bus;
+ __u8 devfn;
+} __attribute__((packed));
+
+
+/*
+ * System configuration: two memory regions (hypervisor and configuration
+ * memory) followed by the descriptor of the system cell. The variable-sized
+ * data of that cell follows the descriptor, see
+ * jailhouse_system_config_size().
+ */
+struct jailhouse_system {
+ struct jailhouse_memory hypervisor_memory;
+ struct jailhouse_memory config_memory;
+ struct jailhouse_cell_desc system;
+};
+
+/*
+ * Compute the size in bytes of a complete cell configuration: the
+ * descriptor itself plus CPU set, memory regions, IRQ lines, PIO bitmap
+ * and PCI devices. The result is truncated to 32 bits.
+ */
+static inline __u32
+jailhouse_cell_config_size(struct jailhouse_cell_desc *cell)
+{
+	__u32 size = sizeof(struct jailhouse_cell_desc);
+
+	size += cell->cpu_set_size;
+	size += cell->num_memory_regions * sizeof(struct jailhouse_memory);
+	size += cell->num_irq_lines * sizeof(struct jailhouse_irq_line);
+	size += cell->pio_bitmap_size;
+	size += cell->num_pci_devices * sizeof(struct jailhouse_pci_device);
+
+	return size;
+}
+
+/*
+ * Compute the size in bytes of a complete system configuration: both
+ * leading memory region descriptors plus the full configuration of the
+ * system cell.
+ */
+static inline __u32
+jailhouse_system_config_size(struct jailhouse_system *system)
+{
+	__u32 cell_size = jailhouse_cell_config_size(&system->system);
+
+	return sizeof(system->hypervisor_memory) +
+		sizeof(system->config_memory) + cell_size;
+}
+
+#endif /* !_JAILHOUSE_CELL_CONFIG_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+#include <asm/percpu.h>
+#include <jailhouse/cell-config.h>
+
+extern struct jailhouse_system *system_config;
+
+unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set,
+ int exception);
+
+/*
+ * Iterate over all CPUs set in a struct cpu_set.
+ * @cpu: unsigned loop variable receiving each CPU ID; it starts at -1 so
+ *       that the first next_cpu() call examines CPU 0
+ * @set: pointer to the struct cpu_set to walk (evaluated repeatedly)
+ */
+#define for_each_cpu(cpu, set) \
+ for ((cpu) = -1; \
+ (cpu) = next_cpu((cpu), (set), -1), \
+ (cpu) <= (set)->max_cpu_id; \
+ )
+
+/*
+ * Like for_each_cpu(), but skip the CPU whose ID equals @exception.
+ * @cpu:       unsigned loop variable receiving each CPU ID
+ * @set:       pointer to the struct cpu_set to walk (evaluated repeatedly)
+ * @exception: CPU ID to leave out (evaluated repeatedly)
+ */
+#define for_each_cpu_except(cpu, set, exception) \
+ for ((cpu) = -1; \
+ (cpu) = next_cpu((cpu), (set), (exception)), \
+ (cpu) <= (set)->max_cpu_id; \
+ )
+
+int check_mem_regions(struct jailhouse_cell_desc *config);
+int cell_init(struct cell *cell, struct jailhouse_cell_desc *config,
+ bool copy_cpu_set);
+
+int cell_create(struct per_cpu *cpu_data, unsigned long config_address);
+
+int shutdown(struct per_cpu *cpu_data);
+
+void arch_suspend_cpu(unsigned int cpu_id);
+void arch_resume_cpu(unsigned int cpu_id);
+void arch_reset_cpu(unsigned int cpu_id);
+void arch_shutdown_cpu(unsigned int cpu_id);
+
+int arch_cell_create(struct per_cpu *cpu_data, struct cell *new_cell,
+ struct jailhouse_cell_desc *config);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ENTRY_H
+#define _JAILHOUSE_ENTRY_H
+
+#include <jailhouse/header.h>
+#include <asm/percpu.h>
+
+#include <jailhouse/cell-config.h>
+
+#define EIO 5
+#define ENOMEM 12
+#define EBUSY 16
+#define ENODEV 19
+#define EINVAL 22
+#define ERANGE 34
+#define ENOSYS 38
+
+extern struct jailhouse_header hypervisor_header;
+extern void *config_memory;
+
+int arch_entry(int cpu_id);
+void got_init(void);
+void vm_exit(void);
+
+int entry(struct per_cpu *cpu_data);
+
+int arch_init_early(struct cell *linux_cell,
+ struct jailhouse_cell_desc *config);
+int arch_cpu_init(struct per_cpu *cpu_data);
+int arch_init_late(struct cell *linux_cell,
+ struct jailhouse_cell_desc *config);
+void __attribute__((noreturn)) arch_cpu_activate_vmm(struct per_cpu *cpu_data);
+void arch_cpu_restore(struct per_cpu *cpu_data);
+
+#endif /* !_JAILHOUSE_ENTRY_H */
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#define JAILHOUSE_SIGNATURE "JAILHOUS"
+
+/*
+ * Header of the hypervisor image. The first group of fields is filled in
+ * when the image is built, the second group by the loader.
+ */
+struct jailhouse_header {
+ /* filled at build time */
+ char signature[8]; /* presumably JAILHOUSE_SIGNATURE - confirm */
+ unsigned long bss_start;
+ unsigned long bss_end;
+ unsigned long percpu_size;
+ unsigned long entry;
+
+ /* filled by loader */
+ unsigned long size;
+ /* difference between hypervisor virtual and physical addresses, see
+  * page_map_hvirt2phys() */
+ unsigned long page_offset;
+ unsigned int possible_cpus;
+ unsigned int online_cpus;
+};
+
+typedef int (*entry_func)(unsigned int);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/jailhouse.h>
+
+#define JAILHOUSE_HC_DISABLE 0
+#define JAILHOUSE_HC_CELL_CREATE 1
+#define JAILHOUSE_HC_CELL_DESTROY 2
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/*
+ * Result of mmio_parse() for an MMIO-accessing instruction.
+ * NOTE(review): field meanings are inferred from their names (instruction
+ * length, access size, register involved) - confirm against mmio_parse().
+ */
+struct mmio_access {
+ unsigned int inst_len;
+ unsigned int size;
+ unsigned int reg;
+};
+
+/* Perform one volatile 32-bit read from @address and return the value. */
+static inline u32 mmio_read32(void *address)
+{
+	volatile u32 *reg = address;
+
+	return *reg;
+}
+
+/* Perform one volatile 64-bit read from @address and return the value. */
+static inline u64 mmio_read64(void *address)
+{
+	volatile u64 *reg = address;
+
+	return *reg;
+}
+
+/* Perform one volatile 32-bit write of @value to @address. */
+static inline void mmio_write32(void *address, u32 value)
+{
+	volatile u32 *reg = address;
+
+	*reg = value;
+}
+
+/* Perform one volatile 64-bit write of @value to @address. */
+static inline void mmio_write64(void *address, u64 value)
+{
+	volatile u64 *reg = address;
+
+	*reg = value;
+}
+
+struct mmio_access mmio_parse(struct per_cpu *cpu_data, unsigned long pc,
+ unsigned long page_table_addr, bool is_write);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <asm/types.h>
+#include <asm/paging.h>
+
+/*
+ * Add PAGE_SIZE - 1 to s and mask the result with PAGE_MASK, i.e. round s
+ * up to a page boundary (assuming PAGE_MASK clears the in-page offset
+ * bits). The argument is parenthesized so that expressions with
+ * low-precedence operators, e.g. PAGE_ALIGN(a | b), expand correctly.
+ */
+#define PAGE_ALIGN(s)		(((s) + PAGE_SIZE - 1) & PAGE_MASK)
+
+/*
+ * Descriptor of a pool of pages handed out by page_alloc() and returned
+ * via page_free().
+ */
+struct page_pool {
+ void *base_address; /* start address of the pool */
+ unsigned long pages; /* number of pages in the pool */
+ unsigned long used_pages;
+ unsigned long *used_bitmap; /* one bit per page, set when in use */
+ unsigned long flags;
+};
+
+extern struct page_pool mem_pool;
+extern struct page_pool remap_pool;
+
+extern pgd_t *hv_page_table;
+
+void *page_alloc(struct page_pool *pool, unsigned int num);
+void page_free(struct page_pool *pool, void *first_page, unsigned int num);
+
+/* Translate a hypervisor virtual address to its physical address by
+ * subtracting the page offset stored in the hypervisor header. */
+static inline unsigned long page_map_hvirt2phys(void *hvirt)
+{
+	unsigned long virt = (unsigned long)hvirt;
+
+	return virt - hypervisor_header.page_offset;
+}
+
+/* Translate a physical address to the matching hypervisor virtual address
+ * by adding the page offset stored in the hypervisor header. */
+static inline void *page_map_phys2hvirt(unsigned long phys)
+{
+	void *base = (void *)phys;
+
+	return base + hypervisor_header.page_offset;
+}
+
+unsigned long page_map_virt2phys(pgd_t *page_table,
+ unsigned long page_table_offset,
+ unsigned long virt);
+
+int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
+ unsigned long virt, unsigned long flags,
+ unsigned long table_flags, unsigned int levels);
+void page_map_destroy(pgd_t *page_table, unsigned long virt,
+ unsigned long size, unsigned int levels);
+
+void *page_map_get_foreign_page(unsigned int mapping_region,
+ unsigned long page_table_paddr,
+ unsigned long page_table_offset,
+ unsigned long virt, unsigned long flags);
+void page_map_release_foreign_page(unsigned int mapping_region);
+
+int paging_init(void);
+void page_map_dump_stats(const char *when);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+
+extern volatile unsigned long panic_in_progress;
+extern unsigned int panic_cpu;
+
+void printk(const char *fmt, ...);
+
+void panic_printk(const char *fmt, ...);
+
+void arch_dbg_write_init(void);
+void arch_dbg_write(const char *msg);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/processor.h>
+
+/*
+ * Presumably returns the physical ID of the executing CPU (implemented
+ * elsewhere - TODO confirm); panic_printk() uses it to identify its caller.
+ */
+int phys_processor_id(void);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+/* Copy @n bytes from @s to @d (implementation not part of this file). */
+void *memcpy(void *d, const void *s, unsigned long n);
+/* Fill @n bytes at @s with the byte value @c and return @s. */
+void *memset(void *s, int c, unsigned long n);
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/string.h>
+#include <asm/types.h>
+
+/*
+ * Fill the first @n bytes of the memory area @s with the byte value @c.
+ * Returns @s.
+ */
+void *memset(void *s, int c, unsigned long n)
+{
+	u8 *dst = s;
+	unsigned long i;
+
+	for (i = 0; i < n; i++)
+		dst[i] = c;
+
+	return s;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/string.h>
+#include <jailhouse/control.h>
+#include <asm/bitops.h>
+
+/* Number of pages a single bitmap page can track. */
+#define BITS_PER_PAGE (PAGE_SIZE * 8)
+
+/* Pool flag: zero every page when it is returned via page_free(). */
+#define PAGE_SCRUB_ON_FREE 0x1
+
+/* Symbols referenced by address only; presumably provided by the linker script. */
+extern u8 __start[], __page_pool[];
+
+/* Pool of hypervisor memory pages, set up by paging_init(). */
+struct page_pool mem_pool;
+/* Pool of remappable virtual pages; its bitmap is allocated by paging_init(). */
+struct page_pool remap_pool = {
+ .base_address = (void *)REMAP_BASE_ADDR,
+ .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
+};
+
+/* Page table of the hypervisor, allocated by paging_init(). */
+pgd_t *hv_page_table;
+
+/*
+ * Allocate a single page from @pool.
+ *
+ * Scans the usage bitmap word by word for the first clear bit, marks it as
+ * used and returns the address of the corresponding page. The scan includes
+ * a trailing partial bitmap word, so a pool whose page count is not a
+ * multiple of BITS_PER_LONG can still hand out its last pages. Bits at or
+ * beyond pool->pages are never allocated. The bitmap storage must cover that
+ * last word completely (paging_init() sizes the bitmap in whole pages).
+ *
+ * Returns NULL if no free page is left.
+ */
+static void *page_alloc_one(struct page_pool *pool)
+{
+	unsigned long words = (pool->pages + BITS_PER_LONG - 1) / BITS_PER_LONG;
+	unsigned long word, page_nr;
+
+	for (word = 0; word < words; word++) {
+		if (pool->used_bitmap[word] == ~0UL)
+			continue;
+
+		page_nr = ffz(pool->used_bitmap[word]) + word * BITS_PER_LONG;
+		/* can only trigger in the trailing partial word: pool is full */
+		if (page_nr >= pool->pages)
+			break;
+
+		set_bit(page_nr, pool->used_bitmap);
+		pool->used_pages++;
+		return pool->base_address + page_nr * PAGE_SIZE;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate @num physically consecutive pages from @pool.
+ *
+ * Pages are taken one by one; as soon as a page does not directly follow
+ * its predecessor (or the pool is exhausted), everything obtained so far is
+ * returned to the pool and the allocation fails. This includes the
+ * non-adjacent page itself, which would otherwise stay marked as used.
+ *
+ * Note that at least one page is allocated even if @num is 0.
+ *
+ * Returns the address of the first page or NULL on failure.
+ */
+void *page_alloc(struct page_pool *pool, unsigned int num)
+{
+	void *start, *last, *next;
+	unsigned int allocated;
+
+	start = page_alloc_one(pool);
+	if (!start)
+		return NULL;
+
+	for (allocated = 1, last = start; allocated < num;
+	     allocated++, last = next) {
+		next = page_alloc_one(pool);
+		if (next != last + PAGE_SIZE) {
+			/* page_free() ignores a NULL page */
+			page_free(pool, next, 1);
+			page_free(pool, start, allocated);
+			return NULL;
+		}
+	}
+
+	return start;
+}
+
+/*
+ * Return @num consecutive pages, starting at @page, to @pool.
+ *
+ * Pools flagged with PAGE_SCRUB_ON_FREE get every page zeroed before its
+ * bitmap bit is cleared. A NULL @page is silently ignored.
+ */
+void page_free(struct page_pool *pool, void *page, unsigned int num)
+{
+	unsigned long index;
+	unsigned int done;
+
+	if (!page)
+		return;
+
+	for (done = 0; done < num; done++, page += PAGE_SIZE) {
+		if (pool->flags & PAGE_SCRUB_ON_FREE)
+			memset(page, 0, PAGE_SIZE);
+
+		index = (page - pool->base_address) / PAGE_SIZE;
+		clear_bit(index, pool->used_bitmap);
+		pool->used_pages--;
+	}
+}
+
+/*
+ * Translate @virt into a physical address by walking @page_table.
+ *
+ * @page_table_offset is passed to the per-level offset helpers that follow
+ * a table entry to the next level. Huge pages at PMD level are resolved
+ * directly.
+ *
+ * Returns the physical address, or INVALID_PHYS_ADDR if any entry on the
+ * way is not valid.
+ */
+unsigned long page_map_virt2phys(pgd_t *page_table,
+				 unsigned long page_table_offset,
+				 unsigned long virt)
+{
+#if PAGE_DIR_LEVELS == 4
+	pgd_t *pgd;
+#endif
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+#if PAGE_DIR_LEVELS == 4
+	pgd = pgd_offset(page_table, virt);
+	if (!pgd_valid(pgd))
+		return INVALID_PHYS_ADDR;
+
+	pud = pud4l_offset(pgd, page_table_offset, virt);
+#elif PAGE_DIR_LEVELS == 3
+	/* with 3 levels, the PUD is the top-level table (as in page_map_create) */
+	pud = pud3l_offset(page_table, virt);
+#else
+# error Unsupported paging level
+#endif
+	if (!pud_valid(pud))
+		return INVALID_PHYS_ADDR;
+
+	pmd = pmd_offset(pud, page_table_offset, virt);
+	/* validate the PMD entry itself, not its parent once more */
+	if (!pmd_valid(pmd))
+		return INVALID_PHYS_ADDR;
+
+	if (pmd_is_hugepage(pmd))
+		return phys_address_hugepage(pmd, virt);
+
+	pte = pte_offset(pmd, page_table_offset, virt);
+	if (!pte_valid(pte))
+		return INVALID_PHYS_ADDR;
+
+	return phys_address(pte, virt);
+}
+
+/*
+ * Create page-wise mappings of the physical range starting at @phys to the
+ * virtual range starting at @virt in @page_table.
+ *
+ * @size is rounded up to a multiple of the page size. Intermediate tables
+ * that are not yet valid are allocated from mem_pool and entered with
+ * @table_flags; the final page entries are written with @flags.
+ * @levels selects a 4- or 3-level table layout.
+ *
+ * Returns 0 on success, -ENOMEM if a table could not be allocated or
+ * -EINVAL for an unsupported @levels value. Mappings created before a
+ * failure are left in place.
+ */
+int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
+		    unsigned long virt, unsigned long flags,
+		    unsigned long table_flags, unsigned int levels)
+{
+	unsigned long hv_offset = hypervisor_header.page_offset;
+	unsigned long remaining = PAGE_ALIGN(size);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	while (remaining > 0) {
+		if (levels == 4) {
+			pgd = pgd_offset(page_table, virt);
+			if (!pgd_valid(pgd)) {
+				pud = page_alloc(&mem_pool, 1);
+				if (!pud)
+					return -ENOMEM;
+				set_pgd(pgd, page_map_hvirt2phys(pud),
+					table_flags);
+			}
+			pud = pud4l_offset(pgd, hv_offset, virt);
+		} else if (levels == 3) {
+			pud = pud3l_offset(page_table, virt);
+		} else {
+			return -EINVAL;
+		}
+
+		if (!pud_valid(pud)) {
+			pmd = page_alloc(&mem_pool, 1);
+			if (!pmd)
+				return -ENOMEM;
+			set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
+		}
+
+		pmd = pmd_offset(pud, hv_offset, virt);
+		if (!pmd_valid(pmd)) {
+			pte = page_alloc(&mem_pool, 1);
+			if (!pte)
+				return -ENOMEM;
+			set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
+		}
+
+		pte = pte_offset(pmd, hv_offset, virt);
+		set_pte(pte, phys, flags);
+
+		phys += PAGE_SIZE;
+		virt += PAGE_SIZE;
+		remaining -= PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+/*
+ * Remove the page mappings of the virtual range [@virt, @virt + @size) from
+ * @page_table. @size is rounded up to a multiple of the page size.
+ *
+ * For every page the PTE is cleared. Whenever the helpers pt_empty(),
+ * pmd_empty() or pud_empty() then report a table as empty, that table is
+ * returned to mem_pool and its parent entry is cleared, bottom-up.
+ * Addresses without a valid mapping path are skipped. The TLB is flushed
+ * once at the end; an unsupported @levels value returns without flushing.
+ */
+void page_map_destroy(pgd_t *page_table, unsigned long virt,
+ unsigned long size, unsigned int levels)
+{
+ unsigned long offs = hypervisor_header.page_offset;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (size = PAGE_ALIGN(size); size > 0;
+ virt += PAGE_SIZE, size -= PAGE_SIZE) {
+ switch (levels) {
+ case 4:
+ pgd = pgd_offset(page_table, virt);
+ if (!pgd_valid(pgd))
+ continue;
+
+ pud = pud4l_offset(pgd, offs, virt);
+ break;
+ case 3:
+ pgd = 0; /* silence compiler warning */
+ pud = pud3l_offset(page_table, virt);
+ break;
+ default:
+ return;
+ }
+ if (!pud_valid(pud))
+ continue;
+
+ pmd = pmd_offset(pud, offs, virt);
+ if (!pmd_valid(pmd))
+ continue;
+
+ pte = pte_offset(pmd, offs, virt);
+ clear_pte(pte);
+
+ /* release the page table once its last entry is gone */
+ if (!pt_empty(pmd, offs))
+ continue;
+ page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);
+ clear_pmd(pmd);
+
+ /* same for the PMD table ... */
+ if (!pmd_empty(pud, offs))
+ continue;
+ page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
+ clear_pud(pud);
+
+ /* ... and, with 4 levels only, for the PUD table */
+ if (levels < 4 || !pud_empty(pgd, offs))
+ continue;
+ page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
+ clear_pgd(pgd);
+ }
+
+ flush_tlb();
+}
+
+/*
+ * Temporarily map one page of a foreign address space into the hypervisor.
+ *
+ * @mapping_region selects a window of NUM_FOREIGN_PAGES pages starting at
+ * FOREIGN_MAPPING_BASE. The first page of the window receives the target
+ * page (mapped with @flags); the following pages are used to map the
+ * foreign page table levels read-only, one after the other, while walking
+ * from the table at physical address @page_table_paddr down to @virt.
+ * @page_table_offset is added to the physical addresses that get mapped and
+ * is passed to the per-level offset helpers.
+ *
+ * Returns the hypervisor virtual address of the target page. On any error
+ * (mapping failure or invalid foreign entry) the window is released again
+ * and NULL is returned.
+ */
+void *page_map_get_foreign_page(unsigned int mapping_region,
+ unsigned long page_table_paddr,
+ unsigned long page_table_offset,
+ unsigned long virt, unsigned long flags)
+{
+ unsigned long page_virt, pt_virt, phys;
+#if PAGE_DIR_LEVELS == 4
+ pgd_t *pgd;
+#endif
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ int err;
+
+ page_virt = FOREIGN_MAPPING_BASE +
+ mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES;
+
+ /* map the top-level foreign table into the second page of the window */
+ pt_virt = page_virt + PAGE_SIZE;
+ phys = page_table_paddr + page_table_offset;
+ err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+ PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+ PAGE_DIR_LEVELS);
+ if (err)
+ goto error_release;
+
+#if PAGE_DIR_LEVELS == 4
+ pgd = pgd_offset((pgd_t *)pt_virt, virt);
+ if (!pgd_valid(pgd))
+ goto error_release;
+ pt_virt += PAGE_SIZE;
+ phys = (unsigned long)pud4l_offset(pgd, page_table_offset, 0);
+ err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+ PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+ PAGE_DIR_LEVELS);
+ if (err)
+ goto error_release;
+
+ /*
+ * NOTE(review): the address of the local pt_virt is passed as if it
+ * were a table entry - presumably so that the helper derives the next
+ * table from the temporary mapping. Confirm against the definition of
+ * pud4l_offset(), including how page_table_offset is applied here.
+ */
+ pud = pud4l_offset((pgd_t *)&pt_virt, page_table_offset, virt);
+#elif PAGE_DIR_LEVELS == 3
+ pud = pud3l_offset((pgd_t *)pt_virt, virt);
+#else
+# error Unsupported paging level
+#endif
+ if (!pud_valid(pud))
+ goto error_release;
+ pt_virt += PAGE_SIZE;
+ phys = (unsigned long)pmd_offset(pud, page_table_offset, 0);
+ err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+ PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+ PAGE_DIR_LEVELS);
+ if (err)
+ goto error_release;
+
+ /* NOTE(review): same &pt_virt construct as above - confirm */
+ pmd = pmd_offset((pud_t *)&pt_virt, page_table_offset, virt);
+ if (!pmd_valid(pmd))
+ goto error_release;
+ if (pmd_is_hugepage(pmd))
+ phys = phys_address_hugepage(pmd, virt);
+ else {
+ pt_virt += PAGE_SIZE;
+ phys = (unsigned long)pte_offset(pmd, page_table_offset, 0);
+ err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+ PAGE_READONLY_FLAGS,
+ PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
+ if (err)
+ goto error_release;
+
+ /* NOTE(review): same &pt_virt construct as above - confirm */
+ pte = pte_offset((pmd_t *)&pt_virt, page_table_offset, virt);
+ if (!pte_valid(pte))
+ goto error_release;
+ phys = phys_address(pte, 0) + page_table_offset;
+ }
+
+ /* finally map the target page into the first page of the window */
+ err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,
+ flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
+ if (err)
+ goto error_release;
+
+ return (void *)page_virt;
+
+error_release:
+ page_map_release_foreign_page(mapping_region);
+ return NULL;
+}
+
+/*
+ * Drop all mappings inside the foreign mapping window that belongs to
+ * @mapping_region (NUM_FOREIGN_PAGES pages above FOREIGN_MAPPING_BASE).
+ */
+void page_map_release_foreign_page(unsigned int mapping_region)
+{
+	unsigned long window_start = FOREIGN_MAPPING_BASE +
+		mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES;
+	unsigned long window_size = NUM_FOREIGN_PAGES * PAGE_SIZE;
+
+	page_map_destroy(hv_page_table, window_start, window_size,
+			 PAGE_DIR_LEVELS);
+}
+
+/*
+ * Initialize the page pools and the hypervisor page table.
+ *
+ * The memory pool starts at __page_pool and covers the rest of the
+ * hypervisor memory. Its first pages are already occupied, in this order,
+ * by the per-CPU data, the system configuration and the pool's own usage
+ * bitmap; they are marked as used. Afterwards the bitmap of the remap pool
+ * is allocated, the hypervisor page table is created and the range
+ * [__start, __start + hypervisor_header.size) is mapped into it page by
+ * page.
+ *
+ * Returns 0 on success. Any failure, including a failed allocation of the
+ * remap bitmap, is reported as -ENOMEM after printing a message.
+ */
+int paging_init(void)
+{
+	unsigned long per_cpu_pages, config_pages, bitmap_pages;
+	unsigned long n;
+	u8 *addr;
+	int err;
+
+	mem_pool.pages =
+		(hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
+	per_cpu_pages = hypervisor_header.possible_cpus *
+		sizeof(struct per_cpu) / PAGE_SIZE;
+	bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;
+
+	/* the system configuration directly follows the per-CPU data */
+	system_config = (struct jailhouse_system *)
+		(__page_pool + per_cpu_pages * PAGE_SIZE);
+	config_pages = (jailhouse_system_config_size(system_config) +
+			PAGE_SIZE - 1) / PAGE_SIZE;
+
+	if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
+		goto error_nomem;
+
+	mem_pool.base_address = __page_pool;
+	/* the usage bitmap is placed right behind the configuration */
+	mem_pool.used_bitmap =
+		(unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
+				  config_pages * PAGE_SIZE);
+	mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
+	for (n = 0; n < mem_pool.used_pages; n++)
+		set_bit(n, mem_pool.used_bitmap);
+	mem_pool.flags = PAGE_SCRUB_ON_FREE;
+
+	remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
+	/* do not write through a NULL bitmap if the pool is too small */
+	if (!remap_pool.used_bitmap)
+		goto error_nomem;
+	/*
+	 * Reserve NUM_FOREIGN_PAGES remap pages per possible CPU -
+	 * presumably the foreign mapping windows; TODO confirm that
+	 * FOREIGN_MAPPING_BASE lies at the start of the remap area.
+	 */
+	remap_pool.used_pages =
+		hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
+	for (n = 0; n < remap_pool.used_pages; n++)
+		set_bit(n, remap_pool.used_bitmap);
+
+	hv_page_table = page_alloc(&mem_pool, 1);
+	if (!hv_page_table)
+		goto error_nomem;
+
+	/* Replicate hypervisor mapping of Linux */
+	for (addr = __start; addr < __start + hypervisor_header.size;
+	     addr += PAGE_SIZE) {
+		err = page_map_create(hv_page_table, page_map_hvirt2phys(addr),
+				      PAGE_SIZE, (unsigned long)addr,
+				      PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
+				      PAGE_DIR_LEVELS);
+		if (err)
+			goto error_nomem;
+	}
+
+	return 0;
+
+error_nomem:
+	printk("FATAL: page pool much too small\n");
+	return -ENOMEM;
+}
+
+/*
+ * Print used/total page counts of the memory and the remap pool.
+ * @when is a free-form string inserted into the message.
+ * NOTE(review): the counters are printed with %d - confirm this matches the
+ * field types of struct page_pool.
+ */
+void page_map_dump_stats(const char *when)
+{
+ printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
+ mem_pool.used_pages, mem_pool.pages,
+ remap_pool.used_pages, remap_pool.pages);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#if BITS_PER_LONG < 64
+
+/*
+ * 64-bit unsigned division for targets without native 64-bit divide.
+ *
+ * Shift-and-subtract: in every round the largest power-of-two multiple of
+ * @divisor that still fits into the remaining dividend is removed. The
+ * multiple is only doubled while twice its value does not exceed the
+ * dividend, so it can never overflow 64 bits - not even for operands with
+ * the top bit set.
+ *
+ * @divisor must not be 0. Returns the truncated quotient.
+ */
+static unsigned long long div_u64_u64(unsigned long long dividend,
+				      unsigned long long divisor)
+{
+	unsigned long long result = 0;
+	unsigned long long tmp_res, tmp_div;
+
+	while (dividend >= divisor) {
+		tmp_div = divisor;
+		tmp_res = 1;
+		/* tmp_div <= dividend / 2 implies 2 * tmp_div cannot wrap */
+		while (tmp_div <= (dividend >> 1)) {
+			tmp_div <<= 1;
+			tmp_res <<= 1;
+		}
+		dividend -= tmp_div;
+		result += tmp_res;
+	}
+	return result;
+}
+
+#else /* BITS_PER_LONG >= 64 */
+
+/* Native 64-bit division is available, use it directly. */
+static inline unsigned long long div_u64_u64(unsigned long long dividend,
+					     unsigned long long divisor)
+{
+	return dividend / divisor;
+}
+
+#endif /* BITS_PER_LONG >= 64 */
+
+/*
+ * Write the decimal representation of @value to @buf, without leading
+ * zeros (a lone "0" for the value 0) and without a terminating NUL.
+ * Returns a pointer just behind the last character written.
+ */
+static char *uint2str(unsigned long long value, char *buf)
+{
+	unsigned long long divisor, digit;
+	int started = 0;
+
+	/* walk from the highest possible decimal position down to the ones */
+	for (divisor = 10000000000000000000ULL; divisor > 0;
+	     divisor = div_u64_u64(divisor, 10)) {
+		digit = div_u64_u64(value, divisor);
+		value -= digit * divisor;
+
+		/* suppress leading zeros, but always emit the last digit */
+		if (digit == 0 && !started && divisor != 1)
+			continue;
+
+		*buf++ = '0' + digit;
+		started = 1;
+	}
+
+	return buf;
+}
+
+/*
+ * Write the decimal representation of the signed @value to @buf, with a
+ * leading '-' for negative numbers and without a terminating NUL.
+ * Returns a pointer just behind the last character written.
+ */
+static char *int2str(long long value, char *buf)
+{
+	unsigned long long magnitude = value;
+
+	if (value < 0) {
+		*buf++ = '-';
+		/* negate as unsigned: well-defined even for the minimum value */
+		magnitude = 0ULL - magnitude;
+	}
+	return uint2str(magnitude, buf);
+}
+
+/*
+ * Write the lower-case hexadecimal representation of @value to @buf,
+ * without a terminating NUL.
+ *
+ * Leading zeros are suppressed, except for digit positions whose place
+ * value (1, 0x10, 0x100, ...) has its bit set in @leading_zero_mask; the
+ * last digit is always written.
+ *
+ * Digits are extracted by shifting instead of dividing, so the function
+ * does not depend on the software division helper.
+ *
+ * Returns a pointer just behind the last character written.
+ */
+static char *hex2str(unsigned long long value, char *buf,
+		     unsigned long long leading_zero_mask)
+{
+	const char hexdigit[] = "0123456789abcdef";
+	unsigned int digit;
+	int first_digit = 1;
+	int shift;
+
+	for (shift = 60; shift >= 0; shift -= 4) {
+		digit = (value >> shift) & 0xf;
+		if (!first_digit || digit > 0 || shift == 0 ||
+		    (1ULL << shift) & leading_zero_mask) {
+			*buf++ = hexdigit[digit];
+			first_digit = 0;
+		}
+	}
+
+	return buf;
+}
+
+/*
+ * Right-align the text in [@p0, @p1) within a field of @width characters
+ * by moving it to the end of the field and padding the front with blanks.
+ * Text that already fills the field is left untouched.
+ * Returns a pointer just behind the (possibly padded) text.
+ */
+static char *align(char *p1, char *p0, unsigned long width)
+{
+	unsigned long len = p1 - p0;
+	unsigned long pad, n;
+
+	if (len >= width)
+		return p1;
+
+	pad = width - len;
+	/* move back to front so overlapping ranges are handled correctly */
+	for (n = len; n > 0; n--)
+		p0[pad + n - 1] = p0[n - 1];
+	memset(p0, ' ', pad);
+
+	return p0 + width;
+}
+
+/*
+ * Minimal printf-style formatter writing to console_write().
+ *
+ * Supported conversions: %d, %u, %x (each optionally with a decimal field
+ * width and/or the 'l' length modifier), %p and %s. Any other conversion
+ * character is printed literally together with the '%'. Every '\n' is
+ * followed by a '\r'. Text is collected in a local buffer that is flushed
+ * before each conversion, after each newline and when it is full.
+ *
+ * A format string that ends inside a conversion (e.g. a trailing '%')
+ * prints the '%' and stops at the terminator instead of reading beyond it.
+ *
+ * console_write() has to be defined by the including file.
+ */
+static void __vprintk(const char *fmt, va_list ap)
+{
+	char buf[128];
+	char *p, *p0;
+	char c;
+	unsigned long long v;
+	unsigned int width;
+	bool longmode;
+
+	p = buf;
+
+	while (1) {
+		c = *fmt++;
+		if (c == 0)
+			break;
+		else if (c == '%') {
+			/* flush pending text, conversions start at buf */
+			*p = 0;
+			console_write(buf);
+			p = buf;
+
+			c = *fmt++;
+
+			width = 0;
+			p0 = p;
+			while (c >= '0' && c <= '9') {
+				width = width * 10 + c - '0';
+				c = *fmt++;
+				/* ignore widths that would not fit the buffer */
+				if (width >= sizeof(buf) - 1)
+					width = 0;
+			}
+
+			longmode = false;
+			if (c == 'l') {
+				longmode = true;
+				c = *fmt++;
+			}
+
+			switch (c) {
+			case 'd':
+				if (longmode)
+					v = va_arg(ap, long);
+				else
+					v = va_arg(ap, int);
+				p = int2str(v, p);
+				p = align(p, p0, width);
+				break;
+			case 'p':
+				*p++ = '0';
+				*p++ = 'x';
+				v = va_arg(ap, unsigned long);
+				p = hex2str(v, p, (unsigned long)-1);
+				break;
+			case 's':
+				console_write(va_arg(ap, const char *));
+				break;
+			case 'u':
+				if (longmode)
+					v = va_arg(ap, unsigned long);
+				else
+					v = va_arg(ap, unsigned int);
+				p = uint2str(v, p);
+				p = align(p, p0, width);
+				break;
+			case 'x':
+				if (longmode)
+					v = va_arg(ap, unsigned long);
+				else
+					v = va_arg(ap, unsigned int);
+				p = hex2str(v, p, 0);
+				p = align(p, p0, width);
+				break;
+			default:
+				*p++ = '%';
+				if (c == 0) {
+					/*
+					 * Format ended inside a conversion:
+					 * step back so that the loop sees
+					 * the terminator and stops.
+					 */
+					fmt--;
+					break;
+				}
+				*p++ = c;
+				break;
+			}
+		} else if (c == '\n') {
+			*p++ = c;
+			*p = 0;
+			console_write(buf);
+			p = buf;
+			*p++ = '\r';
+		} else
+			*p++ = c;
+
+		/* keep room for the terminating NUL */
+		if (p >= &buf[sizeof(buf) - 1]) {
+			*p = 0;
+			console_write(buf);
+			p = buf;
+		}
+	}
+
+	*p = 0;
+	console_write(buf);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/string.h>
+#include <asm/spinlock.h>
+
+/* Bit 0 is set atomically by the first caller of panic_printk(). */
+volatile unsigned long panic_in_progress;
+/* CPU that owns the panic output; -1 as long as no panic was reported. */
+unsigned int panic_cpu = -1;
+
+/* Serializes regular printk() output. */
+static DEFINE_SPINLOCK(printk_lock);
+
+/* The shared formatter is textually included and writes via arch_dbg_write(). */
+#define console_write(msg) arch_dbg_write(msg)
+#include "printk-core.c"
+
+/*
+ * Print a formatted message to the debug console.
+ * The formatting runs under printk_lock so that messages are not mixed.
+ * See __vprintk() for the supported conversions.
+ */
+void printk(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ spin_lock(&printk_lock);
+ __vprintk(fmt, ap);
+ spin_unlock(&printk_lock);
+
+ va_end(ap);
+}
+
+/*
+ * Print a formatted message while panicking.
+ *
+ * Does not take printk_lock. The first CPU that sets bit 0 of
+ * panic_in_progress becomes the panic CPU; calls from any other CPU return
+ * without printing, further calls from the panic CPU are let through.
+ */
+void panic_printk(const char *fmt, ...)
+{
+ unsigned int cpu_id = phys_processor_id();
+ va_list ap;
+
+ /* bit already set and set by somebody else: stay silent */
+ if (test_and_set_bit(0, &panic_in_progress) && panic_cpu != cpu_id)
+ return;
+ panic_cpu = cpu_id;
+
+ va_start(ap, fmt);
+
+ __vprintk(fmt, ap);
+
+ va_end(ap);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/processor.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/entry.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/control.h>
+#include <jailhouse/string.h>
+#include <asm/spinlock.h>
+
+/* Symbols referenced by address only; presumably provided by the linker script. */
+extern u8 __start[];
+extern u8 __bss_start[], __bss_end[];
+
+/* Hypervisor mapping of the configuration memory, set up by init_early(). */
+void *config_memory;
+
+/* Serializes the initialization steps of all CPUs in entry(). */
+static DEFINE_SPINLOCK(init_lock);
+/* ID of the first CPU that entered; -1 until then. */
+static unsigned int master_cpu_id = -1;
+/* Number of CPUs that completed cpu_init() successfully. */
+static volatile unsigned int initialized_cpus;
+/* First error seen during initialization, 0 if none. */
+static volatile int error;
+/* The cell all CPUs are registered with during setup. */
+static struct cell linux_cell;
+
+/*
+ * Assign the CPU described by @cpu_data to the Linux cell.
+ *
+ * The CPU must be contained in the CPU set of the system configuration;
+ * that bitmap is located directly behind the cell descriptor of
+ * system_config->system and is cpu_set_size bytes long.
+ *
+ * Returns 0 on success or -EINVAL if the CPU is not part of the set.
+ */
+static int register_linux_cpu(struct per_cpu *cpu_data)
+{
+	void *desc_end = ((void *)&system_config->system) +
+		sizeof(struct jailhouse_cell_desc);
+	unsigned long *system_cpu_set = (unsigned long *)desc_end;
+
+	if (cpu_data->cpu_id >= system_config->system.cpu_set_size * 8)
+		return -EINVAL;
+	if (!test_bit(cpu_data->cpu_id, system_cpu_set))
+		return -EINVAL;
+
+	cpu_data->cell = &linux_cell;
+	set_bit(cpu_data->cpu_id, linux_cell.cpu_set->bitmap);
+
+	return 0;
+}
+
+/*
+ * One-time early initialization, executed by the first CPU that enters.
+ *
+ * Records @cpu_id as master CPU, brings up the debug console and paging,
+ * maps the optional configuration memory read-only into the remap area,
+ * validates the memory regions of the system cell and initializes the
+ * Linux cell. Failures are reported via the global 'error' variable.
+ */
+static void init_early(unsigned int cpu_id)
+{
+ unsigned long size;
+
+ master_cpu_id = cpu_id;
+
+ /* must be first, printk/arch_dbg_write uses the GOT */
+ got_init();
+
+ arch_dbg_write_init();
+
+ printk("\nInitializing Jailhouse hypervisor on CPU %d\n", cpu_id);
+ printk("Code location: %p\n",
+ __start + sizeof(struct jailhouse_header));
+
+ error = paging_init();
+ if (error)
+ return;
+
+ if (system_config->config_memory.size > 0) {
+ size = PAGE_ALIGN(system_config->config_memory.size);
+
+ /* reserve virtual pages, then map the physical region there */
+ config_memory = page_alloc(&remap_pool, size / PAGE_SIZE);
+ if (!config_memory) {
+ error = -ENOMEM;
+ return;
+ }
+
+ error = page_map_create(hv_page_table,
+ system_config->config_memory.phys_start,
+ size, (unsigned long)config_memory,
+ PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+ PAGE_DIR_LEVELS);
+ if (error)
+ return;
+ }
+
+ error = check_mem_regions(&system_config->system);
+ if (error)
+ return;
+
+ error = arch_init_early(&linux_cell, &system_config->system);
+ if (error)
+ return;
+
+ error = cell_init(&linux_cell, &system_config->system, false);
+ if (error)
+ return;
+
+ /* the Linux cell becomes the head of the cell list */
+ cell_list = &linux_cell;
+
+ page_map_dump_stats("after early setup");
+ printk("Initializing first processor:\n");
+}
+
+/*
+ * Per-CPU initialization: register the CPU with the Linux cell and run the
+ * architecture-specific CPU setup.
+ *
+ * Success increments initialized_cpus. A failure is recorded in the global
+ * 'error' variable unless an earlier error is already stored there.
+ */
+static void cpu_init(struct per_cpu *cpu_data)
+{
+	int err;
+
+	printk(" CPU %d... ", cpu_data->cpu_id);
+
+	err = register_linux_cpu(cpu_data);
+	if (!err)
+		err = arch_cpu_init(cpu_data);
+
+	if (err) {
+		printk("FAILED\n");
+		/* keep the first error that occurred */
+		if (!error)
+			error = err;
+		return;
+	}
+
+	printk("OK\n");
+	initialized_cpus++;
+}
+
+/*
+ * Late one-time initialization, run by the master CPU after its own
+ * cpu_init() succeeded. The result of arch_init_late() is stored in the
+ * global 'error' variable.
+ */
+static void init_late(void)
+{
+ error = arch_init_late(&linux_cell, &system_config->system);
+ if (error)
+ return;
+
+ page_map_dump_stats("after late setup");
+ printk("Initializing remaining processors:\n");
+}
+
+/*
+ * Hypervisor entry point, called on every CPU with its per-CPU data.
+ *
+ * Under init_lock, the first CPU to arrive becomes the master and performs
+ * the one-time early and late setup around its own CPU initialization; all
+ * other CPUs only run cpu_init(). Afterwards each CPU spins until either an
+ * error was recorded or as many CPUs as hypervisor_header.online_cpus have
+ * been initialized.
+ *
+ * On error, the CPU state is restored and the error code is returned.
+ * NOTE(review): on success no return statement follows
+ * arch_cpu_activate_vmm() - presumably that function never returns; confirm.
+ */
+int entry(struct per_cpu *cpu_data)
+{
+ bool master = false;
+
+ spin_lock(&init_lock);
+
+ if (master_cpu_id == -1) {
+ master = true;
+ init_early(cpu_data->cpu_id);
+ }
+
+ if (!error) {
+ cpu_init(cpu_data);
+
+ if (master && !error)
+ init_late();
+ }
+
+ spin_unlock(&init_lock);
+
+ /* wait for all CPUs, or bail out as soon as one of them failed */
+ while (!error && initialized_cpus < hypervisor_header.online_cpus)
+ cpu_relax();
+
+ if (error) {
+ arch_cpu_restore(cpu_data);
+ return error;
+ }
+
+ if (master)
+ printk("Activating hypervisor\n");
+
+ /* point of no return */
+ arch_cpu_activate_vmm(cpu_data);
+}
+
+/*
+ * Header of the hypervisor image, emitted into the ".header" section.
+ * Only the fields known at build time are initialized here; other fields
+ * used in this code (e.g. size, page_offset, possible_cpus, online_cpus)
+ * are presumably filled in by the loader - TODO confirm.
+ */
+struct jailhouse_header __attribute__((section(".header")))
+hypervisor_header = {
+ .signature = JAILHOUSE_SIGNATURE,
+ .bss_start = (unsigned long)__bss_start,
+ .bss_end = (unsigned long)__bss_end,
+ .percpu_size = sizeof(struct per_cpu),
+ .entry = (unsigned long)arch_entry,
+};
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+# Inmates are freestanding: replace the kernel's include path and compiler
+# flags instead of extending them.
+LINUXINCLUDE := -I$(src)
+KBUILD_CFLAGS := -g -Os -Wall -Wstrict-prototypes -Wtype-limits \
+ -Wmissing-declarations -Wmissing-prototypes \
+ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+ -fno-common -I.
+# Pull in the hypervisor build configuration if it exists.
+ifneq ($(wildcard $(src)/../hypervisor/include/jailhouse/config.h),)
+KBUILD_CFLAGS += -include $(src)/../hypervisor/include/jailhouse/config.h
+endif
+
+# objcopy produces raw binary images.
+OBJCOPYFLAGS := -O binary
+# -T is presumably completed by the first prerequisite of the link rules
+# below (the linker script) - confirm against kbuild's cmd_ld.
+LDFLAGS := -T
+
+# The demo inmates are only built for x86.
+ifeq ($(SRCARCH), x86)
+always := tiny-demo.bin apic-demo.bin
+endif
+
+# Objects of the tiny demo inmate and its link step.
+tiny-demo-y := tiny-demo.o header.o printk.o pm-timer.o
+targets += $(tiny-demo-y)
+
+TINY_DEMO_OBJS = $(addprefix $(obj)/,$(tiny-demo-y))
+
+# if_changed only works for files listed in 'targets' (not 'target').
+targets += tiny-demo-linked.o
+$(obj)/tiny-demo-linked.o: $(src)/inmate.lds $(TINY_DEMO_OBJS)
+	$(call if_changed,ld)
+
+
+# Objects of the APIC timer demo inmate and its link step.
+apic-demo-y := apic-demo.o header.o printk.o pm-timer.o
+targets += $(apic-demo-y)
+
+APIC_DEMO_OBJS = $(addprefix $(obj)/,$(apic-demo-y))
+
+# if_changed only works for files listed in 'targets' (not 'target').
+targets += apic-demo-linked.o
+$(obj)/apic-demo-linked.o: $(src)/inmate.lds $(APIC_DEMO_OBJS)
+	$(call if_changed,ld)
+
+
+# Convert each linked inmate object into a raw binary image.
+targets += tiny-demo.bin apic-demo.bin
+$(obj)/%.bin: $(obj)/%-linked.o
+ $(call if_changed,objcopy)
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+#define NS_PER_MSEC 1000000UL
+#define NS_PER_SEC 1000000000UL
+
+/* The IDT covers vectors 0..32; the timer uses the last one. */
+#define NUM_IDT_DESC 33
+#define APIC_TIMER_VECTOR 32
+
+/* MSR numbers of the x2APIC registers used by this demo. */
+#define X2APIC_EOI 0x80b
+#define X2APIC_LVTT 0x832
+#define X2APIC_TMICT 0x838
+#define X2APIC_TMCCT 0x839
+#define X2APIC_TDCR 0x83e
+
+#define APIC_EOI_ACK 0
+
+/* Each IDT descriptor occupies four 32-bit words. */
+static u32 idt[NUM_IDT_DESC * 4];
+/* Calibrated APIC timer rate, in timer ticks per second. */
+static unsigned long apic_frequency;
+/* PM timer time (ns) at which the next timer interrupt is expected. */
+static unsigned long expected_time;
+/* Smallest and largest jitter seen so far; min starts at the maximum value. */
+static unsigned long min = -1, max;
+
+/* Memory operand of the descriptor table load instruction (see write_idtr). */
+struct desc_table_reg {
+ u16 limit; /* table size in bytes minus 1 */
+ u64 base; /* address of the table */
+} __attribute__((packed));
+
+/*
+ * Read the model-specific register @msr and return its value, assembled
+ * from the two 32-bit halves delivered in EAX (low) and EDX (high).
+ */
+static inline unsigned long read_msr(unsigned int msr)
+{
+	unsigned long value;
+	u32 lo, hi;
+
+	asm volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
+
+	value = (unsigned long)hi << 32;
+	value |= lo;
+	return value;
+}
+
+/*
+ * Write @val to the model-specific register @msr. The value is passed via
+ * the "a" register and its upper 32 bits via the "d" register.
+ */
+static inline void write_msr(unsigned int msr, unsigned long val)
+{
+ asm volatile("wrmsr"
+ : /* no output */
+ : "c" (msr), "a" (val), "d" (val >> 32)
+ : "memory");
+}
+
+/*
+ * Load the interrupt descriptor table register from @val.
+ *
+ * The descriptor is read by the instruction, so it has to be an input
+ * operand. Declared as an output, the compiler would be free to drop the
+ * preceding stores that fill in *val.
+ */
+static inline void write_idtr(struct desc_table_reg *val)
+{
+	asm volatile("lidtq %0" : : "m" (*val));
+}
+
+/*
+ * APIC timer interrupt handler, called from irq_entry.
+ *
+ * Acknowledges the interrupt, measures how far the PM timer is past the
+ * expected expiry time (the jitter), tracks minimum and maximum, prints
+ * them and programs the timer for the next period, 100 ms after the
+ * previous expected time.
+ */
+void irq_handler(void)
+{
+ unsigned long delta;
+
+ write_msr(X2APIC_EOI, APIC_EOI_ACK);
+
+ delta = read_pm_timer() - expected_time;
+ if (delta < min)
+ min = delta;
+ if (delta > max)
+ max = delta;
+ printk("Timer fired, jitter: %6ld ns, min: %6ld ns, max: %6ld ns\n",
+ delta, min, max);
+
+ expected_time += 100 * NS_PER_MSEC;
+ /* convert the remaining nanoseconds into APIC timer ticks */
+ write_msr(X2APIC_TMICT,
+ (expected_time - read_pm_timer()) * apic_frequency / NS_PER_SEC);
+}
+
+/*
+ * Calibrate the APIC timer against the PM timer, install the timer
+ * interrupt handler and start the first timer period.
+ */
+static void init_apic(void)
+{
+ unsigned long entry = (unsigned long)irq_entry + FSEGMENT_BASE;
+ struct desc_table_reg dtr;
+ unsigned long start, end;
+ unsigned long tmr;
+
+ /* divide configuration; presumably divide-by-16, see the "* 16" below */
+ write_msr(X2APIC_TDCR, 3);
+
+ /* let the timer count down from its maximum for about 100 ms */
+ start = read_pm_timer();
+ write_msr(X2APIC_TMICT, 0xffffffff);
+
+ while (read_pm_timer() - start < 100 * NS_PER_MSEC)
+ cpu_relax();
+
+ end = read_pm_timer();
+ tmr = read_msr(X2APIC_TMCCT);
+
+ /* elapsed ticks scaled to ticks per second */
+ apic_frequency = (0xffffffff - tmr) * NS_PER_SEC / (end - start);
+
+ printk("Calibrated APIC frequency: %lu kHz\n",
+ (apic_frequency * 16 + 500) / 1000);
+
+ /* gate descriptor: entry address split over three words, selector INMATE_CS */
+ idt[APIC_TIMER_VECTOR * 4] = (entry & 0xffff) | (INMATE_CS << 16);
+ idt[APIC_TIMER_VECTOR * 4 + 1] = 0x8e00 | (entry & 0xffff0000);
+ idt[APIC_TIMER_VECTOR * 4 + 2] = entry >> 32;
+
+ dtr.limit = NUM_IDT_DESC * 16 - 1;
+ dtr.base = (u64)&idt;
+ write_idtr(&dtr);
+
+ /* route the timer to our vector and fire the first interrupt quickly */
+ write_msr(X2APIC_LVTT, APIC_TIMER_VECTOR);
+ expected_time = read_pm_timer();
+ write_msr(X2APIC_TMICT, 1);
+
+ asm volatile("sti");
+}
+
+/*
+ * Entry point of the APIC demo: start the timer test if a PM timer was
+ * found, then idle forever and let the interrupt handler do the work.
+ */
+void inmate_main(void)
+{
+	if (init_pm_timer())
+		init_apic();
+
+	for (;;)
+		asm volatile("hlt");
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+/* CR0 bits: protected mode enable, write protect, paging */
+#define X86_CR0_PE 0x00000001
+#define X86_CR0_WP 0x00010000
+#define X86_CR0_PG 0x80000000
+
+/* CR4 bit: physical address extension */
+#define X86_CR4_PAE 0x00000020
+
+/* EFER MSR and its long mode enable bit */
+#define MSR_EFER 0xc0000080
+#define EFER_LME 0x00000100
+
+/*
+ * Boot path of an inmate: 16-bit entry -> 32-bit protected mode ->
+ * 64-bit long mode -> inmate_main.
+ */
+ .code16gcc
+ .section ".boot", "ax"
+
+/* the linker script places .boot at 0xfff0; jump to the startup code */
+ ljmp $0xf000,$start16
+
+
+ .section ".startup", "ax"
+
+start16:
+/* load the GDT (addressed via CS) and switch on protected mode */
+ cs,lgdtl gdt_ptr
+
+ mov %cr0,%eax
+ or $X86_CR0_PE,%al
+ mov %eax,%cr0
+
+ ljmpl $LOADER_CS32,$start32 + FSEGMENT_BASE
+
+
+ .code32
+start32:
+/* enable PAE, install the page tables, set EFER.LME and turn on paging */
+ mov %cr4,%eax
+ or $X86_CR4_PAE,%eax
+ mov %eax,%cr4
+
+ mov $pml4 + FSEGMENT_BASE,%eax
+ mov %eax,%cr3
+
+ movl $MSR_EFER,%ecx
+ rdmsr
+ or $EFER_LME,%eax
+ wrmsr
+
+ mov $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE),%eax
+ mov %eax,%cr0
+
+ ljmpl $INMATE_CS,$start64 + FSEGMENT_BASE
+
+ .code64
+start64:
+/* set up the stack and jump (not call) to the C entry point */
+ mov $stack_top,%rsp
+
+ mov $inmate_main,%rax
+ jmpq *%rax
+
+
+/*
+ * GDT: null descriptor, then the descriptors selected by LOADER_CS32 (0x8)
+ * and INMATE_CS (0x10).
+ */
+ .align(16)
+gdt:
+ .quad 0
+ .quad 0x00c09b000000ffff
+ .quad 0x00af9b000000ffff
+
+/* operand for lgdt: limit and linear base address of the GDT */
+gdt_ptr:
+ .short gdt_ptr - gdt - 1
+ .long gdt + FSEGMENT_BASE
+
+/*
+ * Page tables: one entry per level. The low bits 0x003 / 0x083 are flag
+ * bits; presumably present + writable, and 0x80 marking a large page that
+ * starts at physical address 0 - confirm against the paging specification.
+ */
+ .align(4096)
+pml4:
+ .quad pdpt + FSEGMENT_BASE + 0x003
+
+ .align(4096)
+pdpt:
+ .quad pd + FSEGMENT_BASE + 0x003
+
+ .align(4096)
+pd:
+ .quad 0x0000000000000083
+
+
+/*
+ * Interrupt entry stub: saves nine general-purpose registers, calls the C
+ * function irq_handler, restores the registers in reverse order and
+ * returns from the interrupt.
+ * NOTE(review): the call target is adjusted by -FSEGMENT_BASE, presumably
+ * to compensate for the different link addresses of this section and
+ * .text - confirm against inmate.lds.
+ */
+ .global irq_entry
+ .balign 16
+irq_entry:
+ push %rax
+ push %rcx
+ push %rdx
+ push %rsi
+ push %rdi
+ push %r8
+ push %r9
+ push %r10
+ push %r11
+
+ call irq_handler - FSEGMENT_BASE
+
+ pop %r11
+ pop %r10
+ pop %r9
+ pop %r8
+ pop %rdi
+ pop %rsi
+ pop %rdx
+ pop %rcx
+ pop %rax
+
+ iretq
+
+
+/*
+ * to please linker if irq_entry remains unused: weak, empty fallback that
+ * is replaced by a C definition of irq_handler where one exists
+ */
+ .weak irq_handler
+irq_handler:
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+/* Offset added to startup-code symbols to form their linear address. */
+#define FSEGMENT_BASE 0xf0000
+
+/* GDT selectors: 32-bit loader code segment and 64-bit inmate code segment. */
+#define LOADER_CS32 0x8
+#define INMATE_CS 0x10
+
+#ifndef __ASSEMBLY__
+/* Fixed-width integer types; s64/u64 rely on 'long' being 64 bits wide. */
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long s64;
+typedef unsigned long u64;
+
+/* Minimal boolean type for the freestanding environment. */
+typedef enum { true=1, false=0 } bool;
+
+/* Executes "rep; nop"; used in the body of busy-wait loops. */
+static inline void cpu_relax(void)
+{
+ asm volatile("rep; nop");
+}
+
+/* Write the byte @v to I/O port @port. */
+static inline void outb(u8 v, u16 port)
+{
+ asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
+}
+
+/* Read one byte from I/O port @port. */
+static inline u8 inb(u16 port)
+{
+ u8 v;
+ asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
+ return v;
+}
+
+/* Read a 32-bit value from I/O port @port. */
+static inline u32 inl(u16 port)
+{
+ u32 v;
+ asm volatile("inl %1,%0" : "=a" (v) : "dN" (port));
+ return v;
+}
+
+/* Formatted output to the UART. */
+void printk(const char *fmt, ...);
+
+void *memset(void *s, int c, unsigned long n);
+
+/* Assembly interrupt entry stub and the C handler it calls. */
+extern u8 irq_entry[];
+void irq_handler(void);
+
+/* C entry point of an inmate, jumped to by the startup code. */
+void inmate_main(void);
+
+/* Probe for the PM timer; returns true if one was found. */
+bool init_pm_timer(void);
+/* Current PM timer value, converted to nanoseconds. */
+unsigned long read_pm_timer(void);
+#endif
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+/*
+ * Memory layout of an inmate image. Startup code is linked at offset 0,
+ * the boot jump at 0xfff0, the stack top and .bss at 0xe0000 and the
+ * remaining code and data from 0xf0000 on, directly behind the startup
+ * code.
+ */
+SECTIONS
+{
+ /* 16-bit sections */
+ . = 0;
+ .startup : { *(.startup) }
+
+ . = 0xfff0;
+ .boot : {
+ *(.boot)
+ . = ALIGN(16);
+ }
+
+ /* 32-bit sections */
+ . = 0xe0000;
+ stack_top = .;
+ .bss : { *(.bss) }
+
+ /* load addresses are the link addresses reduced to their low 16 bits */
+ . = 0xf0000 + SIZEOF(.startup);
+ .text : AT (ADDR(.text) & 0xffff) {
+ *(.text)
+ }
+
+ . = ALIGN(16);
+ .rodata : AT (ADDR(.rodata) & 0xffff) {
+ *(.rodata)
+ }
+
+ . = ALIGN(16);
+ .data : AT (ADDR(.data) & 0xffff) {
+ *(.data)
+ }
+
+ /DISCARD/ : {
+ *(.eh_frame*)
+ }
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+#define NS_PER_MSEC 1000000UL
+#define NS_PER_SEC 1000000000UL
+
+/* Tick rate of the PM timer and the time (ns) that 0x1000000 ticks take. */
+#define PM_TIMER_HZ 3579545
+#define PM_TIMER_OVERFLOW ((0x1000000 * 1000000000ULL) / PM_TIMER_HZ)
+
+/* Candidate I/O ports of the PM timer, terminated by 0. */
+static const unsigned int pm_timer_list[] = { 0x408, 0x1808, 0xb008, 0 };
+/* I/O port currently used by read_pm_timer(). */
+static unsigned int pm_timer;
+
+/*
+ * Read the PM timer and return the time in nanoseconds.
+ *
+ * The raw counter is converted to nanoseconds. Whenever the converted
+ * value is smaller than the one seen by the previous call, the counter is
+ * assumed to have wrapped and one overflow period is added to a running
+ * offset that is part of every result.
+ */
+unsigned long read_pm_timer(void)
+{
+	static unsigned long last, overflows;
+	unsigned long now_ns;
+
+	now_ns = (inl(pm_timer) * NS_PER_SEC) / PM_TIMER_HZ;
+	if (now_ns < last)
+		overflows += PM_TIMER_OVERFLOW;
+	last = now_ns;
+
+	return now_ns + overflows;
+}
+
+/*
+ * Locate the PM timer by probing the known I/O ports: a port is accepted
+ * as soon as two reads, separated by a short delay, return different
+ * values. The result of the search is printed.
+ *
+ * Returns true if a timer was found, false otherwise.
+ */
+bool init_pm_timer(void)
+{
+	unsigned long first, spin;
+	unsigned int idx;
+
+	for (idx = 0; pm_timer_list[idx]; idx++) {
+		pm_timer = pm_timer_list[idx];
+
+		first = read_pm_timer();
+		for (spin = 0; spin < 10; spin++)
+			cpu_relax();
+
+		if (read_pm_timer() != first) {
+			printk("Found PM Timer at %x\n", pm_timer);
+			return true;
+		}
+	}
+
+	printk("Could not find PM Timer\n");
+	return false;
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include <inmate.h>
+
+/* I/O port base of the UART, selected by the build configuration. */
+#ifdef CONFIG_UART_OXPCIE952
+#define UART_BASE 0xe010
+#else
+#define UART_BASE 0x3f8
+#endif
+/* Register offsets and the status bit polled before each transmit. */
+#define UART_TX 0x0
+#define UART_LSR 0x5
+#define UART_LSR_THRE 0x20
+
+/*
+ * Send the NUL-terminated string @msg to the UART, character by
+ * character. Before each write, the line status register is polled until
+ * the UART_LSR_THRE bit is set.
+ */
+static void uart_write(const char *msg)
+{
+	const char *cur;
+
+	for (cur = msg; *cur; cur++) {
+		while (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))
+			cpu_relax();
+		outb(*cur, UART_BASE + UART_TX);
+	}
+}
+
+/* Reuse the hypervisor's formatter, with the UART as its output. */
+#define console_write(msg) uart_write(msg)
+#include "../hypervisor/printk-core.c"
+
+/*
+ * Print a formatted message to the UART. No locking is performed.
+ * See __vprintk() for the supported conversions.
+ */
+void printk(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+
+ __vprintk(fmt, ap);
+
+ va_end(ap);
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+/*
+ * Entry point of the tiny demo: print a greeting and, if a PM timer is
+ * available, print its value once per second for ten seconds.
+ *
+ * The function never returns. The startup code enters it with a jump, so
+ * there is no return address on the stack; the CPU is therefore halted in
+ * a loop, which also covers wake-ups from the halt state.
+ */
+void inmate_main(void)
+{
+	unsigned long long start, now;
+	int n;
+
+	printk("Hello from this tiny cell!\n");
+
+	if (init_pm_timer()) {
+		start = read_pm_timer();
+		for (n = 0; n < 10; n++) {
+			/* busy-wait until one second has passed */
+			do {
+				now = read_pm_timer();
+				cpu_relax();
+			} while (now - start < 1000000000ULL);
+			start += 1000000000ULL;
+			/* %lu consumes an unsigned long argument */
+			printk("PM Timer: %11lu\n", (unsigned long)now);
+		}
+	}
+
+	while (1)
+		asm volatile("hlt");
+}
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * Description of one image to be loaded into a new cell.
+ * Field meanings are inferred from their names (the consumer is not part
+ * of this file - TODO confirm): location and size of the image data and
+ * the address it is to be placed at in the cell.
+ */
+struct jailhouse_preload_image {
+ __u64 source_address;
+ __u64 size;
+ __u64 target_address;
+ __u64 padding;
+};
+
+/*
+ * Argument of JAILHOUSE_CELL_CREATE: location and size of the cell
+ * configuration, followed by a variable number of image descriptors
+ * (presumably num_preload_images entries - TODO confirm).
+ */
+struct jailhouse_new_cell {
+ __u64 config_address;
+ __u32 config_size;
+ __u32 num_preload_images;
+ struct jailhouse_preload_image image[];
+};
+
+/* ioctl requests of the Jailhouse driver and their argument types. */
+#define JAILHOUSE_ENABLE _IOW(0, 0, struct jailhouse_system)
+#define JAILHOUSE_DISABLE _IO(0, 1)
+#define JAILHOUSE_CELL_CREATE _IOW(0, 2, struct jailhouse_new_cell)
+#define JAILHOUSE_CELL_DESTROY _IOW(0, 3, const char *)
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/firmware.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/cacheflush.h>
+
+#include <jailhouse.h>
+#include <jailhouse/header.h>
+#include <jailhouse/hypercall.h>
+
+#define JAILHOUSE_FW_NAME "jailhouse.bin"
+
+MODULE_DESCRIPTION("Loader for Jailhouse partitioning hypervisor");
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(JAILHOUSE_FW_NAME);
+
+static struct device *jailhouse_dev;
+static DEFINE_MUTEX(lock);
+static bool enabled;
+static void *hypervisor_mem;
+static cpumask_t offlined_cpus;
+static atomic_t call_done;
+static int error_code;
+
+#ifdef CONFIG_X86
+
+/*
+ * Map a physical memory range via ioremap_cache() and mark the mapping as
+ * executable. Returns the mapped address or NULL if the mapping failed.
+ */
+static void *jailhouse_ioremap(phys_addr_t start, unsigned long size)
+{
+	void *mapping = (__force void *)ioremap_cache(start, size);
+
+	if (!mapping)
+		return NULL;
+
+	set_memory_x((unsigned long)mapping, size / PAGE_SIZE);
+	return mapping;
+}
+
+#elif defined(CONFIG_ARM)
+
+#include <asm/mach/map.h>
+
+/* Map a physical memory range using memory type MT_MEMORY. */
+static void *jailhouse_ioremap(phys_addr_t start, unsigned long size)
+{
+	void __iomem *mapping = __arm_ioremap(start, size, MT_MEMORY);
+
+	return (__force void *)mapping;
+}
+
+#else
+#error Unsupported architecture
+#endif
+
+/*
+ * Per-CPU callback: call the hypervisor entry point described by the header
+ * passed in @info, store a non-zero result in error_code and signal
+ * completion by incrementing call_done.
+ */
+static void enter_hypervisor(void *info)
+{
+	struct jailhouse_header *hdr = info;
+	entry_func hv_entry = (entry_func)(hypervisor_mem + hdr->entry);
+	int result;
+
+	/* either returns 0 or the same error code across all CPUs */
+	result = hv_entry(smp_processor_id());
+	if (result != 0)
+		error_code = result;
+
+	atomic_inc(&call_done);
+}
+
+/*
+ * JAILHOUSE_ENABLE handler: copy the hypervisor firmware image into the
+ * memory region named by the user-supplied system configuration, place the
+ * configuration behind the per-CPU area and enter the hypervisor on all
+ * online CPUs.
+ *
+ * @arg: user-space pointer to the system configuration
+ *
+ * Returns 0 on success or a negative error code: -EFAULT if the configuration
+ * cannot be read, -EINTR if waiting for the lock was interrupted, -EBUSY if
+ * the hypervisor is already enabled or no module reference could be taken,
+ * -EINVAL for an invalid firmware image or a too small hypervisor memory
+ * region, -ENOMEM if that region cannot be mapped, otherwise the error
+ * reported by request_firmware() or by the hypervisor entry function.
+ */
+static int jailhouse_enable(struct jailhouse_system __user *arg)
+{
+	unsigned long hv_core_size, percpu_size, config_size;
+	const struct firmware *hypervisor;
+	struct jailhouse_system config_header;
+	struct jailhouse_memory *hv_mem = &config_header.hypervisor_memory;
+	struct jailhouse_header *header;
+	void *config_mem;
+	int err;
+
+	if (copy_from_user(&config_header, arg, sizeof(config_header)))
+		return -EFAULT;
+
+	if (mutex_lock_interruptible(&lock) != 0)
+		return -EINTR;
+
+	err = -EBUSY;
+	if (enabled || !try_module_get(THIS_MODULE))
+		goto error_unlock;
+
+	err = request_firmware(&hypervisor, JAILHOUSE_FW_NAME, jailhouse_dev);
+	if (err)
+		goto error_put_module;
+
+	err = -EINVAL;
+
+	/* the image must at least contain the header inspected below */
+	if (hypervisor->size < sizeof(*header))
+		goto error_release_fw;
+
+	header = (struct jailhouse_header *)hypervisor->data;
+	if (memcmp(header->signature, JAILHOUSE_SIGNATURE,
+		   sizeof(header->signature)) != 0)
+		goto error_release_fw;
+
+	hv_core_size = PAGE_ALIGN(header->bss_end);
+	percpu_size = num_possible_cpus() * header->percpu_size;
+	config_size = jailhouse_system_config_size(&config_header);
+	if (hv_mem->size <= hv_core_size + percpu_size + config_size)
+		goto error_release_fw;
+
+	/* the memcpy/memset pair below must stay inside the mapped region */
+	if (hypervisor->size > hv_mem->size)
+		goto error_release_fw;
+
+	/* CMA would be better... */
+	hypervisor_mem = jailhouse_ioremap(hv_mem->phys_start, hv_mem->size);
+	if (!hypervisor_mem) {
+		err = -ENOMEM;
+		goto error_release_fw;
+	}
+
+	memcpy(hypervisor_mem, hypervisor->data, hypervisor->size);
+	memset(hypervisor_mem + hypervisor->size, 0,
+	       hv_mem->size - hypervisor->size);
+
+	header = (struct jailhouse_header *)hypervisor_mem;
+	header->size = hv_mem->size;
+	header->page_offset =
+		(unsigned long)hypervisor_mem - hv_mem->phys_start;
+	header->possible_cpus = num_possible_cpus();
+
+	config_mem = hypervisor_mem + hv_core_size + percpu_size;
+	if (copy_from_user(config_mem, arg, config_size)) {
+		err = -EFAULT;
+		goto error_unmap;
+	}
+	/*
+	 * User space may have changed the configuration since it was read
+	 * for the first time. Restore the header all size checks above were
+	 * based on so that the hypervisor sees a consistent configuration.
+	 */
+	memcpy(config_mem, &config_header,
+	       min_t(unsigned long, config_size, sizeof(config_header)));
+
+	error_code = 0;
+
+	/* keep this CPU stable while all CPUs enter the hypervisor */
+	preempt_disable();
+
+	header->online_cpus = num_online_cpus();
+
+	atomic_set(&call_done, 0);
+	on_each_cpu(enter_hypervisor, header, 0);
+	/* on_each_cpu() was asked not to wait, so poll for completion */
+	while (atomic_read(&call_done) != num_online_cpus())
+		cpu_relax();
+
+	preempt_enable();
+
+	if (error_code) {
+		err = error_code;
+		goto error_unmap;
+	}
+
+	release_firmware(hypervisor);
+
+	enabled = true;
+
+	mutex_unlock(&lock);
+
+	printk("The Jailhouse is opening.\n");
+
+	return 0;
+
+error_unmap:
+	iounmap((__force void __iomem *)hypervisor_mem);
+
+error_release_fw:
+	release_firmware(hypervisor);
+
+error_put_module:
+	module_put(THIS_MODULE);
+
+error_unlock:
+	mutex_unlock(&lock);
+	return err;
+}
+
+/*
+ * Per-CPU callback: issue the JAILHOUSE_HC_DISABLE hypercall, store a
+ * non-zero result in error_code and signal completion via call_done.
+ */
+static void leave_hypervisor(void *info)
+{
+	/* either returns 0 or the same error code across all CPUs */
+	int result = jailhouse_call0(JAILHOUSE_HC_DISABLE);
+
+	if (result != 0)
+		error_code = result;
+
+	atomic_inc(&call_done);
+}
+
+/*
+ * JAILHOUSE_DISABLE handler: leave the hypervisor on all online CPUs, unmap
+ * the hypervisor memory and bring the CPUs recorded in offlined_cpus back
+ * online.
+ *
+ * Returns 0 on success, -EINTR if waiting for the lock was interrupted,
+ * -EINVAL if the hypervisor is not enabled, or the error code reported by
+ * the disable hypercall.
+ */
+static int jailhouse_disable(void)
+{
+	unsigned int cpu;
+	int err;
+
+	if (mutex_lock_interruptible(&lock) != 0)
+		return -EINTR;
+
+	if (!enabled) {
+		mutex_unlock(&lock);
+		return -EINVAL;
+	}
+
+	error_code = 0;
+
+	preempt_disable();
+
+	atomic_set(&call_done, 0);
+	on_each_cpu(leave_hypervisor, NULL, 0);
+	/* on_each_cpu() was asked not to wait, so poll for completion */
+	while (atomic_read(&call_done) != num_online_cpus())
+		cpu_relax();
+
+	preempt_enable();
+
+	err = error_code;
+	if (err)
+		goto unlock_out;
+
+	iounmap((__force void __iomem *)hypervisor_mem);
+
+	for_each_cpu_mask(cpu, offlined_cpus)
+		if (cpu_up(cpu) != 0)
+			printk("Jailhouse: failed to bring CPU %d back "
+			       "online\n", cpu);
+	/*
+	 * Forget the CPUs of this session. Stale entries would otherwise make
+	 * a later disable call cpu_up() for CPUs it never took offline.
+	 */
+	cpumask_clear(&offlined_cpus);
+
+	enabled = false;
+	module_put(THIS_MODULE);
+
+	printk("The Jailhouse was closed.\n");
+
+unlock_out:
+	mutex_unlock(&lock);
+
+	return err;
+}
+
+/*
+ * JAILHOUSE_CELL_CREATE handler: copy the cell configuration from user space,
+ * take the CPUs listed in its CPU set offline, load the preload image into
+ * the cell RAM and ask the hypervisor to create the cell.
+ *
+ * @arg: user-space pointer to a struct jailhouse_new_cell with exactly one
+ *       preload image
+ *
+ * Returns 0 on success or a negative error code: -EFAULT if user memory
+ * cannot be read, -EINVAL for an unsupported image count, an inconsistent
+ * configuration or a disabled hypervisor, -ENOMEM if the configuration
+ * buffer cannot be allocated, -EBUSY if the cell RAM cannot be mapped,
+ * -EINTR if waiting for the lock was interrupted, otherwise the error
+ * reported by cpu_down() or the hypercall.
+ */
+static int jailhouse_cell_create(struct jailhouse_new_cell __user *arg)
+{
+	struct {
+		struct jailhouse_new_cell cell;
+		struct jailhouse_preload_image image;
+	} cell_buffer;
+	struct jailhouse_new_cell *cell = &cell_buffer.cell;
+	struct jailhouse_preload_image *image = &cell->image[0];
+	unsigned int mask_pos, bit_pos, cpu;
+	struct jailhouse_cell_desc *config;
+	struct jailhouse_memory *ram;
+	size_t var_size;
+	void *cell_mem;
+	u8 *cpu_mask;
+	int err;
+
+	if (copy_from_user(cell, arg, sizeof(*cell)))
+		return -EFAULT;
+
+	if (cell->num_preload_images != 1)
+		return -EINVAL;
+
+	if (copy_from_user(cell->image, arg->image,
+			   sizeof(*cell->image) * cell->num_preload_images))
+		return -EFAULT;
+
+	/* the fixed-size descriptor is accessed unconditionally below */
+	if (cell->config_size < sizeof(*config))
+		return -EINVAL;
+
+	config = kmalloc(cell->config_size, GFP_KERNEL | GFP_DMA);
+	if (!config)
+		return -ENOMEM;
+
+	if (copy_from_user(config,
+			   (const void __user *)(unsigned long)
+			   cell->config_address, cell->config_size)) {
+		err = -EFAULT;
+		goto kfree_config_out;
+	}
+	config->name[JAILHOUSE_CELL_NAME_MAXLEN] = 0;
+
+	/*
+	 * The CPU set and the first memory region follow the descriptor; both
+	 * have to lie completely inside the buffer copied from user space.
+	 */
+	var_size = cell->config_size - sizeof(*config);
+	if (config->cpu_set_size > var_size ||
+	    var_size - config->cpu_set_size < sizeof(*ram)) {
+		err = -EINVAL;
+		goto kfree_config_out;
+	}
+
+	cpu_mask = ((void *)config) + sizeof(struct jailhouse_cell_desc);
+	ram = ((void *)cpu_mask) + config->cpu_set_size;
+
+	/*
+	 * Validate before any CPU is taken offline. The image check is written
+	 * such that target_address + size cannot wrap around.
+	 */
+	if (config->num_memory_regions < 1 || ram->size < 1024 * 1024 ||
+	    image->target_address > ram->size ||
+	    image->size > ram->size - image->target_address) {
+		err = -EINVAL;
+		goto kfree_config_out;
+	}
+
+	for (mask_pos = 0; mask_pos < config->cpu_set_size; mask_pos++)
+		for (bit_pos = 0; bit_pos < 8; bit_pos++) {
+			if (!(cpu_mask[mask_pos] & (1 << bit_pos)))
+				continue;
+			cpu = mask_pos * 8 + bit_pos;
+			/* never test CPU numbers beyond the online mask */
+			if (cpu < nr_cpu_ids && cpu_online(cpu)) {
+				err = cpu_down(cpu);
+				if (err)
+					goto kfree_config_out;
+				cpu_set(cpu, offlined_cpus);
+			}
+		}
+
+	cell_mem = jailhouse_ioremap(ram->phys_start, ram->size);
+	if (!cell_mem) {
+		err = -EBUSY;
+		goto kfree_config_out;
+	}
+	memset(cell_mem, 0, ram->size);
+
+	if (copy_from_user(cell_mem + image->target_address,
+			   (const void __user *)(unsigned long)
+			   image->source_address, image->size)) {
+		err = -EFAULT;
+		goto iounmap_out;
+	}
+
+	if (mutex_lock_interruptible(&lock) != 0) {
+		err = -EINTR;
+		/* the cell RAM is mapped at this point, so unmap it as well */
+		goto iounmap_out;
+	}
+
+	if (!enabled) {
+		err = -EINVAL;
+		goto unlock_out;
+	}
+
+	err = jailhouse_call1(JAILHOUSE_HC_CELL_CREATE, __pa(config));
+	if (err)
+		goto unlock_out;
+
+	printk("Created Jailhouse cell \"%s\"\n", config->name);
+
+unlock_out:
+	mutex_unlock(&lock);
+
+iounmap_out:
+	iounmap((__force void __iomem *)cell_mem);
+kfree_config_out:
+	kfree(config);
+
+	return err;
+}
+
+/*
+ * ioctl entry point of the jailhouse device: dispatch the request to its
+ * handler. JAILHOUSE_CELL_DESTROY yields -ENOSYS, unknown requests -EINVAL.
+ */
+static long jailhouse_ioctl(struct file *file, unsigned int ioctl,
+			    unsigned long arg)
+{
+	switch (ioctl) {
+	case JAILHOUSE_ENABLE:
+		return jailhouse_enable((struct jailhouse_system __user *)arg);
+	case JAILHOUSE_DISABLE:
+		return jailhouse_disable();
+	case JAILHOUSE_CELL_CREATE:
+		return jailhouse_cell_create(
+			(struct jailhouse_new_cell __user *)arg);
+	case JAILHOUSE_CELL_DESTROY:
+		return -ENOSYS;
+	default:
+		return -EINVAL;
+	}
+}
+
+/* File operations of the jailhouse device; requests arrive via ioctl. */
+static const struct file_operations jailhouse_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = jailhouse_ioctl,
+ .llseek = noop_llseek,
+};
+
+/* Misc device "jailhouse" with a dynamically assigned minor number. */
+static struct miscdevice jailhouse_misc_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "jailhouse",
+ .fops = &jailhouse_fops,
+};
+
+/*
+ * Module init: register the root device used for firmware requests and the
+ * misc device that receives the ioctls.
+ *
+ * Returns 0 on success or the error of the failing registration.
+ */
+static int __init jailhouse_init(void)
+{
+	int err;
+
+	jailhouse_dev = root_device_register("jailhouse");
+	if (IS_ERR(jailhouse_dev))
+		return PTR_ERR(jailhouse_dev);
+
+	err = misc_register(&jailhouse_misc_dev);
+	if (err) {
+		/* do not leak the root device when loading fails */
+		root_device_unregister(jailhouse_dev);
+	}
+
+	return err;
+}
+
+/* Module exit: remove the misc device first, then the root device. */
+static void __exit jailhouse_exit(void)
+{
+ misc_deregister(&jailhouse_misc_dev);
+ root_device_unregister(jailhouse_dev);
+}
+
+module_init(jailhouse_init);
+module_exit(jailhouse_exit);
--- /dev/null
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+# Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+#
+
+CC = $(CROSS_COMPILE)gcc
+
+CFLAGS = -g -O3 -I.. -I../hypervisor/include \
+	-Wall -Wmissing-declarations -Wmissing-prototypes
+
+jailhouse: jailhouse.c ../jailhouse.h ../hypervisor/include/jailhouse/cell-config.h
+	$(CC) $(CFLAGS) -o $@ $<
+
+# clean creates no file of that name, so run it even if such a file exists
+.PHONY: clean
+clean:
+	rm -f jailhouse
--- /dev/null
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ * Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <jailhouse.h>
+
+/* Print the command synopsis of the tool to stdout. */
+static void help(const char *progname)
+{
+	printf("%s <command> <args>\n", progname);
+	fputs("\nAvailable commands:\n"
+	      " enable CONFIGFILE\n"
+	      " disable\n"
+	      " cell create CONFIGFILE PRELOADIMAGE [-l ADDRESS]\n"
+	      " cell destroy NAME\n",
+	      stdout);
+}
+
+/*
+ * Open /dev/jailhouse for reading and writing.
+ *
+ * Returns the file descriptor; on failure the error is printed and the
+ * program terminates with exit status 1.
+ */
+static int open_dev(void)
+{
+	int fd;
+
+	fd = open("/dev/jailhouse", O_RDWR);
+	if (fd < 0) {
+		perror("opening /dev/jailhouse");
+		exit(1);
+	}
+	return fd;
+}
+
+/*
+ * Read a complete file into a newly allocated buffer.
+ *
+ * @name: path of the file to read
+ * @size: if not NULL, receives the file size in bytes
+ *
+ * Returns the buffer, which the caller has to free(). Any failure is reported
+ * on stderr and terminates the program with exit status 1.
+ */
+static void *read_file(const char *name, size_t *size)
+{
+	struct stat st;
+	size_t total, done = 0;
+	ssize_t chunk;
+	char *buffer;
+	int fd;
+
+	fd = open(name, O_RDONLY);
+	if (fd < 0) {
+		fprintf(stderr, "opening %s: %s\n", name, strerror(errno));
+		exit(1);
+	}
+
+	if (fstat(fd, &st) < 0) {
+		perror("fstat");
+		exit(1);
+	}
+	total = st.st_size;
+
+	/* malloc(0) may legitimately return NULL, so never request 0 bytes */
+	buffer = malloc(total ? total : 1);
+	if (!buffer) {
+		fprintf(stderr, "insufficient memory\n");
+		exit(1);
+	}
+
+	/* read() may deliver less than requested; loop until all is read */
+	while (done < total) {
+		chunk = read(fd, buffer + done, total - done);
+		if (chunk < 0) {
+			if (errno == EINTR)
+				continue;
+			fprintf(stderr, "reading %s: %s\n", name,
+				strerror(errno));
+			exit(1);
+		}
+		if (chunk == 0) {
+			fprintf(stderr, "reading %s: unexpected end of file\n",
+				name);
+			exit(1);
+		}
+		done += chunk;
+	}
+
+	close(fd);
+
+	if (size)
+		*size = total;
+
+	return buffer;
+}
+
+/*
+ * "enable CONFIGFILE": load the system configuration file and pass it to the
+ * driver via JAILHOUSE_ENABLE. Returns the result of the ioctl.
+ */
+static int enable(int argc, char *argv[])
+{
+	void *config;
+	int fd, ret;
+
+	if (argc != 3) {
+		help(argv[0]);
+		exit(1);
+	}
+
+	config = read_file(argv[2], NULL);
+	fd = open_dev();
+
+	ret = ioctl(fd, JAILHOUSE_ENABLE, config);
+	if (ret != 0)
+		perror("JAILHOUSE_ENABLE");
+
+	close(fd);
+	free(config);
+
+	return ret;
+}
+
+/*
+ * "cell create CONFIGFILE PRELOADIMAGE [-l ADDRESS]": load the cell
+ * configuration and the preload image and pass both to the driver via
+ * JAILHOUSE_CELL_CREATE. ADDRESS is the target address of the image and
+ * defaults to 0.
+ *
+ * Returns the result of the ioctl; invalid arguments print the help text and
+ * terminate the program with exit status 1.
+ */
+static int cell_create(int argc, char *argv[])
+{
+	struct {
+		struct jailhouse_new_cell cell;
+		struct jailhouse_preload_image image;
+	} params;
+	struct jailhouse_new_cell *cell = &params.cell;
+	struct jailhouse_preload_image *image = params.cell.image;
+	size_t size;
+	int err, fd;
+	char *endp;
+
+	if (argc != 5 && argc != 7) {
+		help(argv[0]);
+		exit(1);
+	}
+
+	/* do not hand uninitialized stack bytes (e.g. padding) to the kernel */
+	memset(&params, 0, sizeof(params));
+
+	if (argc == 7) {
+		if (strcmp(argv[5], "-l") != 0) {
+			help(argv[0]);
+			exit(1);
+		}
+		/* the address is unsigned 64 bit, so parse it as such */
+		errno = 0;
+		image->target_address = strtoull(argv[6], &endp, 0);
+		if (errno != 0 || endp == argv[6] || *endp != 0) {
+			help(argv[0]);
+			exit(1);
+		}
+	}
+
+	cell->config_address = (unsigned long)read_file(argv[3], &size);
+	cell->config_size = size;
+	cell->num_preload_images = 1;
+
+	image->source_address = (unsigned long)read_file(argv[4], &size);
+	image->size = size;
+
+	fd = open_dev();
+
+	err = ioctl(fd, JAILHOUSE_CELL_CREATE, &params);
+	if (err)
+		perror("JAILHOUSE_CELL_CREATE");
+
+	close(fd);
+	free((void *)(unsigned long)cell->config_address);
+	free((void *)(unsigned long)image->source_address);
+
+	return err;
+}
+
+/*
+ * "cell destroy NAME": pass the cell name to the driver via
+ * JAILHOUSE_CELL_DESTROY. Returns the result of the ioctl.
+ */
+static int cell_destroy(int argc, char *argv[])
+{
+	int fd, ret;
+
+	if (argc != 4) {
+		help(argv[0]);
+		exit(1);
+	}
+
+	fd = open_dev();
+
+	ret = ioctl(fd, JAILHOUSE_CELL_DESTROY, argv[3]);
+	if (ret != 0)
+		perror("JAILHOUSE_CELL_DESTROY");
+
+	close(fd);
+
+	return ret;
+}
+
+/*
+ * Dispatch the "cell" sub-commands "create" and "destroy". A missing or
+ * unknown sub-command prints the help text and terminates the program with
+ * exit status 1.
+ */
+static int cell_management(int argc, char *argv[])
+{
+	if (argc >= 3) {
+		if (strcmp(argv[2], "create") == 0)
+			return cell_create(argc, argv);
+		if (strcmp(argv[2], "destroy") == 0)
+			return cell_destroy(argc, argv);
+	}
+
+	help(argv[0]);
+	exit(1);
+}
+
+/*
+ * Tool entry point: dispatch the commands "enable", "disable" and "cell".
+ * Returns 0 if the command succeeded and 1 otherwise; a missing or unknown
+ * command prints the help text and exits with status 1.
+ */
+int main(int argc, char *argv[])
+{
+	const char *command;
+	int result, fd;
+
+	if (argc < 2) {
+		help(argv[0]);
+		exit(1);
+	}
+	command = argv[1];
+
+	if (!strcmp(command, "enable")) {
+		result = enable(argc, argv);
+	} else if (!strcmp(command, "disable")) {
+		fd = open_dev();
+		result = ioctl(fd, JAILHOUSE_DISABLE);
+		if (result != 0)
+			perror("JAILHOUSE_DISABLE");
+		close(fd);
+	} else if (!strcmp(command, "cell")) {
+		result = cell_management(argc, argv);
+	} else {
+		help(argv[0]);
+		exit(1);
+	}
+
+	return result != 0;
+}