]> rtime.felk.cvut.cz Git - jailhouse.git/commitdiff
Jailhouse public release
author Jan Kiszka <jan.kiszka@siemens.com>
Sun, 20 Oct 2013 14:52:53 +0000 (16:52 +0200)
committer Jan Kiszka <jan.kiszka@siemens.com>
Sun, 20 Oct 2013 14:52:53 +0000 (16:52 +0200)
Baseline for first public release.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
74 files changed:
.gitignore [new file with mode: 0644]
COPYING [new file with mode: 0644]
Makefile [new file with mode: 0644]
README [new file with mode: 0644]
config/Makefile [new file with mode: 0644]
config/chromebook.c [new file with mode: 0644]
config/h700-8G.c [new file with mode: 0644]
config/h87i.c [new file with mode: 0644]
config/minimal.c [new file with mode: 0644]
config/qemu-vm.c [new file with mode: 0644]
hypervisor/Makefile [new file with mode: 0644]
hypervisor/acpi.c [new file with mode: 0644]
hypervisor/arch/arm/Makefile [new file with mode: 0644]
hypervisor/arch/arm/entry.S [new file with mode: 0644]
hypervisor/arch/arm/include/asm/bitops.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/cell.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/jailhouse.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/paging.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/percpu.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/processor.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/spinlock.h [new file with mode: 0644]
hypervisor/arch/arm/include/asm/types.h [new file with mode: 0644]
hypervisor/arch/arm/setup.c [new file with mode: 0644]
hypervisor/arch/x86/Makefile [new file with mode: 0644]
hypervisor/arch/x86/apic.c [new file with mode: 0644]
hypervisor/arch/x86/control.c [new file with mode: 0644]
hypervisor/arch/x86/dbg-write.c [new file with mode: 0644]
hypervisor/arch/x86/entry.S [new file with mode: 0644]
hypervisor/arch/x86/fault.c [new file with mode: 0644]
hypervisor/arch/x86/include/asm/apic.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/bitops.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/cell.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/fault.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/io.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/jailhouse.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/paging.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/percpu.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/processor.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/spinlock.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/types.h [new file with mode: 0644]
hypervisor/arch/x86/include/asm/vmx.h [new file with mode: 0644]
hypervisor/arch/x86/mmio.c [new file with mode: 0644]
hypervisor/arch/x86/setup.c [new file with mode: 0644]
hypervisor/arch/x86/vmx.c [new file with mode: 0644]
hypervisor/control.c [new file with mode: 0644]
hypervisor/hypervisor.lds.S [new file with mode: 0644]
hypervisor/include/jailhouse/acpi.h [new file with mode: 0644]
hypervisor/include/jailhouse/cell-config.h [new file with mode: 0644]
hypervisor/include/jailhouse/control.h [new file with mode: 0644]
hypervisor/include/jailhouse/entry.h [new file with mode: 0644]
hypervisor/include/jailhouse/header.h [new file with mode: 0644]
hypervisor/include/jailhouse/hypercall.h [new file with mode: 0644]
hypervisor/include/jailhouse/mmio.h [new file with mode: 0644]
hypervisor/include/jailhouse/paging.h [new file with mode: 0644]
hypervisor/include/jailhouse/printk.h [new file with mode: 0644]
hypervisor/include/jailhouse/processor.h [new file with mode: 0644]
hypervisor/include/jailhouse/string.h [new file with mode: 0644]
hypervisor/lib.c [new file with mode: 0644]
hypervisor/paging.c [new file with mode: 0644]
hypervisor/printk-core.c [new file with mode: 0644]
hypervisor/printk.c [new file with mode: 0644]
hypervisor/setup.c [new file with mode: 0644]
inmate/Makefile [new file with mode: 0644]
inmate/apic-demo.c [new file with mode: 0644]
inmate/header.S [new file with mode: 0644]
inmate/inmate.h [new file with mode: 0644]
inmate/inmate.lds [new file with mode: 0644]
inmate/pm-timer.c [new file with mode: 0644]
inmate/printk.c [new file with mode: 0644]
inmate/tiny-demo.c [new file with mode: 0644]
jailhouse.h [new file with mode: 0644]
main.c [new file with mode: 0644]
tools/Makefile [new file with mode: 0644]
tools/jailhouse.c [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..0910855
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+*.o
+*.mod.[co]
+*.cmd
+.tmp_versions
+Module.symvers
+modules.order
+jailhouse.ko
+hypervisor/include/jailhouse/config.h
+hypervisor/hypervisor.lds
+hypervisor/jailhouse.bin
+tools/jailhouse
+config/*.cell
+inmate/*.bin
diff --git a/COPYING b/COPYING
new file mode 100644 (file)
index 0000000..ffd9b72
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,346 @@
+This copyright does not cover applications or operating systems that
+run inside hypervisor cells, also if they use hypervisor services by
+normal hypercalls. This is considered normal use of the hypervisor
+and is not a "derived work".
+
+---------------------------------------------------------------------
+
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..b693c93
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,32 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+subdir-y := hypervisor config inmate
+
+obj-m := jailhouse.o
+
+ccflags-y := -I$(src)/hypervisor/arch/$(SRCARCH)/include \
+            -I$(src)/hypervisor/include
+
+jailhouse-y := main.o
+
+# out-of-tree build: every target below is delegated to the kernel tree in $(KERNELDIR)
+
+KERNELDIR = /lib/modules/`uname -r`/build
+
+modules modules_install clean:
+       $(MAKE) -C $(KERNELDIR) SUBDIRS=`pwd` $@
+
+install: modules_install
+       depmod -aq
+
+.PHONY: modules modules_install install clean
diff --git a/README b/README
new file mode 100644 (file)
index 0000000..cac63c3
--- /dev/null
+++ b/README
@@ -0,0 +1,91 @@
+JAILHOUSE
+=========
+
+Jailhouse is a partitioning Hypervisor based on Linux. It is able to run
+bare-metal applications or (adapted) operating systems besides Linux. For this
+purpose it configures CPU and device virtualization features of the hardware
+platform in a way that none of these domains, called "cells" here, can
+interfere with each other in an unacceptable way.
+
+Jailhouse is optimized for simplicity rather than feature richness. Once
+activated, it runs bare-metal, i.e. it takes full control over the hardware
+and needs no external support. However, in contrast to other bare-metal
+hypervisors, it is loaded and configured by a normal Linux system. Its
+management interface is based on Linux infrastructure. So you boot Linux
+first, then you enable Jailhouse and finally you split off parts of the
+system's resources and assign them to additional cells.
+
+
+WARNING: This is work in progress! Don't expect things to be complete in any
+dimension. Use at your own risk. And keep the reset button in reach.
+
+
+Requirements (preliminary)
+--------------------------
+
+currently:
+ - Intel x86 processor with VMX support, more precisely
+    - EPT (extended page tables)
+    - unrestricted guest mode
+ - at least 2 logical CPUs
+
+upcoming:
+ - Intel IOMMU with interrupt remapping support
+
+
+Build
+-----
+
+Simply run make, optionally specifying the target kernel directory:
+
+    make [KERNELDIR=/path/to/kernel/objects]
+
+Note that the command line tool "jailhouse" requires a separate make run from
+within the tools/ directory.
+
+
+Configuration
+-------------
+
+Jailhouse requires one configuration file for the complete system and one for
+each additional cell beside Linux. The configuration is currently being
+defined manually by filling C structures. To study the structure, use
+config/qemu-vm.c for a system configuration and config/minimal.c for a cell
+configuration as reference. The build system will pick up every .c file from
+the config/ directory and generate a corresponding .cell file. .cell files can
+then be passed to the jailhouse command line tool for enabling the hypervisor
+and creating new cells.
+
+
+Demonstration in QEMU/KVM
+-------------------------
+
+The included system configuration qemu-vm.c can be used to run Jailhouse in
+a QEMU/KVM virtual machine on Intel x86 hosts. Currently it requires kvm.git,
+next branch on the host (in order to get support for nested unrestricted guest
+mode). 3.13 is expected to include all necessary features for this test. You
+also need a Linux guest image with a recent kernel (tested with >= 3.9) and
+the ability to build a module for this kernel. Make sure the kvm-intel module
+was loaded with nested=1 to enable nested VMX support. Start the virtual
+machine as follows:
+
+    qemu-system-x86_64 LinuxInstallation.img -m 1G -enable-kvm -serial stdio \
+        -cpu kvm64,-kvm_pv_eoi,-kvm_steal_time,-kvm_asyncpf,-kvmclock,+vmx,+x2apic \
+        -smp 4
+
+Inside the VM, make sure that jailhouse.bin, generated by the build process,
+is available for firmware loading (typically /lib/firmware). Load jailhouse.ko
+and then enable Jailhouse like this:
+
+    jailhouse enable /path/to/qemu-vm.cell
+
+Next you can create a cell with a demonstration application as follows:
+
+    jailhouse cell create /path/to/minimal.cell /path/to/apic-demo.bin \
+        -l 0xf0000
+
+apic-demo.bin is left by the build process in the inmate/ directory. This
+application will program the APIC timer interrupt to fire at 10 Hz, measuring
+the jitter against the PM timer and displaying the result on the 
+console. Given that this demonstration runs in a virtual machine, obviously
+no decent latencies should be expected.
diff --git a/config/Makefile b/config/Makefile
new file mode 100644 (file)
index 0000000..66cef34
--- /dev/null
+++ b/config/Makefile
@@ -0,0 +1,26 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+ccflags-y := -I$(src)/../hypervisor/include # headers such as <jailhouse/cell-config.h>
+
+OBJCOPYFLAGS := -O binary # emit a raw binary image instead of an object file
+
+CONFIGS = $(shell cd $(src); ls *.c) # every .c file in this directory is a configuration
+
+always := $(CONFIGS:.c=.cell) # one .cell output per configuration source
+
+targets += $(CONFIGS:.c=.o) $(CONFIGS:.c=.cell)
+
+dummy: $(addprefix $(obj)/,$(CONFIGS:.c=.o)) # NOTE(review): presumably only names the .o files as prerequisites - confirm
+
+$(obj)/%.cell: $(obj)/%.o # convert each compiled configuration object into its .cell file
+       $(call if_changed,objcopy)
diff --git a/config/chromebook.c b/config/chromebook.c
new file mode 100644 (file)
index 0000000..40968dc
--- /dev/null
+++ b/config/chromebook.c
@@ -0,0 +1,56 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for Samsung Chromebook, 2 GB RAM, 64 MB hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+#define ALIGN __attribute__((aligned(1)))
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) /* true arrays only */
+
+struct { /* System configuration: header followed by the tables it describes */
+       struct jailhouse_system ALIGN header;
+       __u64 ALIGN cpus[1]; /* storage whose byte size is reported as cpu_set_size */
+       struct jailhouse_memory ALIGN mem_regions[1]; /* counted by num_memory_regions */
+} ALIGN config = {
+       .header = {
+               .hypervisor_memory = {
+                       .phys_start = 0xbc000000,
+                       .size = 0x4000000, /* 64 MiB, matching the "64 MB hypervisor" in the file header */
+               },
+               .system = {
+                       .name = "Samsung Chromebook",
+
+                       .cpu_set_size = sizeof(config.cpus), /* bytes, not entries */
+                       .num_memory_regions = ARRAY_SIZE(config.mem_regions), /* entries */
+                       .num_irq_lines = 0,
+                       .pio_bitmap_size = 0, /* no pio bitmap member in this struct */
+
+                       .num_pci_devices = 0,
+               },
+       },
+
+       .cpus = {
+               0xf, /* bits 0-3 set; presumably selects CPUs 0-3 - TODO confirm */
+       },
+
+       .mem_regions = {
+               /* RAM */ {
+                       .phys_start = 0x0,
+                       .virt_start = 0x0, /* same value as phys_start */
+                       .size = 0x3c000000, /* 960 MiB; NOTE(review): ends well below hypervisor_memory at 0xbc000000 - confirm layout */
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE,
+               },
+       },
+};
diff --git a/config/h700-8G.c b/config/h700-8G.c
new file mode 100644 (file)
index 0000000..1301314
--- /dev/null
+++ b/config/h700-8G.c
@@ -0,0 +1,151 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for Celsius H700, 8 GB RAM, 64 MB hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * Requests 1-byte alignment for the config blob and each of its members.
+ * NOTE(review): GCC documents that aligned alone cannot lower a member's
+ * alignment without packed - confirm the intended layout.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/* Element count of array a; parenthesized so it nests in any expression. */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * Configuration blob: a jailhouse_system header followed by the CPU set,
+ * the memory region table and the port I/O bitmap whose sizes the header
+ * records.
+ */
+struct {
+       struct jailhouse_system ALIGN header;
+       __u64 ALIGN cpus[1];
+       struct jailhouse_memory ALIGN mem_regions[9];
+       __u8 ALIGN pio_bitmap[0x2000];
+} ALIGN config = {
+       .header = {
+               /* 0x4000000 bytes = 64 MB reserved for the hypervisor */
+               .hypervisor_memory = {
+                       .phys_start = 0x3c000000,
+                       .size = 0x4000000,
+               },
+               /* same range as the read-only ACPI memory region below */
+               .config_memory = {
+                       .phys_start = 0xbf7de000,
+                       .size = 0x21000,
+               },
+               .system = {
+                       .name = "Celsius H700",
+
+                       /* sizes of the arrays that follow the header */
+                       .cpu_set_size = sizeof(config.cpus),
+                       .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+                       .num_irq_lines = 0,
+                       .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+                       .num_pci_devices = 0,
+               },
+       },
+
+       /* bits 0-3 set; presumably one bit per CPU - TODO confirm */
+       .cpus = {
+               0xf,
+       },
+
+       /* all regions are identity-mapped (virt_start == phys_start) */
+       .mem_regions = {
+               /* RAM */ {
+                       .phys_start = 0x0,
+                       .virt_start = 0x0,
+                       .size = 0x3c000000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* RAM */ {
+                       .phys_start = 0x40000000,
+                       .virt_start = 0x40000000,
+                       .size = 0x7f7de000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* ACPI */ {
+                       .phys_start = 0xbf7de000,
+                       .virt_start = 0xbf7de000,
+                       .size = 0x21000,
+                       .access_flags = JAILHOUSE_MEM_READ,
+               },
+               /* RAM */ {
+                       .phys_start = 0xbf7ff000,
+                       .virt_start = 0xbf7ff000,
+                       .size = 0x801000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* PCI */ {
+                       .phys_start = 0xc0000000,
+                       .virt_start = 0xc0000000,
+                       .size = 0x3eb00000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* yeah, that's not really safe... */
+               /* IOAPIC */ {
+                       .phys_start = 0xfec00000,
+                       .virt_start = 0xfec00000,
+                       .size = 0x1000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* the same here until we catch MSIs via interrupt remapping */
+               /* HPET */ {
+                       .phys_start = 0xfed00000,
+                       .virt_start = 0xfed00000,
+                       .size = 0x1000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* RAM */ {
+                       .phys_start = 0x100000000,
+                       .virt_start = 0x100000000,
+                       .size = 0xfc000000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* RAM */ {
+                       .phys_start = 0x200000000,
+                       .virt_start = 0x200000000,
+                       .size = 0x3c000000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+       },
+
+       /*
+        * One bit per I/O port: 0x2000 bytes cover ports 0-0xffff, indexed
+        * by port / 8. -1 sets all eight bits of every byte in a range.
+        * NOTE(review): presumably a set bit denies access to the port -
+        * confirm against the hypervisor's bitmap handling.
+        */
+       .pio_bitmap = {
+               [     0/8 ...   0x1f/8] = -1,
+               [  0x20/8 ...   0x27/8] = 0xfc, /* HACK: PIC */
+               [  0x28/8 ...   0x3f/8] = -1,
+               [  0x40/8 ...   0x47/8] = 0xf0, /* PIT */
+               [  0x48/8 ...   0x5f/8] = -1,
+               [  0x60/8 ...   0x67/8] = 0x0, /* HACK: 8042, and more? */
+               [  0x68/8 ...   0x6f/8] = -1,
+               [  0x70/8 ...   0x77/8] = 0xfc, /* rtc */
+               [  0x78/8 ...   0x7f/8] = -1,
+               [  0x80/8 ...   0x8f/8] = 0, /* dma */
+               [  0x90/8 ...  0x16f/8] = -1,
+               [ 0x170/8 ...  0x177/8] = 0, /* ide */
+               [ 0x178/8 ...  0x1ef/8] = -1,
+               [ 0x1f0/8 ...  0x1f7/8] = 0, /* ide */
+               [ 0x1f8/8 ...  0x2f7/8] = -1,
+               [ 0x2f8/8 ...  0x2ff/8] = 0, /* serial2 */
+               [ 0x300/8 ...  0x36f/8] = -1,
+               [ 0x370/8 ...  0x377/8] = 0xbf, /* ide */
+               [ 0x378/8 ...  0x3af/8] = -1,
+               [ 0x3b0/8 ...  0x3df/8] = 0, /* VGA */
+               [ 0x3e0/8 ...  0x3f7/8] = -1,
+               [ 0x3f8/8 ...  0x3ff/8] = 0, /* serial 1 */
+               [ 0x400/8 ...  0x47f/8] = 0, /* ACPI...? */
+               [ 0x480/8 ...  0xcf7/8] = -1,
+               [ 0xcf8/8 ... 0xffff/8] = 0, /* HACK: full PCI */
+       },
+};
diff --git a/config/h87i.c b/config/h87i.c
new file mode 100644 (file)
index 0000000..812d17f
--- /dev/null
@@ -0,0 +1,132 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for Asus H87I-PLUS, 4 GB RAM, 64 MB hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * Requests 1-byte alignment for the config blob and each of its members.
+ * NOTE(review): GCC documents that aligned alone cannot lower a member's
+ * alignment without packed - confirm the intended layout.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/* Element count of array a; parenthesized so it nests in any expression. */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * Configuration blob: a jailhouse_system header followed by the CPU set,
+ * the memory region table and the port I/O bitmap whose sizes the header
+ * records.
+ */
+struct {
+       struct jailhouse_system ALIGN header;
+       __u64 ALIGN cpus[1];
+       struct jailhouse_memory ALIGN mem_regions[8];
+       __u8 ALIGN pio_bitmap[0x2000];
+} ALIGN config = {
+       .header = {
+               /* 0x4000000 bytes = 64 MB reserved for the hypervisor */
+               .hypervisor_memory = {
+                       .phys_start = 0x3c000000,
+                       .size = 0x4000000,
+               },
+               /* same range as the read-only ACPI memory region below */
+               .config_memory = {
+                       .phys_start = 0xcca64000,
+                       .size = 0x15000,
+               },
+               .system = {
+                       .name = "H87I-PLUS",
+
+                       /* sizes of the arrays that follow the header */
+                       .cpu_set_size = sizeof(config.cpus),
+                       .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+                       .num_irq_lines = 0,
+                       .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+                       .num_pci_devices = 0,
+               },
+       },
+
+       /* bits 0-7 set; presumably one bit per CPU - TODO confirm */
+       .cpus = {
+               0xff,
+       },
+
+       /* all regions are identity-mapped (virt_start == phys_start) */
+       .mem_regions = {
+               /* RAM */ {
+                       .phys_start = 0x0,
+                       .virt_start = 0x0,
+                       .size = 0x3c000000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* RAM */ {
+                       .phys_start = 0x40000000,
+                       .virt_start = 0x40000000,
+                       .size = 0x8ca64000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* ACPI */ {
+                       .phys_start = 0xcca64000,
+                       .virt_start = 0xcca64000,
+                       .size = 0x15000,
+                       .access_flags = JAILHOUSE_MEM_READ,
+               },
+               /* RAM */ {
+                       .phys_start = 0xcca79000,
+                       .virt_start = 0xcca79000,
+                       .size = 0x12787000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* PCI */ {
+                       .phys_start = 0xdf200000,
+                       .virt_start = 0xdf200000,
+                       .size = 0x1fa00000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* yeah, that's not really safe... */
+               /* IOAPIC */ {
+                       .phys_start = 0xfec00000,
+                       .virt_start = 0xfec00000,
+                       .size = 0x1000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* the same here until we catch MSIs via interrupt remapping */
+               /* HPET */ {
+                       .phys_start = 0xfed00000,
+                       .virt_start = 0xfed00000,
+                       .size = 0x1000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* RAM */ {
+                       .phys_start = 0x100000000,
+                       .virt_start = 0x100000000,
+                       .size = 0x20000000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+       },
+
+       /*
+        * One bit per I/O port: 0x2000 bytes cover ports 0-0xffff, indexed
+        * by port / 8. -1 sets all eight bits of every byte in a range.
+        * NOTE(review): presumably a set bit denies access to the port -
+        * confirm against the hypervisor's bitmap handling.
+        */
+       .pio_bitmap = {
+               [     0/8 ...   0x1f/8] = -1,
+               [  0x20/8 ...   0x27/8] = 0xfc, /* HACK: PIC */
+               [  0x28/8 ...   0x3f/8] = -1,
+               [  0x40/8 ...   0x47/8] = 0xf0, /* PIT */
+               [  0x48/8 ...   0x5f/8] = -1,
+               [  0x60/8 ...   0x67/8] = 0x0, /* HACK: 8042, and more? */
+               [  0x68/8 ...   0x6f/8] = -1,
+               [  0x70/8 ...   0x77/8] = 0xfc, /* rtc */
+               [  0x78/8 ...   0x7f/8] = -1,
+               [  0x80/8 ...   0x8f/8] = 0, /* dma */
+               [  0x90/8 ...  0x3af/8] = -1,
+               [ 0x3b0/8 ...  0x3df/8] = 0, /* VGA */
+               [ 0x3e0/8 ...  0xcf7/8] = -1,
+               [ 0xcf8/8 ... 0xffff/8] = 0, /* HACK: full PCI */
+       },
+};
diff --git a/config/minimal.c b/config/minimal.c
new file mode 100644 (file)
index 0000000..bcfb73d
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Minimal configuration for demo inmates, 1 CPU, 1 MB RAM, 1 serial port
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * Requests 1-byte alignment for the config blob and each of its members.
+ * NOTE(review): GCC documents that aligned alone cannot lower a member's
+ * alignment without packed - confirm the intended layout.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/* Element count of array a; parenthesized so it nests in any expression. */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * Cell configuration blob: a jailhouse_cell_desc followed by the CPU set,
+ * the memory region table and the port I/O bitmap whose sizes the
+ * descriptor records.
+ */
+struct {
+       struct jailhouse_cell_desc ALIGN cell;
+       __u64 ALIGN cpus[1];
+       struct jailhouse_memory ALIGN mem_regions[1];
+       __u8 ALIGN pio_bitmap[0x2000];
+} ALIGN config = {
+       .cell = {
+               .name = "Minimal",
+
+               /* sizes of the arrays that follow the descriptor */
+               .cpu_set_size = sizeof(config.cpus),
+               .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+               .num_irq_lines = 0,
+               .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+               .num_pci_devices = 0,
+       },
+
+       /* only bit 3 set; presumably selects CPU 3 - TODO confirm */
+       .cpus = {
+               0x8,
+       },
+
+       .mem_regions = {
+               /* 1 MB at physical 0x3bf00000, mapped at virtual address 0 */
+               /* RAM */ {
+                       .phys_start = 0x3bf00000,
+                       .virt_start = 0,
+                       .size = 0x00100000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE,
+               },
+       },
+
+       /*
+        * One bit per I/O port: 0x2000 bytes cover ports 0-0xffff, indexed
+        * by port / 8. -1 sets all eight bits of every byte in a range.
+        * NOTE(review): presumably a set bit denies access to the port -
+        * confirm against the hypervisor's bitmap handling.
+        */
+       .pio_bitmap = {
+               [     0/8 ...  0x3f7/8] = -1,
+               [ 0x3f8/8 ...  0x3ff/8] = 0, /* serial1 */
+               [ 0x400/8 ...  0x407/8] = -1,
+               [ 0x408/8 ...  0x40f/8] = 0xf0, /* PM-timer H700 */
+               [ 0x410/8 ... 0x1807/8] = -1,
+               [0x1808/8 ... 0x180f/8] = 0xf0, /* PM-timer H87I-PLUS */
+               [0x1810/8 ... 0xb007/8] = -1,
+               [0xb008/8 ... 0xb00f/8] = 0xf0, /* PM-timer QEMU */
+               [0xb010/8 ... 0xe00f/8] = -1,
+               [0xe010/8 ... 0xe017/8] = 0, /* OXPCIe952 serial1 */
+               [0xe018/8 ... 0xffff/8] = -1,
+       },
+};
diff --git a/config/qemu-vm.c b/config/qemu-vm.c
new file mode 100644 (file)
index 0000000..0bc84e7
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Test configuration for QEMU VM, 1 GB RAM, 64 MB hypervisor (-8 K ACPI)
+ * Command line:
+ * qemu-system-x86_64 /path/to/image -m 1G -enable-kvm -smp 4 \
+ *  -virtfs local,path=/local/path,security_model=passthrough,mount_tag=host \
+ *  -cpu kvm64,-kvm_pv_eoi,-kvm_steal_time,-kvm_asyncpf,-kvmclock,+vmx,+x2apic
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+/*
+ * Requests 1-byte alignment for the config blob and each of its members.
+ * NOTE(review): GCC documents that aligned alone cannot lower a member's
+ * alignment without packed - confirm the intended layout.
+ */
+#define ALIGN __attribute__((aligned(1)))
+/* Element count of array a; parenthesized so it nests in any expression. */
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+/*
+ * Configuration blob: a jailhouse_system header followed by the CPU set,
+ * the memory region table and the port I/O bitmap whose sizes the header
+ * records.
+ */
+struct {
+       struct jailhouse_system ALIGN header;
+       __u64 ALIGN cpus[1];
+       struct jailhouse_memory ALIGN mem_regions[5];
+       __u8 ALIGN pio_bitmap[0x2000];
+} ALIGN config = {
+       .header = {
+               /* 64 MB minus the 8 KB config_memory range right behind it */
+               .hypervisor_memory = {
+                       .phys_start = 0x3c000000,
+                       .size = 0x4000000 - 0x2000,
+               },
+               /* same range as the read-only ACPI memory region below */
+               .config_memory = {
+                       .phys_start = 0x3fffe000,
+                       .size = 0x2000,
+               },
+               .system = {
+                       .name = "QEMU Linux VM",
+
+                       /* sizes of the arrays that follow the header */
+                       .cpu_set_size = sizeof(config.cpus),
+                       .num_memory_regions = ARRAY_SIZE(config.mem_regions),
+                       .num_irq_lines = 0,
+                       .pio_bitmap_size = ARRAY_SIZE(config.pio_bitmap),
+
+                       .num_pci_devices = 0,
+               },
+       },
+
+       /* bits 0-3 set; presumably one bit per CPU - TODO confirm */
+       .cpus = {
+               0xf,
+       },
+
+       /* all regions are identity-mapped (virt_start == phys_start) */
+       .mem_regions = {
+               /* RAM */ {
+                       .phys_start = 0x0,
+                       .virt_start = 0x0,
+                       .size = 0x3c000000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE | JAILHOUSE_MEM_EXECUTE |
+                               JAILHOUSE_MEM_DMA,
+               },
+               /* ACPI */ {
+                       .phys_start = 0x3fffe000,
+                       .virt_start = 0x3fffe000,
+                       .size = 0x2000,
+                       .access_flags = JAILHOUSE_MEM_READ,
+               },
+               /* PCI */ {
+                       .phys_start = 0x80000000,
+                       .virt_start = 0x80000000,
+                       .size = 0x7ec00000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* yeah, that's not really safe... */
+               /* IOAPIC */ {
+                       .phys_start = 0xfec00000,
+                       .virt_start = 0xfec00000,
+                       .size = 0x1000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+               /* the same here until we catch MSIs via interrupt remapping */
+               /* HPET */ {
+                       .phys_start = 0xfed00000,
+                       .virt_start = 0xfed00000,
+                       .size = 0x1000,
+                       .access_flags = JAILHOUSE_MEM_READ |
+                               JAILHOUSE_MEM_WRITE,
+               },
+       },
+
+       /*
+        * One bit per I/O port: 0x2000 bytes cover ports 0-0xffff, indexed
+        * by port / 8. -1 sets all eight bits of every byte in a range.
+        * The ranges must not overlap: a later range initializer silently
+        * overrides an earlier one for the shared bytes.
+        * NOTE(review): presumably a set bit denies access to the port -
+        * confirm against the hypervisor's bitmap handling.
+        */
+       .pio_bitmap = {
+               [     0/8 ...   0x1f/8] = -1,
+               [  0x20/8 ...   0x27/8] = 0xfc, /* HACK: PIC */
+               [  0x28/8 ...   0x5f/8] = -1,
+               [  0x60/8 ...   0x67/8] = 0xee, /* HACK: 8042 */
+               [  0x68/8 ...   0x6f/8] = -1,
+               [  0x70/8 ...   0x77/8] = 0xfc, /* rtc */
+               [  0x78/8 ...  0x16f/8] = -1,
+               [ 0x170/8 ...  0x177/8] = 0, /* ide */
+               [ 0x178/8 ...  0x1ef/8] = -1,
+               [ 0x1f0/8 ...  0x1f7/8] = 0, /* ide */
+               [ 0x1f8/8 ...  0x2f7/8] = -1,
+               [ 0x2f8/8 ...  0x2ff/8] = 0, /* serial2 */
+               [ 0x300/8 ...  0x36f/8] = -1,
+               [ 0x370/8 ...  0x377/8] = 0xbf, /* ide */
+               [ 0x378/8 ...  0x3af/8] = -1,
+               [ 0x3b0/8 ...  0x3df/8] = 0, /* VGA */
+               [ 0x3e0/8 ...  0x3ef/8] = -1,
+               [ 0x3f0/8 ...  0x3f7/8] = 0xbf, /* ide */
+               [ 0x3f8/8 ...  0xcf7/8] = -1,
+               [ 0xcf8/8 ...  0xcff/8] = 0, /* HACK: PCI, PIIX3 RCR */
+               [ 0xd00/8 ... 0x5657/8] = -1,
+               [0x5658/8 ... 0x565f/8] = 0xf0, /* vmport */
+               [0x5660/8 ... 0xbfff/8] = -1,
+               [0xc000/8 ... 0xc03f/8] = 0, /* virtio-9p-pci */
+               [0xc040/8 ... 0xc07f/8] = 0, /* e1000 */
+               [0xc080/8 ... 0xc08f/8] = 0, /* piix bmdma */
+               [0xc090/8 ... 0xffff/8] = -1,
+       },
+};
diff --git a/hypervisor/Makefile b/hypervisor/Makefile
new file mode 100644 (file)
index 0000000..2e92e77
--- /dev/null
@@ -0,0 +1,41 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+# Replace the include path and compiler flags handed in by the kernel build
+# with the hypervisor's own; the code is built position-independent.
+LINUXINCLUDE := -I$(src)/arch/$(SRCARCH)/include -I$(src)/include
+KBUILD_CFLAGS := -g -Os -Wall -Wstrict-prototypes -Wtype-limits \
+                -Wmissing-declarations -Wmissing-prototypes \
+                -fno-strict-aliasing -fpic -fpie -fno-common
+# Force-include jailhouse/config.h into every compilation if that file exists.
+ifneq ($(wildcard $(src)/include/jailhouse/config.h),)
+KBUILD_CFLAGS += -include $(src)/include/jailhouse/config.h
+endif
+
+# Descend into the architecture-specific directory.
+subdir-y := arch/$(SRCARCH)
+
+always := jailhouse.bin
+
+# Objects (plus the linker script) that make up hypervisor.o.
+hypervisor-y := setup.o printk.o paging.o control.o lib.o \
+       arch/$(SRCARCH)/built-in.o hypervisor.lds
+targets += $(hypervisor-y)
+
+HYPERVISOR_OBJS = $(addprefix $(obj)/,$(hypervisor-y))
+
+# NOTE(review): -T is given without an argument; presumably it pairs with
+# the linker script listed as first prerequisite of hypervisor.o below -
+# confirm against kbuild's ld command line.
+LDFLAGS_hypervisor.o := -T
+
+targets += hypervisor.o
+$(obj)/hypervisor.o: $(src)/hypervisor.lds $(HYPERVISOR_OBJS)
+       $(call if_changed,ld)
+
+# Turn the linked hypervisor.o into a raw binary image.
+OBJCOPYFLAGS_jailhouse.bin := -O binary
+
+targets += jailhouse.bin
+$(obj)/jailhouse.bin: $(obj)/hypervisor.o
+       $(call if_changed,objcopy)
diff --git a/hypervisor/acpi.c b/hypervisor/acpi.c
new file mode 100644 (file)
index 0000000..d7787fc
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/acpi.h>
+#include <jailhouse/control.h>
+#include <jailhouse/entry.h>
+
+/*
+ * Sum all table->length bytes of the table, starting at its header, in an
+ * 8-bit accumulator. Returns true if the sum wraps to zero.
+ */
+static bool acpi_valid_checksum(const struct acpi_table_header *table)
+{
+       const u8 *cur = (const u8 *)table;
+       const u8 *const limit = cur + table->length;
+       u8 total = 0;
+
+       for (; cur < limit; cur++)
+               total += *cur;
+
+       return !total;
+}
+
+/*
+ * Scan the config memory byte by byte for a table with the given signature.
+ *
+ * name:  4-character table signature, compared as one 32-bit value
+ * start: NULL to search from the beginning of the config memory, or a
+ *        previously returned table to continue right behind it
+ *
+ * A candidate is accepted only if its length covers at least the table
+ * header, the whole table lies inside the config memory and its checksum
+ * is valid. Returns the table, or NULL if none is found.
+ */
+const struct acpi_table_header *
+acpi_find_table(char name[4], const struct acpi_table_header *start)
+{
+       void *end = config_memory + system_config->config_memory.size;
+       const struct acpi_table_header *tab;
+       const void *pos;
+
+       pos = start ? ((const void *)start) + start->length : config_memory;
+       /* <=: a header ending exactly at the end of the area is still valid */
+       while ((pos + sizeof(struct acpi_table_header)) <= end) {
+               tab = pos++;
+
+               /*
+                * Bound the length against the space left behind tab, not
+                * behind the already advanced pos, and compare sizes rather
+                * than pointers so that a huge length cannot wrap around.
+                */
+               if (tab->signature == *(u32 *)name &&
+                   tab->length >= sizeof(struct acpi_table_header) &&
+                   tab->length <= (unsigned long)(end - (const void *)tab) &&
+                   acpi_valid_checksum(tab))
+                       return tab;
+       }
+
+       return NULL;
+}
diff --git a/hypervisor/arch/arm/Makefile b/hypervisor/arch/arm/Makefile
new file mode 100644 (file)
index 0000000..d5991cd
--- /dev/null
@@ -0,0 +1,18 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+# Strip the forced inclusion of asm/unified.h from the assembler flags.
+KBUILD_AFLAGS := $(filter-out -include asm/unified.h,$(KBUILD_AFLAGS))
+
+always := built-in.o
+
+# Full object list, disabled for now; only the two objects below are built.
+#obj-y := dbg-write.o entry.o setup.o fault.o control.o mmio.o
+obj-y := entry.o setup.o
diff --git a/hypervisor/arch/arm/entry.S b/hypervisor/arch/arm/entry.S
new file mode 100644 (file)
index 0000000..c374614
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/* Entry point for Linux loader module on JAILHOUSE_ENABLE */
+       .text
+       .globl arch_entry
+arch_entry:
+       /*
+        * Stub: r0 = ~(~-38) = -38, then return to the caller.
+        * NOTE(review): -38 is presumably -ENOSYS - confirm.
+        */
+       mvn %r0,#~-38
+       bx %lr
+
+
+/* Fix up Global Offset Table with absolute hypervisor address */
+       .globl got_init
+got_init:
+       /* placeholder: returns immediately without touching the GOT */
+       bx %lr
diff --git a/hypervisor/arch/arm/include/asm/bitops.h b/hypervisor/arch/arm/include/asm/bitops.h
new file mode 100644 (file)
index 0000000..18aa608
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_BITOPS_H
+#define _JAILHOUSE_ASM_BITOPS_H
+
+#include <asm/types.h>
+
+/*
+ * Stub: the body is empty, so neither nr nor addr is used and no bit is
+ * changed.
+ * NOTE(review): presumably meant to clear bit nr in the bitmap at addr -
+ * confirm the required semantics before implementing.
+ */
+static inline __attribute__((always_inline)) void
+clear_bit(int nr, volatile unsigned long *addr)
+{
+}
+
+/*
+ * Stub: the body is empty, so neither nr nor addr is used and no bit is
+ * changed.
+ * NOTE(review): presumably meant to set bit nr in the bitmap at addr -
+ * confirm the required semantics before implementing.
+ */
+static inline __attribute__((always_inline)) void
+set_bit(unsigned int nr, volatile unsigned long *addr)
+{
+}
+
+/*
+ * Test bit nr of the bitmap at addr: bit nr % BITS_PER_LONG of the word
+ * at index nr / BITS_PER_LONG. Returns 1 if the bit is set, 0 otherwise.
+ */
+static inline __attribute__((always_inline)) int
+constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+{
+       unsigned long word = addr[nr / BITS_PER_LONG];
+       unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+
+       return (word & mask) != 0;
+}
+
+/*
+ * Test bit nr of the bitmap at addr when nr is not a compile-time constant.
+ * Returns 1 if the bit is set, 0 otherwise.
+ *
+ * Shares the plain C implementation of constant_test_bit() so that
+ * test_bit() gives the same answer for constant and variable bit numbers
+ * instead of always reporting the bit as clear.
+ */
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+{
+       return constant_test_bit(nr, addr);
+}
+
+/*
+ * Test bit nr of the bitmap at addr; dispatches to constant_test_bit() if
+ * nr is a compile-time constant and to variable_test_bit() otherwise.
+ */
+#define test_bit(nr, addr)                     \
+       (__builtin_constant_p((nr))             \
+        ? constant_test_bit((nr), (addr))      \
+        : variable_test_bit((nr), (addr)))
+
+/*
+ * Stub: ignores nr and addr, modifies nothing and always returns 0.
+ * NOTE(review): presumably meant to set bit nr and return its previous
+ * value - confirm the required semantics before implementing.
+ */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+       return 0;
+}
+
+/*
+ * Stub: ignores word and always returns 0.
+ * NOTE(review): the name suggests "find first zero bit" - confirm the
+ * required semantics before implementing.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+       return 0;
+}
+
+#endif /* !_JAILHOUSE_ASM_BITOPS_H */
diff --git a/hypervisor/arch/arm/include/asm/cell.h b/hypervisor/arch/arm/include/asm/cell.h
new file mode 100644 (file)
index 0000000..0a9699b
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_CELL_H
+#define _JAILHOUSE_ASM_CELL_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#include <jailhouse/cell-config.h>
+
+/* Per-cell state; cells are chained into a singly linked list via next. */
+struct cell {
+       /* presumably NUL-terminated, hence the extra byte - TODO confirm */
+       char name[JAILHOUSE_CELL_NAME_MAXLEN+1];
+
+       /*
+        * NOTE(review): cpu_set presumably points at small_cpu_set when the
+        * set fits into it - confirm against the code that fills it in.
+        */
+       struct cpu_set *cpu_set;
+       struct cpu_set small_cpu_set;
+
+       unsigned long page_offset;
+
+       /* next cell in the list */
+       struct cell *next;
+};
+
+extern struct cell *cell_list;
+
+#endif /* !_JAILHOUSE_ASM_CELL_H */
diff --git a/hypervisor/arch/arm/include/asm/jailhouse.h b/hypervisor/arch/arm/include/asm/jailhouse.h
new file mode 100644 (file)
index 0000000..7f4ef88
--- /dev/null
@@ -0,0 +1,112 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+/*
+ * Instruction sequence of a hypercall: hvc with immediate 0x4a48 (the ASCII
+ * codes of "JH"), preceded by the directive enabling the virt extension.
+ */
+#define JAILHOUSE_CALL_INS             ".arch_extension virt\n\t" \
+                                       "hvc #0x4a48"
+/* r0 carries the call number in and the result out; r1-r4 the arguments */
+#define JAILHOUSE_CALL_NUM_RESULT      "r0"
+#define JAILHOUSE_CALL_ARG1            "r1"
+#define JAILHOUSE_CALL_ARG2            "r2"
+#define JAILHOUSE_CALL_ARG3            "r3"
+#define JAILHOUSE_CALL_ARG4            "r4"
+
+/*
+ * Issue a hypercall without arguments.
+ *
+ * num is placed in the register named by JAILHOUSE_CALL_NUM_RESULT; the
+ * value found in that register after JAILHOUSE_CALL_INS is returned.
+ * NOTE(review): __asmeq is not defined in this file; presumably it makes
+ * the assembler verify the operand-to-register assignment - confirm.
+ */
+static inline __u32 jailhouse_call0(__u32 num)
+{
+       register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+
+       /* the "memory" clobber keeps memory accesses from moving across */
+       asm volatile(
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+               JAILHOUSE_CALL_INS
+               : "=r" (num_result)
+               : "r" (num_result)
+               : "memory");
+       return num_result;
+}
+
+/*
+ * Issue a hypercall with one argument.
+ *
+ * num is placed in the register named by JAILHOUSE_CALL_NUM_RESULT and
+ * arg1 in JAILHOUSE_CALL_ARG1; the value found in the former register
+ * after JAILHOUSE_CALL_INS is returned.
+ * NOTE(review): __asmeq is not defined in this file; presumably it makes
+ * the assembler verify the operand-to-register assignment - confirm.
+ */
+static inline __u32 jailhouse_call1(__u32 num, __u32 arg1)
+{
+       register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+       register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+
+       /* the "memory" clobber keeps memory accesses from moving across */
+       asm volatile(
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+               __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+               JAILHOUSE_CALL_INS
+               : "=r" (num_result)
+               : "r" (num_result), "r" (__arg1)
+               : "memory");
+       return num_result;
+}
+
+/*
+ * Issue a hypercall with two arguments.
+ *
+ * num is placed in the register named by JAILHOUSE_CALL_NUM_RESULT, arg1
+ * and arg2 in JAILHOUSE_CALL_ARG1/ARG2; the value found in the former
+ * register after JAILHOUSE_CALL_INS is returned.
+ * NOTE(review): __asmeq is not defined in this file; presumably it makes
+ * the assembler verify the operand-to-register assignment - confirm.
+ */
+static inline __u32 jailhouse_call2(__u32 num, __u32 arg1, __u32 arg2)
+{
+       register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+       register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+       register __u32 __arg2 asm(JAILHOUSE_CALL_ARG2) = arg2;
+
+       /* the "memory" clobber keeps memory accesses from moving across */
+       asm volatile(
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+               __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+               __asmeq(JAILHOUSE_CALL_ARG2, "%3")
+               JAILHOUSE_CALL_INS
+               : "=r" (num_result)
+               : "r" (num_result), "r" (__arg1), "r" (__arg2)
+               : "memory");
+       return num_result;
+}
+
+/*
+ * Issue a hypercall with three arguments.
+ *
+ * num is placed in the register named by JAILHOUSE_CALL_NUM_RESULT, arg1
+ * to arg3 in JAILHOUSE_CALL_ARG1..ARG3; the value found in the former
+ * register after JAILHOUSE_CALL_INS is returned.
+ * NOTE(review): __asmeq is not defined in this file; presumably it makes
+ * the assembler verify the operand-to-register assignment - confirm.
+ */
+static inline __u32 jailhouse_call3(__u32 num, __u32 arg1, __u32 arg2,
+                                  __u32 arg3)
+{
+       register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+       register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+       register __u32 __arg2 asm(JAILHOUSE_CALL_ARG2) = arg2;
+       register __u32 __arg3 asm(JAILHOUSE_CALL_ARG3) = arg3;
+
+       /* the "memory" clobber keeps memory accesses from moving across */
+       asm volatile(
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+               __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+               __asmeq(JAILHOUSE_CALL_ARG2, "%3")
+               __asmeq(JAILHOUSE_CALL_ARG3, "%4")
+               JAILHOUSE_CALL_INS
+               : "=r" (num_result)
+               : "r" (num_result), "r" (__arg1), "r" (__arg2), "r" (__arg3)
+               : "memory");
+       return num_result;
+}
+
+/*
+ * Issue a hypercall with four arguments.
+ *
+ * num is placed in the register named by JAILHOUSE_CALL_NUM_RESULT, arg1
+ * to arg4 in JAILHOUSE_CALL_ARG1..ARG4; the value found in the former
+ * register after JAILHOUSE_CALL_INS is returned.
+ * NOTE(review): __asmeq is not defined in this file; presumably it makes
+ * the assembler verify the operand-to-register assignment - confirm.
+ */
+static inline __u32 jailhouse_call4(__u32 num, __u32 arg1, __u32 arg2,
+                                  __u32 arg3, __u32 arg4)
+{
+       register __u32 num_result asm(JAILHOUSE_CALL_NUM_RESULT) = num;
+       register __u32 __arg1 asm(JAILHOUSE_CALL_ARG1) = arg1;
+       register __u32 __arg2 asm(JAILHOUSE_CALL_ARG2) = arg2;
+       register __u32 __arg3 asm(JAILHOUSE_CALL_ARG3) = arg3;
+       register __u32 __arg4 asm(JAILHOUSE_CALL_ARG4) = arg4;
+
+       /* the "memory" clobber keeps memory accesses from moving across */
+       asm volatile(
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%0")
+               __asmeq(JAILHOUSE_CALL_NUM_RESULT, "%1")
+               __asmeq(JAILHOUSE_CALL_ARG1, "%2")
+               __asmeq(JAILHOUSE_CALL_ARG2, "%3")
+               __asmeq(JAILHOUSE_CALL_ARG3, "%4")
+               __asmeq(JAILHOUSE_CALL_ARG4, "%5")
+               JAILHOUSE_CALL_INS
+               : "=r" (num_result)
+               : "r" (num_result), "r" (__arg1), "r" (__arg2), "r" (__arg3),
+                 "r" (__arg4)
+               : "memory");
+       return num_result;
+}
diff --git a/hypervisor/arch/arm/include/asm/paging.h b/hypervisor/arch/arm/include/asm/paging.h
new file mode 100644 (file)
index 0000000..d8469e9
--- /dev/null
@@ -0,0 +1,197 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PAGING_H
+#define _JAILHOUSE_ASM_PAGING_H
+
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define PAGE_SIZE              4096
+#define PAGE_MASK              ~(PAGE_SIZE - 1)
+
+#define PAGE_DIR_LEVELS                4
+
+#define PAGE_TABLE_OFFS_MASK   0x00000ff8UL
+#define PAGE_ADDR_MASK         0xfffff000UL
+#define PAGE_OFFS_MASK         0x00000fffUL
+#define HUGEPAGE_ADDR_MASK     0xffe00000UL
+#define HUGEPAGE_OFFS_MASK     0x001fffffUL
+
+#define PAGE_FLAG_PRESENT      0x01
+#define PAGE_FLAG_RW           0x02
+#define PAGE_FLAG_SUPERVISOR   0x04
+#define PAGE_FLAG_UNCACHED     0x10
+
+#define PAGE_DEFAULT_FLAGS     (PAGE_FLAG_PRESENT | PAGE_FLAG_RW | \
+                                PAGE_FLAG_SUPERVISOR)
+#define PAGE_READONLY_FLAGS    (PAGE_FLAG_PRESENT | PAGE_FLAG_SUPERVISOR)
+
+#define INVALID_PHYS_ADDR      (~0UL)
+
+#define REMAP_BASE_ADDR                0x00100000UL
+#define NUM_REMAP_BITMAP_PAGES 1
+
+#define FOREIGN_MAPPING_BASE   REMAP_BASE_ADDR
+#define NUM_FOREIGN_PAGES      16
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Entry types of the four paging levels (pgd -> pud -> pmd -> pte).
+ * All of them are plain unsigned long values.
+ */
+typedef unsigned long pgd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pte_t;
+
+/* Return true if bit 0 of the entry *pgd is set. */
+static inline bool pgd_valid(pgd_t *pgd)
+{
+       return (*pgd & 1) != 0;
+}
+
+/*
+ * Placeholder in the ARM port: no lookup is performed, both arguments are
+ * ignored and NULL is always returned.
+ */
+static inline pgd_t *pgd_offset(pgd_t *page_table, unsigned long addr)
+{
+       return NULL;
+}
+
+/*
+ * Write an entry to *pgd: the PAGE_ADDR_MASK part of addr combined with
+ * flags.
+ */
+static inline void set_pgd(pgd_t *pgd, unsigned long addr, unsigned long flags)
+{
+       pgd_t entry = addr & PAGE_ADDR_MASK;
+
+       entry |= flags;
+       *pgd = entry;
+}
+
+/* Zero the entry *pgd; pgd_valid() reports false for it afterwards. */
+static inline void clear_pgd(pgd_t *pgd)
+{
+       *pgd = 0;
+}
+
+/* Return true if bit 0 of the entry *pud is set. */
+static inline bool pud_valid(pud_t *pud)
+{
+       return (*pud & 1) != 0;
+}
+
+/*
+ * Placeholder in the ARM port: no lookup is performed, all arguments are
+ * ignored and NULL is always returned.
+ */
+static inline pud_t *pud4l_offset(pgd_t *pgd, unsigned long page_table_offset,
+                                 unsigned long addr)
+{
+       return NULL;
+}
+
+/*
+ * Placeholder in the ARM port: no lookup is performed, both arguments are
+ * ignored and NULL is always returned.
+ */
+static inline pud_t *pud3l_offset(pgd_t *page_table, unsigned long addr)
+{
+       return NULL;
+}
+
+/*
+ * Write an entry to *pud: the PAGE_ADDR_MASK part of addr combined with
+ * flags.
+ */
+static inline void set_pud(pud_t *pud, unsigned long addr, unsigned long flags)
+{
+       pud_t entry = addr & PAGE_ADDR_MASK;
+
+       entry |= flags;
+       *pud = entry;
+}
+
+/* Zero the entry *pud; pud_valid() reports false for it afterwards. */
+static inline void clear_pud(pud_t *pud)
+{
+       *pud = 0;
+}
+
+/* Return true if bit 0 of the entry *pmd is set. */
+static inline bool pmd_valid(pmd_t *pmd)
+{
+       return (*pmd & 1) != 0;
+}
+
+/*
+ * Return true if bit 7 of the entry *pmd is set, false otherwise.
+ *
+ * The result is explicitly normalised to 0/1: bool is an enum in this code
+ * base, so returning the masked value directly would hand out 128 instead
+ * of true and break comparisons against true.
+ */
+static inline bool pmd_is_hugepage(pmd_t *pmd)
+{
+       return (*pmd & (1UL << 7)) != 0;
+}
+
+/*
+ * Placeholder in the ARM port: no lookup is performed, all arguments are
+ * ignored and NULL is always returned.
+ */
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long page_table_offset,
+                               unsigned long addr)
+{
+       return NULL;
+}
+
+/*
+ * Write an entry to *pmd: the PAGE_ADDR_MASK part of addr combined with
+ * flags.
+ */
+static inline void set_pmd(pmd_t *pmd, unsigned long addr, unsigned long flags)
+{
+       pmd_t entry = addr & PAGE_ADDR_MASK;
+
+       entry |= flags;
+       *pmd = entry;
+}
+
+/* Zero the entry *pmd; pmd_valid() reports false for it afterwards. */
+static inline void clear_pmd(pmd_t *pmd)
+{
+       *pmd = 0;
+}
+
+/* Return true if bit 0 of the entry *pte is set. */
+static inline bool pte_valid(pte_t *pte)
+{
+       return (*pte & 1) != 0;
+}
+
+/*
+ * Placeholder in the ARM port: no lookup is performed, all arguments are
+ * ignored and NULL is always returned.
+ */
+static inline pte_t *pte_offset(pmd_t *pmd, unsigned long page_table_offset,
+                               unsigned long addr)
+{
+       return NULL;
+}
+
+/*
+ * Write an entry to *pte: the PAGE_ADDR_MASK part of addr combined with
+ * flags.
+ */
+static inline void set_pte(pte_t *pte, unsigned long addr, unsigned long flags)
+{
+       pte_t entry = addr & PAGE_ADDR_MASK;
+
+       entry |= flags;
+       *pte = entry;
+}
+
+/* Zero the entry *pte; pte_valid() reports false for it afterwards. */
+static inline void clear_pte(pte_t *pte)
+{
+       *pte = 0;
+}
+
+/*
+ * Combine the page address stored in *pte (PAGE_ADDR_MASK bits) with the
+ * in-page offset of addr (PAGE_OFFS_MASK bits).
+ */
+static inline unsigned long phys_address(pte_t *pte, unsigned long addr)
+{
+       unsigned long page_base = *pte & PAGE_ADDR_MASK;
+       unsigned long page_offset = addr & PAGE_OFFS_MASK;
+
+       return page_base + page_offset;
+}
+
+/*
+ * Combine the huge page address stored in *pmd (HUGEPAGE_ADDR_MASK bits)
+ * with the offset of addr inside the huge page (HUGEPAGE_OFFS_MASK bits).
+ */
+static inline unsigned long phys_address_hugepage(pmd_t *pmd,
+                                                 unsigned long addr)
+{
+       unsigned long page_base = *pmd & HUGEPAGE_ADDR_MASK;
+       unsigned long page_offset = addr & HUGEPAGE_OFFS_MASK;
+
+       return page_base + page_offset;
+}
+
+/*
+ * Check the table referenced by *pgd for valid entries.
+ *
+ * The table is located at the address stored in *pgd plus
+ * page_table_offset and spans one page. Returns true only if none of its
+ * entries is valid.
+ */
+static inline bool pud_empty(pgd_t *pgd, unsigned long page_table_offset)
+{
+       pud_t *entry = (pud_t *)((*pgd & PAGE_ADDR_MASK) + page_table_offset);
+       unsigned int remaining = PAGE_SIZE / sizeof(pud_t);
+
+       while (remaining-- > 0) {
+               if (pud_valid(entry))
+                       return false;
+               entry++;
+       }
+       return true;
+}
+
+/*
+ * Check the table referenced by *pud for valid entries.
+ *
+ * The table is located at the address stored in *pud plus
+ * page_table_offset and spans one page. Returns true only if none of its
+ * entries is valid.
+ */
+static inline bool pmd_empty(pud_t *pud, unsigned long page_table_offset)
+{
+       pmd_t *entry = (pmd_t *)((*pud & PAGE_ADDR_MASK) + page_table_offset);
+       unsigned int remaining = PAGE_SIZE / sizeof(pmd_t);
+
+       while (remaining-- > 0) {
+               if (pmd_valid(entry))
+                       return false;
+               entry++;
+       }
+       return true;
+}
+
+/*
+ * Check the page table referenced by *pmd for valid entries.
+ *
+ * The table is located at the address stored in *pmd plus
+ * page_table_offset and spans one page. Returns true only if none of its
+ * entries is valid.
+ */
+static inline bool pt_empty(pmd_t *pmd, unsigned long page_table_offset)
+{
+       pte_t *entry = (pte_t *)((*pmd & PAGE_ADDR_MASK) + page_table_offset);
+       unsigned int remaining = PAGE_SIZE / sizeof(pte_t);
+
+       while (remaining-- > 0) {
+               if (pte_valid(entry))
+                       return false;
+               entry++;
+       }
+       return true;
+}
+
+/* Placeholder in the ARM port: empty body, nothing is flushed. */
+static inline void flush_tlb(void)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PAGING_H */
diff --git a/hypervisor/arch/arm/include/asm/percpu.h b/hypervisor/arch/arm/include/asm/percpu.h
new file mode 100644 (file)
index 0000000..037bf94
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PERCPU_H
+#define _JAILHOUSE_ASM_PERCPU_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#define NUM_ENTRY_REGS                 6
+
+/* Keep in sync with struct per_cpu! */
+#define PERCPU_SIZE_SHIFT              13
+#define PERCPU_STACK_END               PAGE_SIZE
+#define PERCPU_LINUX_SP                        PERCPU_STACK_END
+#define PERCPU_CPU_ID                  (PERCPU_LINUX_SP + 4)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cell.h>
+
+/*
+ * Per-CPU hypervisor data, one page-aligned instance per CPU.
+ *
+ * The first three members are addressed via the PERCPU_* offsets defined
+ * above, and the overall size must equal 1 << PERCPU_SIZE_SHIFT because
+ * per_cpu() locates instances by shifting the CPU number. Both properties
+ * are validated at compile time by __check_assumptions().
+ */
+struct per_cpu {
+       /* Keep these three in sync with defines above! */
+       u8 stack[PAGE_SIZE];            /* ends at PERCPU_STACK_END */
+       unsigned long linux_sp;         /* at offset PERCPU_LINUX_SP */
+       unsigned int cpu_id;            /* at offset PERCPU_CPU_ID */
+
+//     u32 apic_id;
+       struct cell *cell;
+
+       unsigned long linux_reg[NUM_ENTRY_REGS];
+//     unsigned long linux_ip;
+       bool initialized;
+
+       /* volatile: these flags are polled in busy-wait loops */
+       volatile bool stop_cpu;
+       volatile bool wait_for_sipi;
+       volatile bool cpu_stopped;
+       bool init_signaled;
+       int sipi_vector;
+       bool flush_caches;
+       bool shutdown_cpu;
+} __attribute__((aligned(PAGE_SIZE)));
+
+/*
+ * Return the per-CPU data of the given CPU.
+ *
+ * The instances are laid out back to back at the start of __page_pool, each
+ * occupying 1 << PERCPU_SIZE_SHIFT bytes.
+ */
+static inline struct per_cpu *per_cpu(unsigned int cpu)
+{
+       extern u8 __page_pool[];
+       u8 *slot = &__page_pool[cpu << PERCPU_SIZE_SHIFT];
+
+       return (struct per_cpu *)slot;
+}
+
+/* Validate defines */
+#define CHECK_ASSUMPTION(assume)       ((void)sizeof(char[1 - 2*!(assume)]))
+
+/*
+ * Compile-time validation of the PERCPU_* defines against the actual layout
+ * of struct per_cpu. Each violated assumption yields a negative array size
+ * and thus a build error; no code is generated for the checks.
+ */
+static inline void __check_assumptions(void)
+{
+       CHECK_ASSUMPTION((1 << PERCPU_SIZE_SHIFT) == sizeof(struct per_cpu));
+       CHECK_ASSUMPTION(PERCPU_STACK_END ==
+                        sizeof(((struct per_cpu *)0)->stack));
+       CHECK_ASSUMPTION(PERCPU_LINUX_SP ==
+                        __builtin_offsetof(struct per_cpu, linux_sp));
+       CHECK_ASSUMPTION(PERCPU_CPU_ID ==
+                        __builtin_offsetof(struct per_cpu, cpu_id));
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PERCPU_H */
diff --git a/hypervisor/arch/arm/include/asm/processor.h b/hypervisor/arch/arm/include/asm/processor.h
new file mode 100644 (file)
index 0000000..d42be81
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PROCESSOR_H
+#define _JAILHOUSE_ASM_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+/* Placeholder in the ARM port: no register members are defined yet. */
+struct registers {
+};
+
+/* Placeholder in the ARM port: empty body, used inside busy-wait loops. */
+static inline void cpu_relax(void)
+{
+}
+
+/*
+ * Placeholder in the ARM port: the body is empty, so no barrier of any kind
+ * is issued.
+ */
+static inline void memory_barrier(void)
+{
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PROCESSOR_H */
diff --git a/hypervisor/arch/arm/include/asm/spinlock.h b/hypervisor/arch/arm/include/asm/spinlock.h
new file mode 100644 (file)
index 0000000..d5cb68c
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/bitops.h>
+#include <asm/processor.h>
+
+/*
+ * Lock word of a spinlock. The commented-out implementation of spin_lock()
+ * and spin_unlock() operates on bit 0 of state.
+ */
+typedef struct {
+       unsigned long state;
+} spinlock_t;
+
+#define DEFINE_SPINLOCK(name)  spinlock_t (name)
+
+/*
+ * Placeholder in the ARM port: the intended implementation is commented
+ * out, so this is currently a no-op and provides no mutual exclusion.
+ */
+static inline void spin_lock(spinlock_t *lock)
+{
+//     while (test_and_set_bit(0, &lock->state))
+//             cpu_relax();
+}
+
+/*
+ * Placeholder in the ARM port: the intended implementation is commented
+ * out, so this is currently a no-op.
+ */
+static inline void spin_unlock(spinlock_t *lock)
+{
+//     asm volatile("": : :"memory");
+//     clear_bit(0, &lock->state);
+}
diff --git a/hypervisor/arch/arm/include/asm/types.h b/hypervisor/arch/arm/include/asm/types.h
new file mode 100644 (file)
index 0000000..19fd3a5
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_TYPES_H
+#define _JAILHOUSE_ASM_TYPES_H
+
+#define NULL                           ((void *)0)
+
+#define BITS_PER_LONG                  32
+
+#ifndef __ASSEMBLY__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+typedef s8 __s8;
+typedef u8 __u8;
+
+typedef s16 __s16;
+typedef u16 __u16;
+
+typedef s32 __s32;
+typedef u32 __u32;
+
+typedef s64 __s64;
+typedef u64 __u64;
+
+/*
+ * Boolean type of the hypervisor. Being an enum, conversions to it do not
+ * normalise non-zero values to true; produce exactly 0 or 1 when returning
+ * the result of a bit test.
+ */
+typedef enum { true=1, false=0 } bool;
+
+/* Set of CPUs, represented as a bitmap with an explicit upper bound. */
+struct cpu_set {
+       unsigned long max_cpu_id;
+       /* Note: The bitmap is supposed to be extended by embedding this
+        * struct into a larger buffer. */
+       unsigned long bitmap[1];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_TYPES_H */
diff --git a/hypervisor/arch/arm/setup.c b/hypervisor/arch/arm/setup.c
new file mode 100644 (file)
index 0000000..c3fcda0
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+
+/* Not implemented in the ARM port: ignores its arguments, returns -ENOSYS. */
+int arch_init_early(struct cell *linux_cell,
+                   struct jailhouse_cell_desc *config)
+{
+       return -ENOSYS;
+}
+
+/* Not implemented in the ARM port: ignores its argument, returns -ENOSYS. */
+int arch_cpu_init(struct per_cpu *cpu_data)
+{
+       return -ENOSYS;
+}
+
+/* Not implemented in the ARM port: ignores its arguments, returns -ENOSYS. */
+int arch_init_late(struct cell *linux_cell,
+                  struct jailhouse_cell_desc *config)
+{
+       return -ENOSYS;
+}
+
+/* Not implemented in the ARM port: spins forever and never returns. */
+void arch_cpu_activate_vmm(struct per_cpu *cpu_data)
+{
+       while (1);
+}
+
+/* Not implemented in the ARM port: empty body. */
+void arch_cpu_restore(struct per_cpu *cpu_data)
+{
+}
+
+// catch missing symbols
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/control.h>
+#include <jailhouse/string.h>
+/*
+ * Empty stand-ins for symbols the ARM port does not implement yet. None of
+ * them performs any work; phys_processor_id() always reports CPU 0 and
+ * arch_cell_create() always fails with -ENOSYS.
+ */
+void arch_dbg_write_init(void) {}
+int phys_processor_id(void) { return 0; }
+void arch_suspend_cpu(unsigned int cpu_id) {}
+void arch_resume_cpu(unsigned int cpu_id) {}
+void arch_reset_cpu(unsigned int cpu_id) {}
+void arch_shutdown_cpu(unsigned int cpu_id) {}
+int arch_cell_create(struct per_cpu *cpu_data, struct cell *new_cell,
+                    struct jailhouse_cell_desc *config) { return -ENOSYS; }
+/*
+ * Copy n bytes from src to dest and return dest. The regions must not
+ * overlap.
+ *
+ * A plain byte-wise copy: unlike the other stand-ins in this file, memcpy
+ * has a contract that callers (and compiler-generated structure copies)
+ * rely on, so it must really copy and must return dest rather than NULL.
+ */
+void *memcpy(void *dest, const void *src, unsigned long n)
+{
+       const u8 *from = src;
+       u8 *to = dest;
+
+       while (n-- > 0)
+               *to++ = *from++;
+       return dest;
+}
+/* Empty stand-in in the ARM port: msg is discarded, nothing is printed. */
+void arch_dbg_write(const char *msg) {}
diff --git a/hypervisor/arch/x86/Makefile b/hypervisor/arch/x86/Makefile
new file mode 100644 (file)
index 0000000..927804f
--- /dev/null
@@ -0,0 +1,16 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+always := built-in.o
+
+obj-y := apic.o dbg-write.o entry.o setup.o fault.o vmx.o control.o mmio.o \
+        ../../acpi.o
diff --git a/hypervisor/arch/x86/apic.c b/hypervisor/arch/x86/apic.c
new file mode 100644 (file)
index 0000000..f22d28f
--- /dev/null
@@ -0,0 +1,439 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/processor.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/control.h>
+#include <jailhouse/mmio.h>
+#include <asm/apic.h>
+#include <asm/bitops.h>
+#include <asm/fault.h>
+#include <asm/spinlock.h>
+#include <asm/vmx.h>
+
+bool using_x2apic;
+
+static u8 apic_to_cpu_id[] = { [0 ... APIC_MAX_PHYS_ID] = APIC_INVALID_ID };
+static DEFINE_SPINLOCK(wait_lock);
+static void *xapic_page;
+
+/*
+ * Access methods for the local APIC. apic_init() fills them in with either
+ * the xAPIC (memory-mapped) or the x2APIC (MSR-based) variants.
+ */
+static struct {
+       u32 (*read)(unsigned int reg);
+       u32 (*read_id)(void);
+       void (*write)(unsigned int reg, u32 val);
+       void (*send_ipi)(u32 apic_id, u32 icr_lo);
+} apic_ops;
+
+/*
+ * Read a 32-bit xAPIC register. Registers are spaced 16 bytes apart inside
+ * the mapped xAPIC page, hence the shift of the register number.
+ */
+static u32 read_xapic(unsigned int reg)
+{
+       volatile u32 *reg_addr = xapic_page + (reg << 4);
+
+       return *reg_addr;
+}
+
+/* Return the xAPIC ID, i.e. bits 31:24 of the ID register. */
+static u32 read_xapic_id(void)
+{
+       u32 id_reg = read_xapic(APIC_REG_ID);
+
+       return id_reg >> 24;
+}
+
+/*
+ * Write a 32-bit xAPIC register. Registers are spaced 16 bytes apart inside
+ * the mapped xAPIC page, hence the shift of the register number.
+ */
+static void write_xapic(unsigned int reg, u32 val)
+{
+       volatile u32 *reg_addr = xapic_page + (reg << 4);
+
+       *reg_addr = val;
+}
+
+/*
+ * Send an IPI in xAPIC mode.
+ *
+ * Waits until the delivery status of a previous IPI is no longer pending,
+ * then programs the destination (bits 31:24 of ICR_HI) followed by the low
+ * ICR word. The order matters: the write to the low word is issued last.
+ */
+static void send_xapic_ipi(u32 apic_id, u32 icr_lo)
+{
+       for (;;) {
+               if (!(read_xapic(APIC_REG_ICR) & APIC_ICR_DS_PENDING))
+                       break;
+               cpu_relax();
+       }
+
+       write_xapic(APIC_REG_ICR_HI, apic_id << 24);
+       write_xapic(APIC_REG_ICR, icr_lo);
+}
+
+/*
+ * Read an x2APIC register through its MSR (MSR_X2APIC_BASE + reg). Only the
+ * lower 32 bits of the MSR value are returned.
+ */
+static u32 read_x2apic(unsigned int reg)
+{
+       u32 value = read_msr(MSR_X2APIC_BASE + reg);
+
+       return value;
+}
+
+/* Return the x2APIC ID, which is the content of the ID register MSR. */
+static u32 read_x2apic_id(void)
+{
+       return read_x2apic(APIC_REG_ID);
+}
+
+/* Write val to the x2APIC register MSR at MSR_X2APIC_BASE + reg. */
+static void write_x2apic(unsigned int reg, u32 val)
+{
+       unsigned int msr = MSR_X2APIC_BASE + reg;
+
+       write_msr(msr, val);
+}
+
+/*
+ * Send an IPI in x2APIC mode: a single write to the ICR MSR with the
+ * destination APIC ID in the upper and icr_lo in the lower 32 bits.
+ */
+static void send_x2apic_ipi(u32 apic_id, u32 icr_lo)
+{
+       unsigned long icr = (unsigned long)apic_id << 32;
+
+       icr |= icr_lo;
+       write_msr(MSR_X2APIC_BASE + APIC_REG_ICR, icr);
+}
+
+/*
+ * Return the APIC ID of the calling CPU as reported by the active APIC
+ * access method. apic_ops must have been set up by apic_init() before.
+ */
+int phys_processor_id(void)
+{
+       return apic_ops.read_id();
+}
+
+/*
+ * Register the APIC of the calling CPU.
+ *
+ * Records the mapping from the physical APIC ID to the logical CPU ID and
+ * stores the APIC ID in cpu_data.
+ *
+ * Returns 0 on success, -ERANGE if the APIC ID exceeds APIC_MAX_PHYS_ID,
+ * -EBUSY if the APIC ID is already registered and -EINVAL if the xAPIC is
+ * not configured as supported (see below).
+ */
+int apic_cpu_init(struct per_cpu *cpu_data)
+{
+       unsigned int cpu_id = cpu_data->cpu_id;
+       unsigned int apic_id = phys_processor_id();
+
+       printk("(APIC ID %d) ", apic_id);
+
+       if (apic_id > APIC_MAX_PHYS_ID)
+               return -ERANGE;
+       if (apic_to_cpu_id[apic_id] != APIC_INVALID_ID)
+               return -EBUSY;
+
+       /* only flat mode with LDR corresponding to logical ID supported */
+       if (!using_x2apic) {
+               if (apic_ops.read(APIC_REG_DFR) != 0xffffffff)
+                       return -EINVAL;
+               if (apic_ops.read(APIC_REG_LDR) != 1UL << (cpu_id + 24))
+                       return -EINVAL;
+       }
+
+       cpu_data->apic_id = apic_id;
+       apic_to_cpu_id[apic_id] = cpu_id;
+       return 0;
+}
+
+/*
+ * Detect the APIC mode from the IA32_APICBASE MSR and install the matching
+ * access methods in apic_ops.
+ *
+ * In xAPIC mode a page is taken from the remap pool and mapped uncached to
+ * XAPIC_BASE.
+ *
+ * Returns 0 on success, -EIO if the APIC is enabled in neither mode,
+ * -ENOMEM if no remap page is available, or the error reported by
+ * page_map_create().
+ */
+int apic_init(void)
+{
+       unsigned long apicbase = read_msr(MSR_IA32_APICBASE);
+       int err;
+
+       if (!(apicbase & (APIC_BASE_EXTD | APIC_BASE_EN)))
+               return -EIO;
+
+       if (apicbase & APIC_BASE_EXTD) {
+               using_x2apic = true;
+               /* set programmatically to enable address fixup */
+               apic_ops.read = read_x2apic;
+               apic_ops.read_id = read_x2apic_id;
+               apic_ops.write = write_x2apic;
+               apic_ops.send_ipi = send_x2apic_ipi;
+       } else {
+               xapic_page = page_alloc(&remap_pool, 1);
+               if (!xapic_page)
+                       return -ENOMEM;
+
+               err = page_map_create(hv_page_table, XAPIC_BASE, PAGE_SIZE,
+                                     (unsigned long)xapic_page,
+                                     PAGE_DEFAULT_FLAGS | PAGE_FLAG_UNCACHED,
+                                     PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
+               if (err)
+                       return err;
+
+               apic_ops.read = read_xapic;
+               apic_ops.read_id = read_xapic_id;
+               apic_ops.write = write_xapic;
+               apic_ops.send_ipi = send_xapic_ipi;
+       }
+
+       printk("Using x%sAPIC\n", using_x2apic ? "2" : "");
+
+       return 0;
+}
+
+/*
+ * Stop the given CPU and wait until it has acknowledged the stop.
+ *
+ * The stop request is flagged under wait_lock. If the target is not already
+ * stopped, it is kicked with an NMI and this function spins until the
+ * target sets cpu_stopped.
+ */
+void arch_suspend_cpu(unsigned int cpu_id)
+{
+       struct per_cpu *target_data = per_cpu(cpu_id);
+       bool already_stopped;
+
+       spin_lock(&wait_lock);
+       target_data->stop_cpu = true;
+       already_stopped = target_data->cpu_stopped;
+       spin_unlock(&wait_lock);
+
+       if (already_stopped)
+               return;
+
+       apic_ops.send_ipi(target_data->apic_id,
+                         APIC_ICR_DLVR_NMI | APIC_ICR_DEST_PHYSICAL |
+                         APIC_ICR_LV_ASSERT | APIC_ICR_TM_EDGE |
+                         APIC_ICR_SH_NONE);
+
+       for (;;) {
+               if (target_data->cpu_stopped)
+                       break;
+               cpu_relax();
+       }
+}
+
+/*
+ * Release a CPU that is held in apic_handle_events() by clearing its
+ * stop_cpu flag.
+ */
+void arch_resume_cpu(unsigned int cpu_id)
+{
+       /* make any state changes visible before releasing the CPU */
+       memory_barrier();
+
+       per_cpu(cpu_id)->stop_cpu = false;
+}
+
+/*
+ * Reset a CPU: set its SIPI vector to APIC_BSP_PSEUDO_SIPI and resume it,
+ * so that apic_handle_events() on the target returns that vector.
+ *
+ * target cpu has to be stopped
+ */
+void arch_reset_cpu(unsigned int cpu_id)
+{
+       per_cpu(cpu_id)->sipi_vector = APIC_BSP_PSEUDO_SIPI;
+
+       arch_resume_cpu(cpu_id);
+}
+
+/*
+ * Request the shutdown of a CPU: the CPU is suspended first, flagged with
+ * shutdown_cpu and then resumed so that it picks up the request in
+ * apic_handle_events(). This function does not wait for the shutdown to
+ * complete.
+ */
+void arch_shutdown_cpu(unsigned int cpu_id)
+{
+       arch_suspend_cpu(cpu_id);
+       per_cpu(cpu_id)->shutdown_cpu = true;
+       arch_resume_cpu(cpu_id);
+       /*
+        * Note: The caller has to ensure that the target CPU has enough time
+        * to reach the shutdown position before destroying the code path it
+        * has to take to get there. This can be ensured by bringing the CPU
+        * online again under Linux before cleaning up the hypervisor.
+        */
+}
+
+/*
+ * NMI handler, called from nmi_entry with the per-CPU data of the
+ * interrupted CPU. It only schedules a VM exit via vmx_schedule_vmexit().
+ */
+void apic_nmi_handler(struct per_cpu *cpu_data)
+{
+       vmx_schedule_vmexit(cpu_data);
+}
+
+/*
+ * Process pending stop/INIT/SIPI/shutdown/flush requests for the calling
+ * CPU.
+ *
+ * The CPU marks itself as stopped and spins as long as it is told to stop
+ * or is waiting for a SIPI. A pending INIT signal puts it into the
+ * wait-for-SIPI state; the loop is repeated if another INIT arrived in the
+ * meantime.
+ *
+ * Returns the SIPI vector stored in cpu_data; it is -1 if the last loop
+ * iteration found no INIT pending and nobody set a vector while the CPU
+ * was stopped.
+ */
+int apic_handle_events(struct per_cpu *cpu_data)
+{
+       spin_lock(&wait_lock);
+
+       do {
+               if (cpu_data->init_signaled) {
+                       cpu_data->init_signaled = false;
+                       cpu_data->wait_for_sipi = true;
+               } else
+                       cpu_data->sipi_vector = -1;
+
+               /* announce the stop to arch_suspend_cpu() */
+               cpu_data->cpu_stopped = true;
+
+               /* the lock must not be held while spinning */
+               spin_unlock(&wait_lock);
+
+               while (cpu_data->wait_for_sipi || cpu_data->stop_cpu)
+                       cpu_relax();
+
+               if (cpu_data->shutdown_cpu) {
+                       /* disable APIC */
+                       apic_ops.write(APIC_REG_SPIV, 0);
+                       vmx_cpu_exit(cpu_data);
+                       asm volatile("hlt");
+               }
+
+               spin_lock(&wait_lock);
+
+               cpu_data->cpu_stopped = false;
+       } while (cpu_data->init_signaled);
+
+       /* flush request, set by arch_cell_create() for the other CPUs */
+       if (cpu_data->flush_caches) {
+               cpu_data->flush_caches = false;
+               flush_tlb();
+               vmx_invept();
+       }
+
+       spin_unlock(&wait_lock);
+
+       return cpu_data->sipi_vector;
+}
+
+/*
+ * Validate the delivery mode and destination shorthand of an ICR value.
+ *
+ * Supported delivery modes are INIT, fixed, lowest priority, NMI and SIPI;
+ * supported shorthands are "none" and "self". Anything else is reported via
+ * panic_printk() and followed by panic_stop().
+ */
+static void apic_validate_ipi_mode(struct per_cpu *cpu_data, u32 lo_val)
+{
+       u32 delivery_mode = lo_val & APIC_ICR_DLVR_MASK;
+       u32 shorthand = lo_val & APIC_ICR_SH_MASK;
+
+       if (delivery_mode != APIC_ICR_DLVR_INIT &&
+           delivery_mode != APIC_ICR_DLVR_FIXED &&
+           delivery_mode != APIC_ICR_DLVR_LOWPRI &&
+           delivery_mode != APIC_ICR_DLVR_NMI &&
+           delivery_mode != APIC_ICR_DLVR_SIPI) {
+               panic_printk("FATAL: Unsupported APIC delivery mode, "
+                            "ICR.lo=%x\n", lo_val);
+               panic_stop(cpu_data);
+       }
+
+       if (shorthand != APIC_ICR_SH_NONE && shorthand != APIC_ICR_SH_SELF) {
+               panic_printk("FATAL: Unsupported shorthand, ICR.lo=%x\n",
+                            lo_val);
+               panic_stop(cpu_data);
+       }
+}
+
+/*
+ * Deliver an IPI requested by a cell to a single target CPU.
+ *
+ * Targets that are invalid or do not belong to the sender's cell are
+ * rejected with a warning. NMI IPIs are dropped, INIT and SIPI are emulated
+ * via the per-CPU state of the target, everything else is forwarded to the
+ * physical APIC.
+ *
+ * orig_icr_hi is only used for the warning message.
+ */
+static void apic_deliver_ipi(struct per_cpu *cpu_data,
+                            unsigned int target_cpu_id,
+                            u32 orig_icr_hi, u32 icr_lo)
+{
+       u32 delivery_mode = icr_lo & APIC_ICR_DLVR_MASK;
+       struct per_cpu *target_data;
+
+       if (target_cpu_id == APIC_INVALID_ID ||
+           !test_bit(target_cpu_id, cpu_data->cell->cpu_set->bitmap)) {
+               printk("WARNING: CPU %d specified IPI destination outside "
+                      "cell boundaries, ICR.hi=%x\n",
+                      cpu_data->cpu_id, orig_icr_hi);
+               return;
+       }
+
+       target_data = per_cpu(target_cpu_id);
+
+       if (delivery_mode == APIC_ICR_DLVR_NMI) {
+               /* TODO: must be sent via hypervisor */
+               printk("Ignoring NMI IPI\n");
+               return;
+       }
+
+       if (delivery_mode == APIC_ICR_DLVR_INIT) {
+               spin_lock(&wait_lock);
+               if (!target_data->wait_for_sipi)
+                       target_data->init_signaled = true;
+               spin_unlock(&wait_lock);
+
+               /* kick the target so that it processes the INIT request */
+               apic_ops.send_ipi(target_data->apic_id,
+                                 APIC_ICR_DLVR_NMI | APIC_ICR_DEST_PHYSICAL |
+                                 APIC_ICR_LV_ASSERT | APIC_ICR_TM_EDGE |
+                                 APIC_ICR_SH_NONE);
+               return;
+       }
+
+       if (delivery_mode == APIC_ICR_DLVR_SIPI) {
+               spin_lock(&wait_lock);
+               /* a SIPI is only accepted while the target waits for it */
+               if (target_data->wait_for_sipi) {
+                       target_data->wait_for_sipi = false;
+                       target_data->sipi_vector =
+                               icr_lo & APIC_ICR_VECTOR_MASK;
+               }
+               spin_unlock(&wait_lock);
+               return;
+       }
+
+       apic_ops.send_ipi(target_data->apic_id, icr_lo);
+}
+
+/*
+ * Deliver an IPI that uses logical destination mode.
+ *
+ * The destination is expanded into individual targets and each of them is
+ * passed to apic_deliver_ipi(), with lo_val already stripped of the
+ * logical destination flag by the caller:
+ *  - x2APIC: dest consists of a cluster ID and a bitmap of logical IDs
+ *    within that cluster; both are combined into the APIC ID, which is then
+ *    translated into a CPU ID.
+ *  - xAPIC: dest is a bitmap of CPU IDs (only flat mode with the LDR
+ *    matching the CPU ID is supported, see apic_cpu_init()).
+ *
+ * hi_val is the original ICR.hi value and is only forwarded for
+ * diagnostics.
+ */
+static void apic_deliver_logical_dest_ipi(struct per_cpu *cpu_data,
+                                         unsigned long dest, u32 lo_val,
+                                         u32 hi_val)
+{
+       unsigned int target_cpu_id;
+       unsigned int logical_id;
+       unsigned int cluster_id;
+       unsigned long dest_mask;
+       unsigned int apic_id;
+
+       if (using_x2apic) {
+               cluster_id = (dest & X2APIC_DEST_CLUSTER_ID_MASK) >>
+                       X2APIC_DEST_CLUSTER_ID_SHIFT;
+               /* invert so that ffz() finds the next requested target */
+               dest_mask = ~(dest & X2APIC_DEST_LOGICAL_ID_MASK);
+               while (dest_mask != ~0UL) {
+                       logical_id = ffz(dest_mask);
+                       dest_mask |= 1UL << logical_id;
+                       apic_id = logical_id |
+                               (cluster_id << X2APIC_CLUSTER_ID_SHIFT);
+                       /*
+                        * The cluster ID is supplied by the guest, so the
+                        * resulting APIC ID may exceed the translation
+                        * table. Treat such IDs as invalid destinations
+                        * instead of reading beyond the array.
+                        */
+                       target_cpu_id = APIC_INVALID_ID;
+                       if (apic_id <= APIC_MAX_PHYS_ID)
+                               target_cpu_id = apic_to_cpu_id[apic_id];
+                       apic_deliver_ipi(cpu_data, target_cpu_id, hi_val,
+                                        lo_val);
+               }
+       } else {
+               dest_mask = ~dest;
+               while (dest_mask != ~0UL) {
+                       target_cpu_id = ffz(dest_mask);
+                       dest_mask |= 1UL << target_cpu_id;
+                       apic_deliver_ipi(cpu_data, target_cpu_id, hi_val,
+                                        lo_val);
+               }
+       }
+}
+
+/*
+ * Handle a write of a cell to the interrupt command register.
+ *
+ * lo_val/hi_val are the two ICR halves. After validating the requested
+ * mode, the destination is extracted (bits 31:24 of hi_val in xAPIC mode,
+ * all of hi_val in x2APIC mode) and the IPI is delivered to the logical or
+ * physical destination.
+ */
+void apic_handle_icr_write(struct per_cpu *cpu_data, u32 lo_val, u32 hi_val)
+{
+       unsigned int target_cpu_id = APIC_INVALID_ID;
+       unsigned long dest;
+
+       apic_validate_ipi_mode(cpu_data, lo_val);
+
+       dest = using_x2apic ? hi_val : hi_val >> 24;
+
+       if (lo_val & APIC_ICR_DEST_LOGICAL) {
+               /* targets are addressed individually, i.e. physically */
+               lo_val &= ~APIC_ICR_DEST_LOGICAL;
+               apic_deliver_logical_dest_ipi(cpu_data, dest, lo_val, hi_val);
+               return;
+       }
+
+       /* IDs beyond the translation table stay invalid */
+       if (dest <= APIC_MAX_PHYS_ID)
+               target_cpu_id = apic_to_cpu_id[dest];
+       apic_deliver_ipi(cpu_data, target_cpu_id, hi_val, lo_val);
+}
+
+/*
+ * Emulate a memory-mapped access of a cell to the APIC register reg.
+ *
+ * The accessing instruction at rip is parsed by mmio_parse(). Only 4-byte
+ * accesses are supported. ICR writes are routed through
+ * apic_handle_icr_write(); writes that would change LDR or DFR to an
+ * unsupported value are rejected; all other accesses are forwarded to the
+ * physical APIC.
+ *
+ * Returns the length of the emulated instruction or 0 on error.
+ */
+unsigned int apic_mmio_access(struct registers *guest_regs,
+                             struct per_cpu *cpu_data, unsigned long rip,
+                             unsigned long page_table_addr, unsigned int reg,
+                             bool is_write)
+{
+       struct mmio_access access;
+       /*
+        * APIC registers are 32 bits wide. Holding the value in a u32 drops
+        * the upper half of the 64-bit guest register on writes, which is
+        * not part of a 4-byte store and must not influence the LDR/DFR
+        * checks below.
+        */
+       u32 val;
+
+       access = mmio_parse(cpu_data, rip, page_table_addr, is_write);
+       if (access.inst_len == 0)
+               return 0;
+       if (access.size != 4) {
+               panic_printk("FATAL: Unsupported APIC access width %d\n",
+                            access.size);
+               return 0;
+       }
+       if (is_write) {
+               val = ((unsigned long *)guest_regs)[access.reg];
+               if (reg == APIC_REG_ICR) {
+                       apic_handle_icr_write(cpu_data, val,
+                                             apic_ops.read(APIC_REG_ICR_HI));
+               } else if (reg == APIC_REG_LDR &&
+                        val != 1UL << (cpu_data->cpu_id + 24)) {
+                       panic_printk("FATAL: Unsupported change to LDR: %x\n",
+                                    val);
+                       return 0;
+               } else if (reg == APIC_REG_DFR && val != 0xffffffff) {
+                       panic_printk("FATAL: Unsupported change to DFR: %x\n",
+                                    val);
+                       return 0;
+               } else
+                       apic_ops.write(reg, val);
+       } else {
+               val = apic_ops.read(reg);
+               /* the target register receives the zero-extended value */
+               ((unsigned long *)guest_regs)[access.reg] = val;
+       }
+       return access.inst_len;
+}
+
+/*
+ * Emulate a write of a cell to an x2APIC MSR: the MSR number is taken from
+ * the guest's RCX, the value from RAX. Self-IPI writes are not emulated yet
+ * and only logged; everything else goes to the physical APIC.
+ */
+void x2apic_handle_write(struct registers *guest_regs)
+{
+       u32 reg = guest_regs->rcx;
+
+       if (reg != MSR_X2APIC_SELF_IPI) {
+               apic_ops.write(reg - MSR_X2APIC_BASE, guest_regs->rax);
+               return;
+       }
+
+       /* TODO: emulate */
+       printk("Unhandled x2APIC self IPI write\n");
+}
+
+/*
+ * Emulate a read of a cell from an x2APIC MSR: the MSR number is taken from
+ * the guest's RCX, the result is returned in RAX (low half) and RDX (high
+ * half). The high half is zero except for the ICR.
+ *
+ * RAX and RDX are assigned as a whole: like RDMSR on real hardware, the
+ * emulation has to clear the upper 32 bits of both registers instead of
+ * preserving what the guest had there.
+ */
+void x2apic_handle_read(struct registers *guest_regs)
+{
+       u32 reg = guest_regs->rcx;
+
+       guest_regs->rax = apic_ops.read(reg - MSR_X2APIC_BASE);
+
+       guest_regs->rdx = 0;
+       if (reg == MSR_X2APIC_ICR)
+               guest_regs->rdx = apic_ops.read(reg - MSR_X2APIC_BASE + 1);
+}
diff --git a/hypervisor/arch/x86/control.c b/hypervisor/arch/x86/control.c
new file mode 100644 (file)
index 0000000..f8c1f53
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/control.h>
+#include <asm/vmx.h>
+
+/*
+ * Architecture-specific part of creating a cell.
+ *
+ * The cell of the calling CPU is shrunk according to config, all other CPUs
+ * of that cell are flagged to flush their caches (processed in
+ * apic_handle_events()), and finally the new cell is initialized.
+ *
+ * Returns the result of vmx_cell_init().
+ */
+int arch_cell_create(struct per_cpu *cpu_data, struct cell *new_cell,
+                    struct jailhouse_cell_desc *config)
+{
+       unsigned int cpu;
+
+       vmx_cell_shrink(cpu_data->cell, config);
+
+       /* the calling CPU itself is excluded from the flush request */
+       for_each_cpu_except(cpu, cpu_data->cell->cpu_set, cpu_data->cpu_id)
+               per_cpu(cpu)->flush_caches = true;
+
+       return vmx_cell_init(new_cell, config);
+}
diff --git a/hypervisor/arch/x86/dbg-write.c b/hypervisor/arch/x86/dbg-write.c
new file mode 100644 (file)
index 0000000..3614c67
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_UART_OXPCIE952
+#define UART_BASE              0xe010
+#else
+#define UART_BASE              0x3f8
+#endif
+#define  UART_TX               0x0
+#define  UART_DLL              0x0
+#define  UART_DLM              0x1
+#define  UART_LCR              0x3
+#define  UART_LCR_8N1          0x03
+#define  UART_LCR_DLAB         0x80
+#define  UART_LSR              0x5
+#define  UART_LSR_THRE         0x20
+
+/*
+ * Set up the debug UART at UART_BASE: program the divisor latch (low byte
+ * 0x22 for CONFIG_UART_OXPCIE952, 1 otherwise; high byte 0) and select the
+ * 8N1 line format.
+ */
+void arch_dbg_write_init(void)
+{
+       /* DLAB set: offsets 0 and 1 address the divisor latch */
+       outb(UART_LCR_DLAB, UART_BASE + UART_LCR);
+#ifdef CONFIG_UART_OXPCIE952
+       outb(0x22, UART_BASE + UART_DLL);
+#else
+       outb(1, UART_BASE + UART_DLL);
+#endif
+       outb(0, UART_BASE + UART_DLM);
+       /* writing the line format also clears DLAB again */
+       outb(UART_LCR_8N1, UART_BASE + UART_LCR);
+}
+
+/*
+ * Write the NUL-terminated string msg to the debug UART, one character at a
+ * time, waiting for the transmitter holding register to become empty before
+ * each character.
+ *
+ * If a panic is in progress on a different CPU, the output is abandoned so
+ * that the panic message is not garbled.
+ */
+void arch_dbg_write(const char *msg)
+{
+       char c;
+
+       while ((c = *msg++) != 0) {
+               while (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))
+                       cpu_relax();
+               if (panic_in_progress && panic_cpu != phys_processor_id())
+                       return;
+               outb(c, UART_BASE + UART_TX);
+       }
+}
diff --git a/hypervisor/arch/x86/entry.S b/hypervisor/arch/x86/entry.S
new file mode 100644 (file)
index 0000000..f20fc33
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/*
+ * Entry point for Linux loader module on JAILHOUSE_ENABLE
+ *
+ * On entry, rdi holds the ID of the calling CPU. The function switches to
+ * the per-CPU stack of that CPU, calls entry() with the address of the
+ * per-CPU data as argument and returns whatever entry() left in rax.
+ */
+       .text
+       .globl arch_entry
+arch_entry:
+       cli
+
+       /* save rbp, rbx and r12-r15 on the Linux stack */
+       push %rbp
+       push %rbx
+       push %r12
+       push %r13
+       push %r14
+       push %r15
+
+       /* rdi = __page_pool + (cpu_id << PERCPU_SIZE_SHIFT), rdx = cpu_id */
+       mov %rdi,%rdx
+       shl $PERCPU_SIZE_SHIFT,%rdi
+       lea __page_pool(%rip),%rax
+       add %rax,%rdi
+
+       /* remember the Linux stack pointer and the CPU ID */
+       mov %rsp,PERCPU_LINUX_SP(%rdi)
+       mov %edx,PERCPU_CPU_ID(%rdi)
+
+       /* switch to the per-CPU stack */
+       lea PERCPU_STACK_END-8(%rdi),%rsp
+
+       /* keep the per-CPU data address across the call */
+       push %rdi
+
+       call entry
+
+       pop %rdi
+
+       /* back to the Linux stack */
+       mov PERCPU_LINUX_SP(%rdi),%rsp
+
+       pop %r15
+       pop %r14
+       pop %r13
+       pop %r12
+       pop %rbx
+       pop %rbp
+
+       ret
+
+
+/*
+ * Fix up Global Offset Table with absolute hypervisor address
+ *
+ * Adds the runtime address of hypervisor_header to every 8-byte entry
+ * between __got_start and __got_end. Clobbers rax, rcx and rdx.
+ */
+       .globl got_init
+got_init:
+       lea __got_start(%rip),%rdx
+       lea __got_end(%rip),%rcx
+       lea hypervisor_header(%rip),%rax
+
+got_loop:
+       /* done once the end of the table is reached */
+       cmp %rdx,%rcx
+       je got_done
+
+       add %rax,(%rdx)
+       add $8,%rdx
+       jmp got_loop
+
+got_done:
+       ret
+
+
+/* Exception/interrupt entry points */
+
+/*
+ * Entry stub for a vector without error code: pushes -1 in place of the
+ * error code, then the vector number, and calls exception_handler with the
+ * address of the resulting frame in rdi. Loops forever should the handler
+ * return. Stubs are aligned to 16 bytes.
+ */
+.macro no_error_entry vector
+       .balign 16
+       pushq $-1
+       pushq $\vector
+       mov %rsp,%rdi
+       call exception_handler
+1:     jmp 1b
+.endm
+
+/*
+ * Entry stub for a vector with error code: only the vector number is
+ * pushed on top of the error code that is already on the stack, then
+ * exception_handler is called with the address of the frame in rdi. Loops
+ * forever should the handler return. Stubs are aligned to 16 bytes.
+ */
+.macro error_entry vector
+       .balign 16
+       pushq $\vector
+       mov %rsp,%rdi
+       call exception_handler
+1:     jmp 1b
+.endm
+
+/*
+ * Table of exception entry stubs, each aligned to 16 bytes, in this order:
+ * vectors 0, 1, 3-7, 8, 9, 10-14, 16, 17, 18, 19. Vectors 8, 10-14 and 17
+ * use the error code variant. There are no stubs for vectors 2 and 15.
+ */
+       .global exception_entries
+       .balign 16
+exception_entries:
+       no_error_entry 0
+       no_error_entry 1
+vector=3
+.rept 5
+       no_error_entry vector
+       vector=vector+1
+.endr
+       error_entry 8
+       no_error_entry 9
+vector=10
+.rept 5
+       error_entry vector
+       vector=vector+1
+.endr
+       no_error_entry 16
+       error_entry 17
+       no_error_entry 18
+       no_error_entry 19
+
+
+/*
+ * NMI entry point: saves rax, rcx, rdx, rsi, rdi and r8-r11, calls
+ * apic_nmi_handler and returns to the interrupted code.
+ */
+       .global nmi_entry
+       .balign 16
+nmi_entry:
+       push %rax
+       push %rcx
+       push %rdx
+       push %rsi
+       push %rdi
+       push %r8
+       push %r9
+       push %r10
+       push %r11
+
+       /*
+        * The argument is the stack pointer rounded down via PAGE_MASK.
+        * NOTE(review): presumably this yields the per-CPU data address
+        * because the stack is the first page of struct per_cpu -- confirm
+        * against the x86 percpu.h.
+        */
+       mov %rsp,%rdi
+       and $PAGE_MASK,%rdi
+       call apic_nmi_handler
+
+       pop %r11
+       pop %r10
+       pop %r9
+       pop %r8
+       pop %rdi
+       pop %rsi
+       pop %rdx
+       pop %rcx
+       pop %rax
+
+       iretq
+
+
+/*
+ * VM-exit handling
+ *
+ * Saves the guest's general-purpose registers on the stack, calls
+ * vmx_handle_exit with a pointer to the saved registers (rdi) and a second
+ * pointer derived from the stack position (rsi), restores the registers and
+ * resumes the guest. If vmresume fails, vmx_entry_failure is invoked.
+ */
+       .globl vm_exit
+vm_exit:
+       push %rax
+       push %rcx
+       push %rdx
+       push %rbx
+       sub $8,%rsp /* placeholder for rsp */
+       push %rbp
+       push %rsi
+       push %rdi
+       push %r8
+       push %r9
+       push %r10
+       push %r11
+       push %r12
+       push %r13
+       push %r14
+       push %r15
+
+       /*
+        * 16 slots of 8 bytes are on the stack now.
+        * NOTE(review): rsi presumably ends up pointing at the per-CPU data,
+        * assuming the stack was at its end (PERCPU_STACK_END) when the exit
+        * occurred -- confirm against the VMCS host state setup.
+        */
+       mov %rsp,%rdi
+       lea -PERCPU_STACK_END+16*8(%rsp),%rsi
+       call vmx_handle_exit
+
+       pop %r15
+       pop %r14
+       pop %r13
+       pop %r12
+       pop %r11
+       pop %r10
+       pop %r9
+       pop %r8
+       pop %rdi
+       pop %rsi
+       pop %rbp
+       add $8,%rsp
+       pop %rbx
+       pop %rdx
+       pop %rcx
+       pop %rax
+
+       vmresume
+
+       /* only reached if vmresume failed */
+       lea -PERCPU_STACK_END(%rsp),%rdi
+       jmp vmx_entry_failure
diff --git a/hypervisor/arch/x86/fault.c b/hypervisor/arch/x86/fault.c
new file mode 100644 (file)
index 0000000..0aacf42
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <asm/types.h>
+#include <asm/fault.h>
+#include <asm/vmx.h>
+
+/*
+ * Stack layout seen by exception_handler. Fields are listed in ascending
+ * address order, starting at the pointer the entry stub passes in.
+ */
+struct exception_frame {
+       u64 vector;     /* pushed by the entry stub */
+       u64 error;      /* exception_handler treats -1 as "no error code" */
+       /*
+        * NOTE(review): the remaining fields presumably mirror the frame the
+        * CPU pushes on exception delivery in 64-bit mode - confirm.
+        */
+       u64 rip;
+       u64 cs;
+       u64 flags;
+       u64 rsp;
+       u64 ss;
+};
+
+/*
+ * C-level handler reached from the exception entry stubs.
+ *
+ * Prints the vector, the error code (unless it is the -1 marker), the
+ * physical CPU ID and RIP/RSP/FLAGS of the faulting context, then stops the
+ * CPU via panic_stop(NULL).
+ */
+void exception_handler(struct exception_frame *frame)
+{
+       const bool has_error_code = (frame->error != -1);
+
+       panic_printk("FATAL: Jailhouse triggered exception #%d\n",
+                    frame->vector);
+       if (has_error_code)
+               panic_printk("Error code: %x\n", frame->error);
+       panic_printk("Physical CPU ID: %d\n", phys_processor_id());
+       panic_printk("RIP: %p RSP: %p FLAGS: %x\n",
+                    frame->rip, frame->rsp, frame->flags);
+
+       panic_stop(NULL);
+}
+
+/*
+ * Report that the calling CPU is being stopped and halt it for good.
+ *
+ * @cpu_data may be NULL (as passed by exception_handler). Otherwise the CPU
+ * ID is appended to the message, cpu_stopped is set and vmx_cpu_exit is
+ * called before halting. If the calling CPU is the one recorded in panic_cpu,
+ * panic_in_progress is cleared. Never returns.
+ */
+void panic_stop(struct per_cpu *cpu_data)
+{
+       panic_printk("Stopping CPU");
+       if (cpu_data) {
+               panic_printk(" %d", cpu_data->cpu_id);
+               cpu_data->cpu_stopped = true;
+               vmx_cpu_exit(cpu_data);
+       }
+       panic_printk("\n");
+
+       if (phys_processor_id() == panic_cpu)
+               panic_in_progress = 0;
+
+       /* hlt can be left again by an interrupt or NMI, hence the loop */
+       asm volatile("1: hlt; jmp 1b");
+       __builtin_unreachable();
+}
diff --git a/hypervisor/arch/x86/include/asm/apic.h b/hypervisor/arch/x86/include/asm/apic.h
new file mode 100644 (file)
index 0000000..c0a7abe
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/* currently our limit due to fixed-size APIC ID map */
+#define APIC_MAX_PHYS_ID               254
+#define APIC_INVALID_ID                        255
+
+#define XAPIC_BASE                     0xfee00000
+
+#define APIC_BASE_EXTD                 (1 << 10)
+#define APIC_BASE_EN                   (1 << 11)
+
+#define APIC_REG_ID                    0x02
+#define APIC_REG_LDR                   0x0d
+#define APIC_REG_DFR                   0x0e
+#define APIC_REG_SPIV                  0x0f
+#define APIC_REG_ICR                   0x30
+#define APIC_REG_ICR_HI                        0x31
+
+#define APIC_ICR_VECTOR_MASK           0x000000ff
+#define APIC_ICR_DLVR_MASK             0x00000700
+#define  APIC_ICR_DLVR_FIXED           0x00000000
+#define  APIC_ICR_DLVR_LOWPRI          0x00000100
+#define  APIC_ICR_DLVR_SMI             0x00000200
+#define  APIC_ICR_DLVR_NMI             0x00000400
+#define  APIC_ICR_DLVR_INIT            0x00000500
+#define  APIC_ICR_DLVR_SIPI            0x00000600
+#define APIC_ICR_DEST_PHYSICAL         0x00000000
+#define APIC_ICR_DEST_LOGICAL          0x00000800
+#define APIC_ICR_DS_PENDING            0x00001000
+#define APIC_ICR_LV_DEASSERT           0x00000000
+#define APIC_ICR_LV_ASSERT             0x00004000
+#define APIC_ICR_TM_EDGE               0x00000000
+#define APIC_ICR_TM_LEVEL              0x00008000
+#define APIC_ICR_SH_MASK               0x000c0000
+#define  APIC_ICR_SH_NONE              0x00000000
+#define  APIC_ICR_SH_SELF              0x00040000
+#define  APIC_ICR_SH_ALL               0x00080000
+#define  APIC_ICR_SH_ALLOTHER          0x000c0000
+
+#define X2APIC_DEST_LOGICAL_ID_MASK    0x0000ffff
+#define X2APIC_DEST_CLUSTER_ID_MASK    0xffff0000
+#define X2APIC_DEST_CLUSTER_ID_SHIFT   16
+
+#define X2APIC_CLUSTER_ID_SHIFT                4
+
+#define APIC_BSP_PSEUDO_SIPI           0x100
+
+extern bool using_x2apic;
+
+int apic_init(void);
+int apic_cpu_init(struct per_cpu *cpu_data);
+
+void apic_nmi_handler(struct per_cpu *cpu_data);
+int apic_handle_events(struct per_cpu *cpu_data);
+
+void apic_handle_icr_write(struct per_cpu *cpu_data, u32 lo_val, u32 hi_val);
+
+unsigned int apic_mmio_access(struct registers *guest_regs,
+                             struct per_cpu *cpu_data, unsigned long rip,
+                             unsigned long page_table_addr, unsigned int reg,
+                             bool is_write);
+
+void x2apic_handle_write(struct registers *guest_regs);
+void x2apic_handle_read(struct registers *guest_regs);
diff --git a/hypervisor/arch/x86/include/asm/bitops.h b/hypervisor/arch/x86/include/asm/bitops.h
new file mode 100644 (file)
index 0000000..e9e592c
--- /dev/null
@@ -0,0 +1,100 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_BITOPS_H
+#define _JAILHOUSE_ASM_BITOPS_H
+
+#include <asm/types.h>
+
+#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
+/* Technically wrong, but this avoids compilation errors on some gcc
+   versions. */
+#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
+#else
+#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#endif
+
+#define CONST_MASK_ADDR(nr, addr)      BITOP_ADDR((void *)(addr) + ((nr)>>3))
+#define CONST_MASK(nr)                 (1 << ((nr) & 7))
+
+/*
+ * Atomically clear bit @nr in the bitmap starting at @addr.
+ *
+ * For a compile-time constant @nr, a locked byte-wide AND is applied to the
+ * byte containing the bit; otherwise a locked BTR is used. Unlike set_bit,
+ * the asm statements carry no "memory" clobber.
+ */
+static inline __attribute__((always_inline)) void
+clear_bit(int nr, volatile unsigned long *addr)
+{
+       if (__builtin_constant_p(nr)) {
+               asm volatile("lock andb %1,%0"
+                       : CONST_MASK_ADDR(nr, addr)
+                       : "iq" ((u8)~CONST_MASK(nr)));
+       } else {
+               asm volatile("lock btr %1,%0"
+                       : BITOP_ADDR(addr)
+                       : "Ir" (nr));
+       }
+}
+
+/*
+ * Atomically set bit @nr in the bitmap starting at @addr.
+ *
+ * A run-time @nr is handled with a locked BTS; a compile-time constant @nr
+ * with a locked byte-wide OR on the byte containing the bit. Both variants
+ * act as a compiler memory barrier.
+ */
+static inline __attribute__((always_inline)) void
+set_bit(unsigned int nr, volatile unsigned long *addr)
+{
+       if (!__builtin_constant_p(nr)) {
+               asm volatile("lock bts %1,%0"
+                       : BITOP_ADDR(addr)
+                       : "Ir" (nr)
+                       : "memory");
+               return;
+       }
+
+       asm volatile("lock orb %1,%0"
+               : CONST_MASK_ADDR(nr, addr)
+               : "iq" ((u8)CONST_MASK(nr))
+               : "memory");
+}
+
+/*
+ * Plain C bit test: returns 1 if bit @nr of the bitmap at @addr is set,
+ * 0 otherwise. Selected by test_bit() for compile-time constant @nr.
+ */
+static inline __attribute__((always_inline)) int
+constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+{
+       const unsigned long word = addr[nr / BITS_PER_LONG];
+       const unsigned long mask = 1UL << (nr % BITS_PER_LONG);
+
+       return (word & mask) != 0;
+}
+
+/*
+ * Bit test for a run-time bit number, implemented with BT.
+ *
+ * BT copies the selected bit into the carry flag and SBB spreads it over the
+ * result register, so the return value is 0 for a clear bit and -1 (not 1)
+ * for a set bit.
+ */
+static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+{
+       int carry_mask;
+
+       asm volatile("bt %2,%1\n\t"
+                    "sbb %0,%0"
+                    : "=r" (carry_mask)
+                    : "m" (*(unsigned long *)addr), "Ir" (nr));
+
+       return carry_mask;
+}
+
+/*
+ * Test bit @nr of the bitmap at @addr; the result is non-zero if it is set.
+ * Dispatches to the plain C variant for compile-time constant @nr and to the
+ * BT-based variant otherwise. Note that @nr and @addr each appear more than
+ * once in the expansion.
+ */
+#define test_bit(nr, addr)                     \
+       (__builtin_constant_p((nr))             \
+        ? constant_test_bit((nr), (addr))      \
+        : variable_test_bit((nr), (addr)))
+
+/*
+ * Atomically set bit @nr of the bitmap at @addr and report its old state.
+ *
+ * Locked BTS leaves the previous bit value in the carry flag, which SBB then
+ * spreads over the result: 0 if the bit was clear, -1 if it was already set.
+ * The "memory" clobber makes this a compiler barrier as well.
+ */
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+       int oldbit;
+
+       asm volatile("lock bts %2,%1\n\t"
+                    "sbb %0,%0" : "=r" (oldbit), BITOP_ADDR(addr)
+                    : "Ir" (nr) : "memory");
+
+       return oldbit;
+}
+
+/*
+ * Find the first zero bit in @word: returns the index of its least
+ * significant clear bit by bit-scanning the inverted value.
+ *
+ * NOTE(review): for @word == ~0UL the scanned value is 0; plain BSF leaves
+ * its destination undefined in that case, while CPUs decoding "rep; bsf" as
+ * TZCNT return the operand width - callers should presumably rule out an
+ * all-ones word beforehand.
+ */
+static inline unsigned long ffz(unsigned long word)
+{
+       asm("rep; bsf %1,%0"
+               : "=r" (word)
+               : "r" (~word));
+       return word;
+}
+
+#endif /* !_JAILHOUSE_ASM_BITOPS_H */
diff --git a/hypervisor/arch/x86/include/asm/cell.h b/hypervisor/arch/x86/include/asm/cell.h
new file mode 100644 (file)
index 0000000..172a7d7
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_CELL_H
+#define _JAILHOUSE_ASM_CELL_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#include <jailhouse/cell-config.h>
+
+/*
+ * Descriptor of a cell. Cells are chained into a singly linked list via
+ * @next; cell_list is declared as the list head.
+ */
+struct cell {
+       struct {
+               /* should be first as it requires page alignment */
+               u8 __attribute__((aligned(PAGE_SIZE))) io_bitmap[2*PAGE_SIZE];
+               /* NOTE(review): presumably the root of the cell's EPT
+                * hierarchy - confirm */
+               pgd_t *ept;
+       } vmx;
+
+       /* name buffer, sized for JAILHOUSE_CELL_NAME_MAXLEN characters plus
+        * one more byte */
+       char name[JAILHOUSE_CELL_NAME_MAXLEN+1];
+       unsigned int id;
+
+       /* NOTE(review): presumably points either at small_cpu_set or at a
+        * larger, separately allocated set - confirm */
+       struct cpu_set *cpu_set;
+       struct cpu_set small_cpu_set;
+
+       unsigned long page_offset;
+
+       struct cell *next;
+};
+
+extern struct cell *cell_list;
+
+#endif /* !_JAILHOUSE_ASM_CELL_H */
diff --git a/hypervisor/arch/x86/include/asm/fault.h b/hypervisor/arch/x86/include/asm/fault.h
new file mode 100644 (file)
index 0000000..5334bd0
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+struct exception_frame;
+
+void __attribute__((noreturn))
+exception_handler(struct exception_frame *frame);
+
+void __attribute__((noreturn)) panic_stop(struct per_cpu *cpu_data);
diff --git a/hypervisor/arch/x86/include/asm/io.h b/hypervisor/arch/x86/include/asm/io.h
new file mode 100644 (file)
index 0000000..50d207b
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+
+/* Write byte @v to I/O port @port. */
+static inline void outb(u8 v, u16 port)
+{
+       /* "dN": port either in dx or as an 8-bit immediate */
+       asm volatile("outb %0,%1" : : "a" (v), "dN" (port));
+}
+
+/* Read one byte from I/O port @port and return it. */
+static inline u8 inb(u16 port)
+{
+       u8 v;
+       /* "dN": port either in dx or as an 8-bit immediate */
+       asm volatile("inb %1,%0" : "=a" (v) : "dN" (port));
+       return v;
+}
diff --git a/hypervisor/arch/x86/include/asm/jailhouse.h b/hypervisor/arch/x86/include/asm/jailhouse.h
new file mode 100644 (file)
index 0000000..023e87d
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#define JAILHOUSE_CALL_INS     "vmcall"
+#define JAILHOUSE_CALL_RESULT  "=a" (result)
+#define JAILHOUSE_CALL_NUM     "a" (num)
+#define JAILHOUSE_CALL_ARG1    "D" (arg1)
+#define JAILHOUSE_CALL_ARG2    "S" (arg2)
+#define JAILHOUSE_CALL_ARG3    "d" (arg3)
+#define JAILHOUSE_CALL_ARG4    "c" (arg4)
+
+/*
+ * Issue hypercall @num without arguments via vmcall. The call number is
+ * passed in eax and the result is returned from eax.
+ */
+static inline __u32 jailhouse_call0(__u32 num)
+{
+       __u32 result;
+
+       asm volatile(JAILHOUSE_CALL_INS
+               : JAILHOUSE_CALL_RESULT
+               : JAILHOUSE_CALL_NUM
+               : "memory");
+       return result;
+}
+
+/*
+ * Issue hypercall @num with one argument via vmcall. The call number is
+ * passed in eax, @arg1 in edi; the result is returned from eax.
+ */
+static inline __u32 jailhouse_call1(__u32 num, __u32 arg1)
+{
+       __u32 result;
+
+       asm volatile(JAILHOUSE_CALL_INS
+               : JAILHOUSE_CALL_RESULT
+               : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1
+               : "memory");
+       return result;
+}
+
+/*
+ * Issue hypercall @num with two arguments via vmcall. The call number is
+ * passed in eax, @arg1 in edi, @arg2 in esi; the result is returned from eax.
+ */
+static inline __u32 jailhouse_call2(__u32 num, __u32 arg1, __u32 arg2)
+{
+       __u32 result;
+
+       asm volatile(JAILHOUSE_CALL_INS
+               : JAILHOUSE_CALL_RESULT
+               : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1, JAILHOUSE_CALL_ARG2
+               : "memory");
+       return result;
+}
+
+/*
+ * Issue hypercall @num with three arguments via vmcall. The call number is
+ * passed in eax, @arg1 in edi, @arg2 in esi, @arg3 in edx; the result is
+ * returned from eax.
+ */
+static inline __u32 jailhouse_call3(__u32 num, __u32 arg1, __u32 arg2,
+                                  __u32 arg3)
+{
+       __u32 result;
+
+       asm volatile(JAILHOUSE_CALL_INS
+               : JAILHOUSE_CALL_RESULT
+               : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1, JAILHOUSE_CALL_ARG2,
+                 JAILHOUSE_CALL_ARG3
+               : "memory");
+       return result;
+}
+
+/*
+ * Issue hypercall @num with four arguments via vmcall. The call number is
+ * passed in eax, @arg1 in edi, @arg2 in esi, @arg3 in edx, @arg4 in ecx; the
+ * result is returned from eax.
+ */
+static inline __u32 jailhouse_call4(__u32 num, __u32 arg1, __u32 arg2,
+                                  __u32 arg3, __u32 arg4)
+{
+       __u32 result;
+
+       asm volatile(JAILHOUSE_CALL_INS
+               : JAILHOUSE_CALL_RESULT
+               : JAILHOUSE_CALL_NUM, JAILHOUSE_CALL_ARG1, JAILHOUSE_CALL_ARG2,
+                 JAILHOUSE_CALL_ARG3, JAILHOUSE_CALL_ARG4
+               : "memory");
+       return result;
+}
diff --git a/hypervisor/arch/x86/include/asm/paging.h b/hypervisor/arch/x86/include/asm/paging.h
new file mode 100644 (file)
index 0000000..be88c95
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PAGING_H
+#define _JAILHOUSE_ASM_PAGING_H
+
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define PAGE_SIZE              4096
+#define PAGE_MASK              ~(PAGE_SIZE - 1)
+
+#define PAGE_DIR_LEVELS                4
+
+#define PAGE_TABLE_OFFS_MASK   0x0000000000000ff8UL
+#define PAGE_ADDR_MASK         0x000ffffffffff000UL
+#define PAGE_OFFS_MASK         0x0000000000000fffUL
+#define HUGEPAGE_ADDR_MASK     0x000fffffffe00000UL
+#define HUGEPAGE_OFFS_MASK     0x00000000001fffffUL
+
+#define PAGE_FLAG_PRESENT      0x01
+#define PAGE_FLAG_RW           0x02
+#define PAGE_FLAG_UNCACHED     0x10
+
+#define PAGE_DEFAULT_FLAGS     (PAGE_FLAG_PRESENT | PAGE_FLAG_RW )
+#define PAGE_READONLY_FLAGS    PAGE_FLAG_PRESENT
+
+#define INVALID_PHYS_ADDR      (~0UL)
+
+#define REMAP_BASE_ADDR                0x0000000000100000UL
+#define NUM_REMAP_BITMAP_PAGES 1
+
+#define FOREIGN_MAPPING_BASE   REMAP_BASE_ADDR
+#define NUM_FOREIGN_PAGES      16
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long pgd_t;
+typedef unsigned long pud_t;
+typedef unsigned long pmd_t;
+typedef unsigned long pte_t;
+
+static inline bool pgd_valid(pgd_t *pgd)
+{
+       return *pgd & 1;
+}
+
+/*
+ * Return the address of the top-level entry for @addr within @page_table.
+ *
+ * (addr >> 36) & PAGE_TABLE_OFFS_MASK equals address bits 47:39 multiplied by
+ * the 8-byte entry size, i.e. the byte offset of the entry in the table.
+ */
+static inline pgd_t *pgd_offset(pgd_t *page_table, unsigned long addr)
+{
+       return (pgd_t *)((unsigned long)page_table +
+                        ((addr >> 36) & PAGE_TABLE_OFFS_MASK));
+}
+
+static inline void set_pgd(pgd_t *pgd, unsigned long addr, unsigned long flags)
+{
+       *pgd = (addr & PAGE_ADDR_MASK) | flags;
+}
+
+static inline void clear_pgd(pgd_t *pgd)
+{
+       *pgd = 0;
+}
+
+static inline bool pud_valid(pud_t *pud)
+{
+       return *pud & 1;
+}
+
+/*
+ * Return the address of the PUD entry for @addr in the table referenced by
+ * @pgd (4-level walk).
+ *
+ * The table address stored in *pgd is combined with the byte offset derived
+ * from address bits 38:30, then @page_table_offset is added to form the
+ * returned pointer.
+ * NOTE(review): @page_table_offset presumably translates the address stored
+ * in the entry into one the hypervisor can dereference - confirm.
+ */
+static inline pud_t *pud4l_offset(pgd_t *pgd, unsigned long page_table_offset,
+                                 unsigned long addr)
+{
+       unsigned long pud = (*pgd & PAGE_ADDR_MASK) +
+               ((addr >> 27) & PAGE_TABLE_OFFS_MASK);
+
+       return (pud_t *)(pud + page_table_offset);
+}
+
+/*
+ * Return the address of the PUD entry for @addr when @page_table itself is
+ * the PUD table (3-level walk): the byte offset derived from address bits
+ * 38:30 is added directly to the table pointer.
+ */
+static inline pud_t *pud3l_offset(pgd_t *page_table, unsigned long addr)
+{
+       return (pud_t *)((unsigned long)page_table +
+                        ((addr >> 27) & PAGE_TABLE_OFFS_MASK));
+}
+
+static inline void set_pud(pud_t *pud, unsigned long addr, unsigned long flags)
+{
+       *pud = (addr & PAGE_ADDR_MASK) | flags;
+}
+
+static inline void clear_pud(pud_t *pud)
+{
+       *pud = 0;
+}
+
+static inline bool pmd_valid(pmd_t *pmd)
+{
+       return *pmd & 1;
+}
+
+/*
+ * Tell whether a PMD entry maps a huge page, i.e. has bit 7 set.
+ *
+ * The result is normalized to true/false: bool is a plain enum in this code
+ * base, so handing back the raw masked value (0x80) would produce a value
+ * that is neither true nor false and would break comparisons against true.
+ */
+static inline bool pmd_is_hugepage(pmd_t *pmd)
+{
+       return (*pmd & (1UL << 7)) != 0;
+}
+
+/*
+ * Return the address of the PMD entry for @addr in the table referenced by
+ * @pud.
+ *
+ * The table address stored in *pud is combined with the byte offset derived
+ * from address bits 29:21, then @page_table_offset is added to form the
+ * returned pointer.
+ * NOTE(review): @page_table_offset presumably translates the address stored
+ * in the entry into one the hypervisor can dereference - confirm.
+ */
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long page_table_offset,
+                               unsigned long addr)
+{
+       unsigned long pmd = (*pud & PAGE_ADDR_MASK) +
+               ((addr >> 18) & PAGE_TABLE_OFFS_MASK);
+
+       return (pmd_t *)(pmd + page_table_offset);
+}
+
+static inline void set_pmd(pmd_t *pmd, unsigned long addr, unsigned long flags)
+{
+       *pmd = (addr & PAGE_ADDR_MASK) | flags;
+}
+
+static inline void clear_pmd(pmd_t *pmd)
+{
+       *pmd = 0;
+}
+
+static inline bool pte_valid(pte_t *pte)
+{
+       return *pte & 1;
+}
+
+/*
+ * Return the address of the PTE for @addr in the table referenced by @pmd.
+ *
+ * The table address stored in *pmd is combined with the byte offset derived
+ * from address bits 20:12, then @page_table_offset is added to form the
+ * returned pointer.
+ * NOTE(review): @page_table_offset presumably translates the address stored
+ * in the entry into one the hypervisor can dereference - confirm.
+ */
+static inline pte_t *pte_offset(pmd_t *pmd, unsigned long page_table_offset,
+                               unsigned long addr)
+{
+       unsigned long pte = (*pmd & PAGE_ADDR_MASK) +
+               ((addr >> 9) & PAGE_TABLE_OFFS_MASK);
+
+       return (pte_t *)(pte + page_table_offset);
+}
+
+static inline void set_pte(pte_t *pte, unsigned long addr, unsigned long flags)
+{
+       *pte = (addr & PAGE_ADDR_MASK) | flags;
+}
+
+static inline void clear_pte(pte_t *pte)
+{
+       *pte = 0;
+}
+
+static inline unsigned long phys_address(pte_t *pte, unsigned long addr)
+{
+       return (*pte & PAGE_ADDR_MASK) + (addr & PAGE_OFFS_MASK);
+}
+
+static inline unsigned long phys_address_hugepage(pmd_t *pmd,
+                                                 unsigned long addr)
+{
+       return (*pmd & HUGEPAGE_ADDR_MASK) + (addr & HUGEPAGE_OFFS_MASK);
+}
+
+/*
+ * Check whether the PUD table referenced by @pgd contains no valid entry.
+ * The table is located by adding @page_table_offset to the address stored in
+ * *pgd; all PAGE_SIZE / sizeof(pud_t) slots are inspected.
+ */
+static inline bool pud_empty(pgd_t *pgd, unsigned long page_table_offset)
+{
+       pud_t *entry = (pud_t *)((*pgd & PAGE_ADDR_MASK) + page_table_offset);
+       pud_t *const table_end = entry + PAGE_SIZE / sizeof(pud_t);
+
+       while (entry < table_end) {
+               if (pud_valid(entry))
+                       return false;
+               entry++;
+       }
+       return true;
+}
+
+/*
+ * Check whether the PMD table referenced by @pud contains no valid entry.
+ * The table is located by adding @page_table_offset to the address stored in
+ * *pud; all PAGE_SIZE / sizeof(pmd_t) slots are inspected.
+ */
+static inline bool pmd_empty(pud_t *pud, unsigned long page_table_offset)
+{
+       pmd_t *entry = (pmd_t *)((*pud & PAGE_ADDR_MASK) + page_table_offset);
+       pmd_t *const table_end = entry + PAGE_SIZE / sizeof(pmd_t);
+
+       while (entry < table_end) {
+               if (pmd_valid(entry))
+                       return false;
+               entry++;
+       }
+       return true;
+}
+
+/*
+ * Check whether the page table referenced by @pmd contains no valid entry.
+ * The table is located by adding @page_table_offset to the address stored in
+ * *pmd; all PAGE_SIZE / sizeof(pte_t) slots are inspected.
+ */
+static inline bool pt_empty(pmd_t *pmd, unsigned long page_table_offset)
+{
+       pte_t *entry = (pte_t *)((*pmd & PAGE_ADDR_MASK) + page_table_offset);
+       pte_t *const table_end = entry + PAGE_SIZE / sizeof(pte_t);
+
+       while (entry < table_end) {
+               if (pte_valid(entry))
+                       return false;
+               entry++;
+       }
+       return true;
+}
+
+/*
+ * Flush the TLB of the calling CPU by writing CR4 with the PGE bit cleared
+ * and then restoring the original value. Per the Intel SDM, a change of
+ * CR4.PGE invalidates all TLB entries, global ones included.
+ * NOTE(review): if PGE is already clear in CR4, neither write changes the
+ * bit - confirm that PGE is always enabled when this is called.
+ */
+static inline void flush_tlb(void)
+{
+       unsigned long cr4 = read_cr4();
+
+       write_cr4(cr4 & ~X86_CR4_PGE);
+       write_cr4(cr4);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PAGING_H */
diff --git a/hypervisor/arch/x86/include/asm/percpu.h b/hypervisor/arch/x86/include/asm/percpu.h
new file mode 100644 (file)
index 0000000..76662e8
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PERCPU_H
+#define _JAILHOUSE_ASM_PERCPU_H
+
+#include <asm/types.h>
+#include <asm/paging.h>
+
+#define NUM_ENTRY_REGS                 6
+
+/* Keep in sync with struct per_cpu! */
+#define PERCPU_SIZE_SHIFT              14
+#define PERCPU_STACK_END               PAGE_SIZE
+#define PERCPU_LINUX_SP                        PERCPU_STACK_END
+#define PERCPU_CPU_ID                  (PERCPU_LINUX_SP + 8)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/cell.h>
+
+/*
+ * Per-CPU data block. The block is page-aligned and starts with the CPU's
+ * stack; the entry assembly locates linux_sp and cpu_id through the PERCPU_*
+ * offsets defined above, which __check_assumptions() validates.
+ */
+struct per_cpu {
+       /* Keep these three in sync with defines above! */
+       u8 stack[PAGE_SIZE];
+       unsigned long linux_sp;
+       unsigned int cpu_id;
+
+       u32 apic_id;
+       struct cell *cell;
+
+       /* NOTE(review): the linux_* fields presumably hold Linux state saved
+        * when the hypervisor took over this CPU - confirm */
+       struct desc_table_reg linux_gdtr;
+       struct desc_table_reg linux_idtr;
+       unsigned long linux_reg[NUM_ENTRY_REGS];
+       unsigned long linux_ip;
+       unsigned long linux_cr3;
+       unsigned long linux_cs;
+       unsigned long linux_tr;
+       unsigned long linux_tr_base;
+       u32 linux_tr_limit;
+       u32 linux_tr_ar_bytes;
+       unsigned long linux_efer;
+       unsigned long linux_fs_base;
+       unsigned long linux_gs_base;
+       unsigned long linux_sysenter_cs;
+       unsigned long linux_sysenter_eip;
+       unsigned long linux_sysenter_esp;
+       bool initialized;
+       enum { VMXOFF = 0, VMXON, VMCS_READY } vmx_state;
+
+       volatile bool stop_cpu;
+       volatile bool wait_for_sipi;
+       volatile bool cpu_stopped;      /* set by panic_stop() */
+       bool init_signaled;
+       int sipi_vector;
+       bool flush_caches;
+       bool shutdown_cpu;
+
+       /* page-sized, page-aligned regions for VMXON and the VMCS */
+       u8 vmxon_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+       u8 vmcs_page[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE)));
+} __attribute__((aligned(PAGE_SIZE)));
+
+/*
+ * Return the per-CPU data block of CPU @cpu.
+ *
+ * The blocks are laid out back to back from __page_pool, each
+ * 1 << PERCPU_SIZE_SHIFT bytes in size, so the result is
+ * __page_pool + (cpu << PERCPU_SIZE_SHIFT). The base is obtained with a
+ * RIP-relative lea.
+ *
+ * The output operand must be a register: lea cannot target memory, and a
+ * memory destination combined with a memory source would not be encodable
+ * for the add either. It stays early-clobber because it is written before
+ * the input operand is consumed.
+ */
+static inline struct per_cpu *per_cpu(unsigned int cpu)
+{
+       struct per_cpu *cpu_data;
+
+       asm volatile(
+               "lea __page_pool(%%rip),%0\n\t"
+               "add %1,%0\n\t"
+               : "=&r" (cpu_data)
+               : "qm" ((unsigned long)cpu << PERCPU_SIZE_SHIFT));
+       return cpu_data;
+}
+
+/* Validate defines */
+#define CHECK_ASSUMPTION(assume)       ((void)sizeof(char[1 - 2*!(assume)]))
+
+/*
+ * Compile-time validation of the PERCPU_* defines against the real layout of
+ * struct per_cpu. Each CHECK_ASSUMPTION expands to a sizeof over an array
+ * type whose size becomes negative if the condition is false, which fails
+ * the build. The function has no run-time effect.
+ */
+static inline void __check_assumptions(void)
+{
+       struct per_cpu cpu_data;
+
+       CHECK_ASSUMPTION(sizeof(struct per_cpu) == (1 << PERCPU_SIZE_SHIFT));
+       CHECK_ASSUMPTION(sizeof(cpu_data.stack) == PERCPU_STACK_END);
+       CHECK_ASSUMPTION(__builtin_offsetof(struct per_cpu, linux_sp) ==
+                        PERCPU_LINUX_SP);
+       CHECK_ASSUMPTION(__builtin_offsetof(struct per_cpu, cpu_id) ==
+                        PERCPU_CPU_ID);
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PERCPU_H */
diff --git a/hypervisor/arch/x86/include/asm/processor.h b/hypervisor/arch/x86/include/asm/processor.h
new file mode 100644 (file)
index 0000000..7ffb7ba
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_PROCESSOR_H
+#define _JAILHOUSE_ASM_PROCESSOR_H
+
+#include <asm/types.h>
+
+#define X86_FEATURE_VMX                                        (1 << 5)
+
+#define X86_CR0_PE                                     0x00000001
+#define X86_CR0_ET                                     0x00000010
+#define X86_CR0_NW                                     0x20000000
+#define X86_CR0_CD                                     0x40000000
+#define X86_CR0_PG                                     0x80000000
+
+#define X86_CR4_PGE                                    0x00000080
+#define X86_CR4_VMXE                                   0x00002000
+
+#define MSR_IA32_APICBASE                              0x0000001b
+#define MSR_IA32_FEATURE_CONTROL                       0x0000003a
+#define MSR_IA32_SYSENTER_CS                           0x00000174
+#define MSR_IA32_SYSENTER_ESP                          0x00000175
+#define MSR_IA32_SYSENTER_EIP                          0x00000176
+#define MSR_IA32_VMX_BASIC                             0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS                     0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS                    0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS                         0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS                                0x00000484
+#define MSR_IA32_VMX_CR0_FIXED0                                0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1                                0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0                                0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1                                0x00000489
+#define MSR_IA32_VMX_PROCBASED_CTLS2                   0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP                      0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS                        0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS               0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS                    0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS                   0x00000490
+#define MSR_X2APIC_BASE                                        0x00000800
+#define MSR_X2APIC_ICR                                 0x00000830
+#define MSR_X2APIC_SELF_IPI                            0x0000083f
+#define MSR_X2APIC_END                                 MSR_X2APIC_SELF_IPI
+#define MSR_EFER                                       0xc0000080
+#define MSR_FS_BASE                                    0xc0000100
+#define MSR_GS_BASE                                    0xc0000101
+
+#define FEATURE_CONTROL_LOCKED                         (1 << 0)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX      (1 << 2)
+
+#define EFER_LME                                       0x00000100
+#define EFER_LMA                                       0x00000400
+
+#define GDT_DESC_NULL                                  0
+#define GDT_DESC_CODE                                  1
+#define GDT_DESC_TSS                                   2
+#define GDT_DESC_TSS_HI                                        3
+#define NUM_GDT_DESC                                   4
+
+#define X86_INST_LEN_CPUID                             2
+#define X86_INST_LEN_RDMSR                             2
+#define X86_INST_LEN_WRMSR                             2
+#define X86_INST_LEN_VMCALL                            3
+#define X86_INST_LEN_MOV_TO_CR                         3
+
+#define X86_OP_REGR_PREFIX                             0x44
+#define X86_OP_MOV_TO_MEM                              0x89
+#define X86_OP_MOV_FROM_MEM                            0x8b
+
+#define NMI_VECTOR                                     2
+
+#ifndef __ASSEMBLY__
+
+/*
+ * General-purpose register set as saved on the stack by the vm_exit entry
+ * code. The field order (ascending addresses) is the reverse of the push
+ * order there and must be kept in sync with it.
+ */
+struct registers {
+       unsigned long r15;
+       unsigned long r14;
+       unsigned long r13;
+       unsigned long r12;
+       unsigned long r11;
+       unsigned long r10;
+       unsigned long r9;
+       unsigned long r8;
+       unsigned long rdi;
+       unsigned long rsi;
+       unsigned long rbp;
+       unsigned long unused;   /* slot reserved in place of rsp */
+       unsigned long rbx;
+       unsigned long rdx;
+       unsigned long rcx;
+       unsigned long rax;
+};
+
+static unsigned long __force_order;
+
+static inline void cpu_relax(void)
+{
+       asm volatile("rep; nop");
+}
+
+static inline void memory_barrier(void)
+{
+       asm volatile("mfence" : : : "memory");
+}
+
+/*
+ * Execute CPUID with *eax and *ecx as inputs and store the resulting
+ * eax/ebx/ecx/edx values through the four pointers.
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+                          unsigned int *ecx, unsigned int *edx)
+{
+       /* ecx is often an input as well as an output. */
+       asm volatile("cpuid"
+           : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+           : "0" (*eax), "2" (*ecx)
+           : "memory");
+}
+
+/*
+ * Query CPUID leaf @op with sub-leaf (ecx input) 0 and return the four
+ * result registers through @eax, @ebx, @ecx and @edx.
+ */
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
+                        unsigned int *ecx, unsigned int *edx)
+{
+       *eax = op;
+       *ecx = 0;
+       __cpuid(eax, ebx, ecx, edx);
+}
+
+static inline unsigned int cpuid_ecx(unsigned int op)
+{
+       unsigned int eax, ebx, ecx, edx;
+
+       cpuid(op, &eax, &ebx, &ecx, &edx);
+       return ecx;
+}
+
+static inline unsigned long read_cr0(void)
+{
+       unsigned long cr0;
+
+       asm volatile("mov %%cr0,%0" : "=r" (cr0), "=m" (__force_order));
+       return cr0;
+}
+
+static inline void write_cr0(unsigned long val)
+{
+       asm volatile("mov %0,%%cr0" : : "r" (val), "m" (__force_order));
+}
+
+static inline unsigned long read_cr3(void)
+{
+       unsigned long cr3;
+
+       asm volatile("mov %%cr3,%0" : "=r" (cr3), "=m" (__force_order));
+       return cr3;
+}
+
+static inline void write_cr3(unsigned long val)
+{
+       asm volatile("mov %0,%%cr3" : : "r" (val), "m" (__force_order));
+}
+
+static inline unsigned long read_cr4(void)
+{
+       unsigned long cr4;
+
+       asm volatile("mov %%cr4,%0" : "=r" (cr4), "=m" (__force_order));
+       return cr4;
+}
+
+static inline void write_cr4(unsigned long val)
+{
+       asm volatile("mov %0,%%cr4" : : "r" (val), "m" (__force_order));
+}
+
+/*
+ * Read model-specific register @msr and return its 64-bit value, assembled
+ * from the low half in eax and the high half in edx.
+ */
+static inline unsigned long read_msr(unsigned int msr)
+{
+       u32 low, high;
+
+       asm volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr));
+       return low | ((unsigned long)high << 32);
+}
+
+/*
+ * Write the 64-bit value @val to model-specific register @msr. The full
+ * value is placed in rax (wrmsr only consumes its low 32 bits) and the upper
+ * half in rdx. The "memory" clobber makes this a compiler barrier.
+ */
+static inline void write_msr(unsigned int msr, unsigned long val)
+{
+       asm volatile("wrmsr"
+               : /* no output */
+               : "c" (msr), "a" (val), "d" (val >> 32)
+               : "memory");
+}
+
+static inline void read_gdtr(struct desc_table_reg *val)
+{
+       asm volatile("sgdtq %0" : "=m" (*val));
+}
+
+/*
+ * Load the GDT register from the descriptor at @val.
+ *
+ * lgdt only reads its memory operand, so *val is passed as an input. Marking
+ * it as a write-only output would allow the compiler to treat the caller's
+ * preceding stores to *val as dead.
+ */
+static inline void write_gdtr(struct desc_table_reg *val)
+{
+       asm volatile("lgdtq %0" : : "m" (*val));
+}
+
+static inline void read_idtr(struct desc_table_reg *val)
+{
+       asm volatile("sidtq %0" : "=m" (*val));
+}
+
+/*
+ * Load the IDT register from the descriptor at @val.
+ *
+ * lidt only reads its memory operand, so *val is passed as an input. Marking
+ * it as a write-only output would allow the compiler to treat the caller's
+ * preceding stores to *val as dead.
+ */
+static inline void write_idtr(struct desc_table_reg *val)
+{
+       asm volatile("lidtq %0" : : "m" (*val));
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_PROCESSOR_H */
diff --git a/hypervisor/arch/x86/include/asm/spinlock.h b/hypervisor/arch/x86/include/asm/spinlock.h
new file mode 100644 (file)
index 0000000..efc0843
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/bitops.h>
+#include <asm/processor.h>
+
+typedef struct {
+       unsigned long state;
+} spinlock_t;
+
+#define DEFINE_SPINLOCK(name)  spinlock_t (name)
+
+/*
+ * Acquire @lock: busy-wait, calling cpu_relax() between attempts, until
+ * test_and_set_bit() reports that bit 0 of lock->state was previously clear.
+ */
+static inline void spin_lock(spinlock_t *lock)
+{
+       while (test_and_set_bit(0, &lock->state))
+               cpu_relax();
+}
+
+/*
+ * Release @lock by atomically clearing bit 0 of lock->state.
+ */
+static inline void spin_unlock(spinlock_t *lock)
+{
+       /* compiler barrier: clear_bit() itself carries no "memory" clobber,
+        * so keep accesses from the critical section from being moved past
+        * the release by the compiler */
+       asm volatile("": : :"memory");
+       clear_bit(0, &lock->state);
+}
diff --git a/hypervisor/arch/x86/include/asm/types.h b/hypervisor/arch/x86/include/asm/types.h
new file mode 100644 (file)
index 0000000..0da0777
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ASM_TYPES_H
+#define _JAILHOUSE_ASM_TYPES_H
+
+#define NULL                           ((void *)0)
+
+#define BITS_PER_LONG                  64
+
+#ifndef __ASSEMBLY__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long s64;
+typedef unsigned long u64;
+
+typedef s8 __s8;
+typedef u8 __u8;
+
+typedef s16 __s16;
+typedef u16 __u16;
+
+typedef s32 __s32;
+typedef u32 __u32;
+
+typedef s64 __s64;
+typedef u64 __u64;
+
+typedef enum { true=1, false=0 } bool;
+
+struct desc_table_reg {
+       u16 limit;
+       u64 base;
+} __attribute__((packed));
+
+struct cpu_set {
+       unsigned long max_cpu_id;
+       /* Note: The bitmap is supposed to be extended by embedding this
+        * struct into a larger buffer. */
+       unsigned long bitmap[1];
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* !_JAILHOUSE_ASM_TYPES_H */
diff --git a/hypervisor/arch/x86/include/asm/vmx.h b/hypervisor/arch/x86/include/asm/vmx.h
new file mode 100644 (file)
index 0000000..9cf832d
--- /dev/null
@@ -0,0 +1,289 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+#include <asm/paging.h>
+#include <asm/processor.h>
+
+#include <jailhouse/cell-config.h>
+
+/* VMCS Encodings */
+/*
+ * Numeric encodings of VMCS fields.
+ *
+ * Every *_HIGH constant is the encoding of the corresponding base field
+ * plus one.
+ * NOTE(review): the value ranges (0x0xxx, 0x2xxx, 0x4xxx, 0x6xxx) appear to
+ * follow the field width classes of the Intel SDM (16-bit, 64-bit, 32-bit,
+ * natural width) - confirm against the SDM before relying on it.
+ */
+enum vmcs_field {
+       VIRTUAL_PROCESSOR_ID            = 0x00000000,
+       GUEST_ES_SELECTOR               = 0x00000800,
+       GUEST_CS_SELECTOR               = 0x00000802,
+       GUEST_SS_SELECTOR               = 0x00000804,
+       GUEST_DS_SELECTOR               = 0x00000806,
+       GUEST_FS_SELECTOR               = 0x00000808,
+       GUEST_GS_SELECTOR               = 0x0000080a,
+       GUEST_LDTR_SELECTOR             = 0x0000080c,
+       GUEST_TR_SELECTOR               = 0x0000080e,
+       HOST_ES_SELECTOR                = 0x00000c00,
+       HOST_CS_SELECTOR                = 0x00000c02,
+       HOST_SS_SELECTOR                = 0x00000c04,
+       HOST_DS_SELECTOR                = 0x00000c06,
+       HOST_FS_SELECTOR                = 0x00000c08,
+       HOST_GS_SELECTOR                = 0x00000c0a,
+       HOST_TR_SELECTOR                = 0x00000c0c,
+       IO_BITMAP_A                     = 0x00002000,
+       IO_BITMAP_A_HIGH                = 0x00002001,
+       IO_BITMAP_B                     = 0x00002002,
+       IO_BITMAP_B_HIGH                = 0x00002003,
+       MSR_BITMAP                      = 0x00002004,
+       MSR_BITMAP_HIGH                 = 0x00002005,
+       VM_EXIT_MSR_STORE_ADDR          = 0x00002006,
+       VM_EXIT_MSR_STORE_ADDR_HIGH     = 0x00002007,
+       VM_EXIT_MSR_LOAD_ADDR           = 0x00002008,
+       VM_EXIT_MSR_LOAD_ADDR_HIGH      = 0x00002009,
+       VM_ENTRY_MSR_LOAD_ADDR          = 0x0000200a,
+       VM_ENTRY_MSR_LOAD_ADDR_HIGH     = 0x0000200b,
+       TSC_OFFSET                      = 0x00002010,
+       TSC_OFFSET_HIGH                 = 0x00002011,
+       VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
+       VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+       APIC_ACCESS_ADDR                = 0x00002014,
+       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
+       EPT_POINTER                     = 0x0000201a,
+       EPT_POINTER_HIGH                = 0x0000201b,
+       GUEST_PHYSICAL_ADDRESS          = 0x00002400,
+       GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+       VMCS_LINK_POINTER               = 0x00002800,
+       VMCS_LINK_POINTER_HIGH          = 0x00002801,
+       GUEST_IA32_DEBUGCTL             = 0x00002802,
+       GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
+       GUEST_IA32_PAT                  = 0x00002804,
+       GUEST_IA32_PAT_HIGH             = 0x00002805,
+       GUEST_IA32_EFER                 = 0x00002806,
+       GUEST_IA32_EFER_HIGH            = 0x00002807,
+       GUEST_IA32_PERF_GLOBAL_CTRL     = 0x00002808,
+       GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+       GUEST_PDPTR0                    = 0x0000280a,
+       GUEST_PDPTR0_HIGH               = 0x0000280b,
+       GUEST_PDPTR1                    = 0x0000280c,
+       GUEST_PDPTR1_HIGH               = 0x0000280d,
+       GUEST_PDPTR2                    = 0x0000280e,
+       GUEST_PDPTR2_HIGH               = 0x0000280f,
+       GUEST_PDPTR3                    = 0x00002810,
+       GUEST_PDPTR3_HIGH               = 0x00002811,
+       HOST_IA32_PAT                   = 0x00002c00,
+       HOST_IA32_PAT_HIGH              = 0x00002c01,
+       HOST_IA32_EFER                  = 0x00002c02,
+       HOST_IA32_EFER_HIGH             = 0x00002c03,
+       HOST_IA32_PERF_GLOBAL_CTRL      = 0x00002c04,
+       HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+       PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+       CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
+       EXCEPTION_BITMAP                = 0x00004004,
+       PAGE_FAULT_ERROR_CODE_MASK      = 0x00004006,
+       PAGE_FAULT_ERROR_CODE_MATCH     = 0x00004008,
+       CR3_TARGET_COUNT                = 0x0000400a,
+       VM_EXIT_CONTROLS                = 0x0000400c,
+       VM_EXIT_MSR_STORE_COUNT         = 0x0000400e,
+       VM_EXIT_MSR_LOAD_COUNT          = 0x00004010,
+       VM_ENTRY_CONTROLS               = 0x00004012,
+       VM_ENTRY_MSR_LOAD_COUNT         = 0x00004014,
+       VM_ENTRY_INTR_INFO_FIELD        = 0x00004016,
+       VM_ENTRY_EXCEPTION_ERROR_CODE   = 0x00004018,
+       VM_ENTRY_INSTRUCTION_LEN        = 0x0000401a,
+       TPR_THRESHOLD                   = 0x0000401c,
+       SECONDARY_VM_EXEC_CONTROL       = 0x0000401e,
+       PLE_GAP                         = 0x00004020,
+       PLE_WINDOW                      = 0x00004022,
+       VM_INSTRUCTION_ERROR            = 0x00004400,
+       VM_EXIT_REASON                  = 0x00004402,
+       VM_EXIT_INTR_INFO               = 0x00004404,
+       VM_EXIT_INTR_ERROR_CODE         = 0x00004406,
+       IDT_VECTORING_INFO_FIELD        = 0x00004408,
+       IDT_VECTORING_ERROR_CODE        = 0x0000440a,
+       VM_EXIT_INSTRUCTION_LEN         = 0x0000440c,
+       VMX_INSTRUCTION_INFO            = 0x0000440e,
+       GUEST_ES_LIMIT                  = 0x00004800,
+       GUEST_CS_LIMIT                  = 0x00004802,
+       GUEST_SS_LIMIT                  = 0x00004804,
+       GUEST_DS_LIMIT                  = 0x00004806,
+       GUEST_FS_LIMIT                  = 0x00004808,
+       GUEST_GS_LIMIT                  = 0x0000480a,
+       GUEST_LDTR_LIMIT                = 0x0000480c,
+       GUEST_TR_LIMIT                  = 0x0000480e,
+       GUEST_GDTR_LIMIT                = 0x00004810,
+       GUEST_IDTR_LIMIT                = 0x00004812,
+       GUEST_ES_AR_BYTES               = 0x00004814,
+       GUEST_CS_AR_BYTES               = 0x00004816,
+       GUEST_SS_AR_BYTES               = 0x00004818,
+       GUEST_DS_AR_BYTES               = 0x0000481a,
+       GUEST_FS_AR_BYTES               = 0x0000481c,
+       GUEST_GS_AR_BYTES               = 0x0000481e,
+       GUEST_LDTR_AR_BYTES             = 0x00004820,
+       GUEST_TR_AR_BYTES               = 0x00004822,
+       GUEST_INTERRUPTIBILITY_INFO     = 0x00004824,
+       GUEST_ACTIVITY_STATE            = 0X00004826,
+       GUEST_SYSENTER_CS               = 0x0000482A,
+       VMX_PREEMPTION_TIMER_VALUE      = 0x0000482E,
+       HOST_IA32_SYSENTER_CS           = 0x00004c00,
+       CR0_GUEST_HOST_MASK             = 0x00006000,
+       CR4_GUEST_HOST_MASK             = 0x00006002,
+       CR0_READ_SHADOW                 = 0x00006004,
+       CR4_READ_SHADOW                 = 0x00006006,
+       CR3_TARGET_VALUE0               = 0x00006008,
+       CR3_TARGET_VALUE1               = 0x0000600a,
+       CR3_TARGET_VALUE2               = 0x0000600c,
+       CR3_TARGET_VALUE3               = 0x0000600e,
+       EXIT_QUALIFICATION              = 0x00006400,
+       GUEST_LINEAR_ADDRESS            = 0x0000640a,
+       GUEST_CR0                       = 0x00006800,
+       GUEST_CR3                       = 0x00006802,
+       GUEST_CR4                       = 0x00006804,
+       GUEST_ES_BASE                   = 0x00006806,
+       GUEST_CS_BASE                   = 0x00006808,
+       GUEST_SS_BASE                   = 0x0000680a,
+       GUEST_DS_BASE                   = 0x0000680c,
+       GUEST_FS_BASE                   = 0x0000680e,
+       GUEST_GS_BASE                   = 0x00006810,
+       GUEST_LDTR_BASE                 = 0x00006812,
+       GUEST_TR_BASE                   = 0x00006814,
+       GUEST_GDTR_BASE                 = 0x00006816,
+       GUEST_IDTR_BASE                 = 0x00006818,
+       GUEST_DR7                       = 0x0000681a,
+       GUEST_RSP                       = 0x0000681c,
+       GUEST_RIP                       = 0x0000681e,
+       GUEST_RFLAGS                    = 0x00006820,
+       GUEST_PENDING_DBG_EXCEPTIONS    = 0x00006822,
+       GUEST_SYSENTER_ESP              = 0x00006824,
+       GUEST_SYSENTER_EIP              = 0x00006826,
+       HOST_CR0                        = 0x00006c00,
+       HOST_CR3                        = 0x00006c02,
+       HOST_CR4                        = 0x00006c04,
+       HOST_FS_BASE                    = 0x00006c06,
+       HOST_GS_BASE                    = 0x00006c08,
+       HOST_TR_BASE                    = 0x00006c0a,
+       HOST_GDTR_BASE                  = 0x00006c0c,
+       HOST_IDTR_BASE                  = 0x00006c0e,
+       HOST_IA32_SYSENTER_ESP          = 0x00006c10,
+       HOST_IA32_SYSENTER_EIP          = 0x00006c12,
+       HOST_RSP                        = 0x00006c14,
+       HOST_RIP                        = 0x00006c16,
+};
+
+/* NOTE(review): presumably a value for the GUEST_ACTIVITY_STATE field -
+ * confirm against its users. */
+#define GUEST_ACTIVITY_ACTIVE                  0
+
+/* Indices of the four sub-bitmaps of the MSR bitmap; used as first-dimension
+ * designators of msr_bitmap[] in vmx.c. */
+#define VMX_MSR_BITMAP_0000_READ               0
+#define VMX_MSR_BITMAP_C000_READ               1
+#define VMX_MSR_BITMAP_0000_WRITE              2
+#define VMX_MSR_BITMAP_C000_WRITE              3
+
+/* Single-bit masks, grouped by name prefix.
+ * NOTE(review): each group presumably belongs to the VMCS control field of
+ * the matching name (PIN_BASED_VM_EXEC_CONTROL, CPU_BASED_VM_EXEC_CONTROL,
+ * SECONDARY_VM_EXEC_CONTROL, VM_EXIT_CONTROLS, VM_ENTRY_CONTROLS) - confirm
+ * against the users in vmx.c. */
+#define PIN_BASED_NMI_EXITING                  0x00000008
+#define PIN_BASED_VMX_PREEMPTION_TIMER         0x00000040
+
+#define CPU_BASED_USE_IO_BITMAPS               0x02000000
+#define CPU_BASED_USE_MSR_BITMAPS              0x10000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS  0x80000000
+
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES        0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT              0x00000002
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST      0x00000080
+
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE           0x00000200
+#define VM_EXIT_SAVE_IA32_EFER                 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER                 0x00200000
+
+#define VM_ENTRY_IA32E_MODE                    0x00000200
+#define VM_ENTRY_LOAD_IA32_EFER                        0x00008000
+
+#define INTR_INFO_UNBLOCK_NMI                  0x1000
+
+/* Bit 31. NOTE(review): presumably a flag inside the VM_EXIT_REASON value
+ * rather than an exit reason number of its own - confirm. */
+#define EXIT_REASONS_FAILED_VMENTRY            0x80000000
+
+/* Exit reason numbers. The list is not contiguous; numbers without a define
+ * here (e.g. 11, 17, 34) are simply not named in this header.
+ * NOTE(review): presumably compared against the value read from the
+ * VM_EXIT_REASON field - confirm against vmx_handle_exit(). */
+#define EXIT_REASON_EXCEPTION_NMI              0
+#define EXIT_REASON_EXTERNAL_INTERRUPT         1
+#define EXIT_REASON_TRIPLE_FAULT               2
+#define EXIT_REASON_INIT_SIGNAL                        3
+#define EXIT_REASON_SIPI                       4
+#define EXIT_REASON_IO_SMI                     5
+#define EXIT_REASON_OTHER_SMI                  6
+#define EXIT_REASON_PENDING_INTERRUPT          7
+#define EXIT_REASON_NMI_WINDOW                 8
+#define EXIT_REASON_TASK_SWITCH                        9
+#define EXIT_REASON_CPUID                      10
+#define EXIT_REASON_HLT                                12
+#define EXIT_REASON_INVD                       13
+#define EXIT_REASON_INVLPG                     14
+#define EXIT_REASON_RDPMC                      15
+#define EXIT_REASON_RDTSC                      16
+#define EXIT_REASON_VMCALL                     18
+#define EXIT_REASON_VMCLEAR                    19
+#define EXIT_REASON_VMLAUNCH                   20
+#define EXIT_REASON_VMPTRLD                    21
+#define EXIT_REASON_VMPTRST                    22
+#define EXIT_REASON_VMREAD                     23
+#define EXIT_REASON_VMRESUME                   24
+#define EXIT_REASON_VMWRITE                    25
+#define EXIT_REASON_VMOFF                      26
+#define EXIT_REASON_VMON                       27
+#define EXIT_REASON_CR_ACCESS                  28
+#define EXIT_REASON_DR_ACCESS                  29
+#define EXIT_REASON_IO_INSTRUCTION             30
+#define EXIT_REASON_MSR_READ                   31
+#define EXIT_REASON_MSR_WRITE                  32
+#define EXIT_REASON_INVALID_STATE              33
+#define EXIT_REASON_MWAIT_INSTRUCTION          36
+#define EXIT_REASON_MONITOR_INSTRUCTION                39
+#define EXIT_REASON_PAUSE_INSTRUCTION          40
+#define EXIT_REASON_MCE_DURING_VMENTRY         41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD                43
+#define EXIT_REASON_APIC_ACCESS                        44
+#define EXIT_REASON_EPT_VIOLATION              48
+#define EXIT_REASON_EPT_MISCONFIG              49
+#define EXIT_REASON_PREEMPTION_TIMER           52
+#define EXIT_REASON_WBINVD                     54
+#define EXIT_REASON_XSETBV                     55
+#define EXIT_REASON_INVPCID                    58
+
+/* Flag bits for EPT entries. EPT_FLAG_WB_TYPE (0x030) equals
+ * EPT_TYPE_WRITEBACK << 3. */
+#define EPT_FLAG_READ                          0x001
+#define EPT_FLAG_WRITE                         0x002
+#define EPT_FLAG_EXECUTE                       0x004
+#define EPT_FLAG_WB_TYPE                       0x030
+
+/* Memory type numbers and the page walk length value, the latter encoded as
+ * (number of levels - 1) shifted left by 3. */
+#define EPT_TYPE_UNCACHEABLE                   0
+#define EPT_TYPE_WRITEBACK                     6
+#define EPT_PAGE_WALK_LEN                      ((4-1) << 3)
+
+/* NOTE(review): the following look like feature bits of the EPT/VPID
+ * capability MSR, with EPT_MANDATORY_FEATURES being the subset that must be
+ * present - confirm against the check in vmx.c. */
+#define EPT_PAGE_WALK_4                                (1UL << 6)
+#define EPTP_WB                                        (1UL << 14)
+#define EPT_INVEPT                             (1UL << 20)
+#define EPT_INVEPT_SINGLE                      (1UL << 25)
+#define EPT_INVEPT_GLOBAL                      (1UL << 26)
+#define EPT_MANDATORY_FEATURES                 (EPT_PAGE_WALK_4 | EPTP_WB | \
+                                                EPT_INVEPT)
+
+/* NOTE(review): presumably the type operand values of the INVEPT
+ * instruction - confirm against vmx_invept(). */
+#define VMX_INVEPT_SINGLE                      1
+#define VMX_INVEPT_GLOBAL                      2
+
+/* Masks and type values for decoding an APIC access: the low 12 bits hold
+ * the offset, bits 12-15 the access type.
+ * NOTE(review): "OFFET" is presumably a misspelling of "OFFSET"; renaming it
+ * requires updating all users of the macro. */
+#define APIC_ACCESS_OFFET_MASK                 0x00000fff
+#define APIC_ACCESS_TYPE_MASK                  0x0000f000
+#define APIC_ACCESS_TYPE_LINEAR_READ           0x00000000
+#define APIC_ACCESS_TYPE_LINEAR_WRITE          0x00001000
+
+/* Global VMX setup; called once from arch_init_early(). */
+void vmx_init(void);
+
+/* Per-cell VMX setup and adjustment; vmx_cell_init() returns 0 on success or
+ * an error code that arch_init_early() passes on to its caller. */
+int vmx_cell_init(struct cell *cell, struct jailhouse_cell_desc *config);
+void vmx_cell_shrink(struct cell *cell, struct jailhouse_cell_desc *config);
+
+/* Per-CPU VMX setup and teardown; called from arch_cpu_init() and
+ * arch_cpu_restore(), respectively. vmx_cpu_init() returns 0 on success. */
+int vmx_cpu_init(struct per_cpu *cpu_data);
+void vmx_cpu_exit(struct per_cpu *cpu_data);
+
+/* vmx_cpu_activate_vmm() never returns to its caller. */
+void __attribute__((noreturn)) vmx_cpu_activate_vmm(struct per_cpu *cpu_data);
+void vmx_handle_exit(struct registers *guest_regs, struct per_cpu *cpu_data);
+void vmx_entry_failure(struct per_cpu *cpu_data);
+
+void vmx_invept(void);
+
+void vmx_schedule_vmexit(struct per_cpu *cpu_data);
diff --git a/hypervisor/arch/x86/mmio.c b/hypervisor/arch/x86/mmio.c
new file mode 100644 (file)
index 0000000..a8f15b0
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/mmio.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <asm/spinlock.h>
+#include <asm/fault.h>
+
+/* One-byte view of an x86 ModRM byte, overlaid onto instruction bytes by
+ * mmio_parse(). Relies on the compiler allocating bit-fields starting at the
+ * least significant bit, so that rm covers bits 0-2, reg bits 3-5 and mod
+ * bits 6-7. */
+struct modrm {
+       u8 rm:3;
+       u8 reg:3;
+       u8 mod:2;
+} __attribute__((packed));
+
+/* One-byte view of an x86 SIB byte, overlaid onto instruction bytes by
+ * mmio_parse(). With bit-fields allocated from the least significant bit,
+ * reg covers bits 0-2 (the position of the SIB base field), index bits 3-5
+ * and ss bits 6-7. */
+struct sib {
+       u8 reg:3;
+       u8 index:3;
+       u8 ss:2;
+} __attribute__((packed));
+
+/* Held by mmio_parse() for the whole duration of an instruction decode. */
+static DEFINE_SPINLOCK(mmio_lock);
+
+/*
+ * Decodes the guest instruction at pc that performed an MMIO access.
+ *
+ * The instruction bytes are read through page_map_get_foreign_page(), which
+ * is handed page_table_addr and the cell's page_offset; the mapping is
+ * requested again after every increment of pc. Only the opcodes
+ * X86_OP_MOV_TO_MEM and X86_OP_MOV_FROM_MEM are understood, optionally
+ * preceded by a single X86_OP_REGR_PREFIX byte, and only with these ModRM
+ * forms:
+ *   - mod == 0, rm == 4 and a SIB byte with ss == 0, index == 4, base == 5
+ *   - mod == 2, i.e. a 32-bit displacement, with or without a SIB byte
+ *
+ * Returns the decoded access: inst_len is the total instruction length in
+ * bytes, size is always 4 and reg is derived from the ModRM reg field. If the
+ * instruction page cannot be mapped, the encoding is not supported or the
+ * decoded direction does not match is_write, a message is printed and
+ * inst_len is returned as 0.
+ *
+ * mmio_lock is held for the whole decode.
+ */
+struct mmio_access mmio_parse(struct per_cpu *cpu_data, unsigned long pc,
+                             unsigned long page_table_addr, bool is_write)
+{
+       struct mmio_access access = { .inst_len = 0 };
+       unsigned int cpu_id = cpu_data->cpu_id;
+       struct cell *cell = cpu_data->cell;
+       bool has_regr, has_modrm, does_write;
+       struct modrm modrm;
+       struct sib sib;
+       u8 *page;
+
+       spin_lock(&mmio_lock);
+
+       access.inst_len = 0;
+       has_regr = false;
+
+restart:
+       page = page_map_get_foreign_page(cpu_id, page_table_addr,
+                                        cell->page_offset, pc,
+                                        PAGE_DEFAULT_FLAGS);
+       if (!page)
+               goto error_nopage;
+
+       has_modrm = false;
+       switch (page[pc & PAGE_OFFS_MASK]) {
+       case X86_OP_REGR_PREFIX:
+               /* accept the prefix only once, then decode the next byte */
+               if (has_regr)
+                       goto error_unsupported;
+               has_regr = true;
+               pc++;
+               access.inst_len++;
+               goto restart;
+       case X86_OP_MOV_TO_MEM:
+               /* opcode + ModRM byte */
+               access.inst_len += 2;
+               access.size = 4;
+               has_modrm = true;
+               does_write = true;
+               break;
+       case X86_OP_MOV_FROM_MEM:
+               /* opcode + ModRM byte */
+               access.inst_len += 2;
+               access.size = 4;
+               has_modrm = true;
+               does_write = false;
+               break;
+       default:
+               goto error_unsupported;
+       }
+
+       if (has_modrm) {
+               pc++;
+               page = page_map_get_foreign_page(cpu_id, page_table_addr,
+                                                cell->page_offset, pc,
+                                                PAGE_DEFAULT_FLAGS);
+               if (!page)
+                       goto error_nopage;
+
+               modrm = *(struct modrm *)&page[pc & PAGE_OFFS_MASK];
+               switch (modrm.mod) {
+               case 0:
+                       /* only the SIB form is handled for mod == 0 */
+                       if (modrm.rm != 4)
+                               goto error_unsupported;
+
+                       pc++;
+                       page = page_map_get_foreign_page(cpu_id,
+                                                        page_table_addr,
+                                                        cell->page_offset, pc,
+                                                        PAGE_DEFAULT_FLAGS);
+                       if (!page)
+                               goto error_nopage;
+
+                       sib = *(struct sib *)&page[pc & PAGE_OFFS_MASK];
+                       if (sib.ss !=0 || sib.index != 4 || sib.reg != 5)
+                               goto error_unsupported;
+                       /* SIB byte + 32-bit displacement */
+                       access.inst_len += 5;
+                       break;
+               case 2:
+                       /* 32-bit displacement; rm == 4 additionally selects
+                        * a SIB byte that has to be counted as well */
+                       access.inst_len += (modrm.rm == 4) ? 5 : 4;
+                       break;
+               default:
+                       goto error_unsupported;
+               }
+               /* NOTE(review): the reg value presumably indexes the saved
+                * guest registers, which would then be stored in reverse
+                * encoding order - confirm against struct registers. */
+               if (has_regr)
+                       access.reg = 7 - modrm.reg;
+               else if (modrm.reg == 4)
+                       goto error_unsupported;
+               else
+                       access.reg = 15 - modrm.reg;
+       }
+
+       if (does_write != is_write)
+               goto error_inconsistent;
+
+unmap_out:
+       page_map_release_foreign_page(cpu_id);
+
+       spin_unlock(&mmio_lock);
+       return access;
+
+error_nopage:
+       panic_printk("FATAL: unable to map MMIO instruction page\n");
+       goto error;
+
+error_unsupported:
+       panic_printk("FATAL: unsupported instruction\n");
+       goto error;
+
+error_inconsistent:
+       panic_printk("FATAL: inconsistent access, expected %s instruction\n",
+                    is_write ? "write" : "read");
+error:
+       /* a zero instruction length signals the failure to the caller */
+       access.inst_len = 0;
+       goto unmap_out;
+}
diff --git a/hypervisor/arch/x86/setup.c b/hypervisor/arch/x86/setup.c
new file mode 100644 (file)
index 0000000..d1c6e98
--- /dev/null
@@ -0,0 +1,213 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/control.h>
+#include <asm/vmx.h>
+#include <asm/apic.h>
+#include <asm/bitops.h>
+
+/* Bit 41 of the first 64-bit word of a descriptor; cleared in TSS descriptors
+ * right before ltr is executed (arch_cpu_init(), arch_cpu_restore()). */
+#define TSS_BUSY_FLAG          (1UL << (9 + 32))
+
+/* Number of IDT slots; each slot takes four u32 words of idt[], i.e. 16
+ * bytes. */
+#define NUM_IDT_DESC           20
+
+/* Memory operand of the indirect far jump in set_cs(): 64-bit offset
+ * immediately followed by the 16-bit segment selector (packed, no padding). */
+struct farptr {
+       u64 offs;
+       u16 seg;
+} __attribute__((packed));
+
+/* GDT loaded by arch_cpu_init(): a null descriptor, one code segment
+ * descriptor (used for CS and for the IDT gates) and a TSS descriptor that
+ * occupies two slots. The busy flag of the TSS slot is cleared again in
+ * arch_cpu_init() before each ltr.
+ * NOTE(review): base and limit of the TSS descriptor are zero here; whether
+ * they get filled in elsewhere is not visible in this section. */
+static u64 gdt[] = {
+       [GDT_DESC_NULL]   = 0,
+       [GDT_DESC_CODE]   = 0x00af9b000000ffff,
+       [GDT_DESC_TSS]    = 0x0000890000000000,
+       [GDT_DESC_TSS_HI] = 0x0000000000000000,
+};
+
+/* Entry stubs defined outside this file. arch_init_early() assumes that the
+ * stubs in exception_entries are spaced 16 bytes apart, with no stub for the
+ * NMI vector and vector 15. */
+extern u8 exception_entries[];
+extern u8 nmi_entry[];
+
+/* IDT storage, four u32 words per descriptor; filled by arch_init_early(). */
+static u32 idt[NUM_IDT_DESC * 4];
+
+/* Writes the first three 32-bit words of IDT slot vector so that the gate
+ * targets handler via the selector GDT_DESC_CODE * 8; the fourth word of the
+ * slot is not touched. */
+static void set_idt_gate(unsigned int vector, unsigned long handler)
+{
+       u32 *gate = &idt[vector * 4];
+
+       gate[0] = (handler & 0xffff) | ((GDT_DESC_CODE * 8) << 16);
+       gate[1] = 0x8e00 | (handler & 0xffff0000);
+       gate[2] = handler >> 32;
+}
+
+/*
+ * Early one-time initialization: sets up the APIC support, populates the IDT
+ * and initializes VMX globally as well as for linux_cell.
+ *
+ * Returns 0 on success or the error reported by apic_init() or
+ * vmx_cell_init().
+ */
+int arch_init_early(struct cell *linux_cell,
+                   struct jailhouse_cell_desc *config)
+{
+       unsigned long handler;
+       unsigned int vector;
+       int err;
+
+       err = apic_init();
+       if (err)
+               return err;
+
+       /* The exception stubs are consumed in 16-byte steps; the NMI vector
+        * and vector 15 neither get a gate here nor consume a stub. */
+       handler = (unsigned long)exception_entries;
+       for (vector = 0; vector < NUM_IDT_DESC; vector++) {
+               if (vector != NMI_VECTOR && vector != 15) {
+                       set_idt_gate(vector, handler);
+                       handler += 16;
+               }
+       }
+
+       /* NMIs are routed to their own entry point */
+       set_idt_gate(NMI_VECTOR, (unsigned long)nmi_entry);
+
+       vmx_init();
+
+       return vmx_cell_init(linux_cell, config);
+}
+
+/*
+ * Loads the selector cs into the CS register by performing an indirect far
+ * jump to the local label that directly follows the jump instruction.
+ */
+static void set_cs(u16 cs)
+{
+       struct farptr jmp_target;
+       unsigned long tmp;
+
+       jmp_target.seg = cs;
+       /*
+        * Operand constraints:
+        *  - tmp is written before the memory operands are used, so it is
+        *    marked early-clobber to keep it out of any register that forms
+        *    part of their addresses
+        *  - jmp_target.offs is stored to by the asm and is therefore an
+        *    output operand
+        *  - jmp_target as a whole is read by the far jump
+        */
+       asm volatile(
+               "lea 1f(%%rip),%0\n\t"
+               "mov %0,%1\n\t"
+               "rex64/ljmp *%2\n\t"
+               "1:"
+               : "=&r" (tmp), "=m" (jmp_target.offs)
+               : "m" (jmp_target));
+}
+
+/*
+ * Per-CPU takeover: saves the Linux state that arch_cpu_restore() needs into
+ * cpu_data and switches this CPU over to the hypervisor's page table, GDT,
+ * CS, TR and IDT, then initializes the APIC and VMX support for this CPU.
+ *
+ * Returns 0 on success. If apic_cpu_init() or vmx_cpu_init() fails, the
+ * switch is rolled back via arch_cpu_restore() and the error is returned.
+ */
+int arch_cpu_init(struct per_cpu *cpu_data)
+{
+       struct desc_table_reg dtr;
+       u64 *linux_tr_desc;
+       int err, n;
+
+       /* read GDTR */
+       read_gdtr(&cpu_data->linux_gdtr);
+
+       /* read TR and TSS descriptor */
+       asm volatile("str %0" : "=m" (cpu_data->linux_tr));
+       linux_tr_desc = (u64 *)(cpu_data->linux_gdtr.base +
+               (cpu_data->linux_tr & 0xfff8));
+       /* base: descriptor bits 39:16 and 63:56, plus the upper 32 bits from
+        * the second descriptor word */
+       cpu_data->linux_tr_base = ((linux_tr_desc[0] >> 16) & 0xffffff) |
+               ((linux_tr_desc[0] >> 32) & 0xff000000) |
+               (linux_tr_desc[1] << 32);
+       /* limit: descriptor bits 15:0 and 51:48; bits 55:52 are flags and
+        * must not end up in the limit.
+        * NOTE(review): a set granularity flag is not taken into account. */
+       cpu_data->linux_tr_limit = (linux_tr_desc[0] & 0xffff) |
+               ((linux_tr_desc[0] >> 32) & 0xf0000);
+       /* access rights: descriptor bits 47:40 and 55:52; bits 51:48 belong
+        * to the limit and are masked out */
+       cpu_data->linux_tr_ar_bytes = (linux_tr_desc[0] >> 40) & 0xf0ff;
+
+       /* read registers to restore on first VM-entry */
+       for (n = 0; n < NUM_ENTRY_REGS; n++)
+               cpu_data->linux_reg[n] =
+                       ((unsigned long *)cpu_data->linux_sp)[n];
+       /* NOTE(review): slot 6 of the saved stack area is taken as the Linux
+        * resume address - confirm against the entry code. */
+       cpu_data->linux_ip = ((unsigned long *)cpu_data->linux_sp)[6];
+
+       /* swap CR3 */
+       cpu_data->linux_cr3 = read_cr3();
+       write_cr3(page_map_hvirt2phys(hv_page_table));
+
+       /* set GDTR */
+       dtr.limit = NUM_GDT_DESC * 8 - 1;
+       dtr.base = (u64)&gdt;
+       write_gdtr(&dtr);
+
+       /* set CS */
+       asm volatile("mov %%cs,%0": "=m" (cpu_data->linux_cs));
+       set_cs(GDT_DESC_CODE * 8);
+
+       /* paranoid clearing of segment registers */
+       asm volatile(
+               "mov %0,%%es\n\t"
+               "mov %0,%%ds\n\t"
+               "mov %0,%%ss"
+               : : "r" (0));
+
+       /* clear TSS busy flag set by previous loading, then set TR */
+       gdt[GDT_DESC_TSS] &= ~TSS_BUSY_FLAG;
+       asm volatile("ltr %%ax" : : "a" (GDT_DESC_TSS * 8));
+
+       /* swap IDTR */
+       read_idtr(&cpu_data->linux_idtr);
+       dtr.limit = NUM_IDT_DESC * 16 - 1;
+       dtr.base = (u64)&idt;
+       write_idtr(&dtr);
+
+       /* save the MSRs that arch_cpu_restore() writes back */
+       cpu_data->linux_efer = read_msr(MSR_EFER);
+       cpu_data->linux_fs_base = read_msr(MSR_FS_BASE);
+       cpu_data->linux_gs_base = read_msr(MSR_GS_BASE);
+
+       cpu_data->linux_sysenter_cs = read_msr(MSR_IA32_SYSENTER_CS);
+       cpu_data->linux_sysenter_eip = read_msr(MSR_IA32_SYSENTER_EIP);
+       cpu_data->linux_sysenter_esp = read_msr(MSR_IA32_SYSENTER_ESP);
+
+       /* from here on, arch_cpu_restore() has a complete state to restore */
+       cpu_data->initialized = true;
+
+       err = apic_cpu_init(cpu_data);
+       if (err)
+               goto error_out;
+
+       err = vmx_cpu_init(cpu_data);
+       if (err)
+               goto error_out;
+
+       return 0;
+
+error_out:
+       arch_cpu_restore(cpu_data);
+       return err;
+}
+
+/* Late initialization hook; nothing to do here, always returns 0. */
+int arch_init_late(struct cell *linux_cell,
+                  struct jailhouse_cell_desc *config)
+{
+       return 0;
+}
+
+/* Forwards to vmx_cpu_activate_vmm(), which is declared noreturn. */
+void arch_cpu_activate_vmm(struct per_cpu *cpu_data)
+{
+       vmx_cpu_activate_vmm(cpu_data);
+}
+
+/*
+ * Hands this CPU back to Linux by writing back the state that
+ * arch_cpu_init() saved in cpu_data. Does nothing unless
+ * cpu_data->initialized is set.
+ */
+void arch_cpu_restore(struct per_cpu *cpu_data)
+{
+       u64 *linux_tss_desc;
+
+       if (!cpu_data->initialized)
+               return;
+
+       vmx_cpu_exit(cpu_data);
+
+       write_msr(MSR_EFER, cpu_data->linux_efer);
+       write_cr3(cpu_data->linux_cr3);
+
+       /* back to the Linux descriptor tables and code segment */
+       asm volatile("lgdtq %0" : : "m" (cpu_data->linux_gdtr));
+       asm volatile("lidtq %0" : : "m" (cpu_data->linux_idtr));
+
+       set_cs(cpu_data->linux_cs);
+
+       /* the Linux TSS descriptor needs its busy flag cleared before TR can
+        * be loaded with it again */
+       linux_tss_desc = (u64 *)cpu_data->linux_gdtr.base +
+               cpu_data->linux_tr / 8;
+       *linux_tss_desc &= ~TSS_BUSY_FLAG;
+       asm volatile("ltr %%ax" : : "a" (cpu_data->linux_tr));
+
+       write_msr(MSR_FS_BASE, cpu_data->linux_fs_base);
+       write_msr(MSR_GS_BASE, cpu_data->linux_gs_base);
+
+       write_msr(MSR_IA32_SYSENTER_CS, cpu_data->linux_sysenter_cs);
+       write_msr(MSR_IA32_SYSENTER_EIP, cpu_data->linux_sysenter_eip);
+       write_msr(MSR_IA32_SYSENTER_ESP, cpu_data->linux_sysenter_esp);
+}
diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c
new file mode 100644 (file)
index 0000000..ebda670
--- /dev/null
@@ -0,0 +1,990 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/string.h>
+#include <jailhouse/control.h>
+#include <jailhouse/hypercall.h>
+#include <asm/apic.h>
+#include <asm/fault.h>
+#include <asm/vmx.h>
+
+/*
+ * MSR bitmaps handed to the CPU via the MSR_BITMAP VMCS field (see
+ * vmcs_setup). Each sub-array covers 0x2000 MSRs with one bit per MSR; the
+ * trailing comments list the MSR numbers whose bit is set in that byte.
+ *
+ * Only MSRs in the range 0x800 - 0x83f have bits set here. vmx_init clears
+ * the MSR_X2APIC_BASE..MSR_X2APIC_END range again when x2APIC is in use,
+ * keeping only the ICR write bit.
+ */
+static u8 __attribute__((aligned(PAGE_SIZE))) msr_bitmap[][0x2000/8] = {
+       [ VMX_MSR_BITMAP_0000_READ ] = {
+               [      0/8 ...  0x7ff/8 ] = 0,
+               [  0x800/8 ...  0x807/8 ] = 0x0c, /* 0x802, 0x803 */
+               [  0x808/8 ...  0x80f/8 ] = 0xa5, /* 0x808, 0x80a, 0x80d, 0x80f */
+               [  0x810/8 ...  0x817/8 ] = 0xff, /* 0x810 - 0x817 */
+               [  0x818/8 ...  0x81f/8 ] = 0xff, /* 0x818 - 0x81f */
+               [  0x820/8 ...  0x827/8 ] = 0xff, /* 0x820 - 0x827 */
+               [  0x828/8 ...  0x82f/8 ] = 0x81, /* 0x828, 0x82f */
+               [  0x830/8 ...  0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
+               [  0x838/8 ...  0x83f/8 ] = 0x43, /* 0x838, 0x839, 0x83e */
+               [  0x840/8 ... 0x1fff/8 ] = 0,
+       },
+       [ VMX_MSR_BITMAP_C000_READ ] = {
+               [      0/8 ... 0x1fff/8 ] = 0,
+       },
+       [ VMX_MSR_BITMAP_0000_WRITE ] = {
+               [      0/8 ...  0x807/8 ] = 0,
+               [  0x808/8 ...  0x80f/8 ] = 0x89, /* 0x808, 0x80b, 0x80f */
+               [  0x810/8 ...  0x827/8 ] = 0,
+               [  0x828/8 ...  0x82f/8 ] = 0x81, /* 0x828, 0x82f */
+               [  0x830/8 ...  0x837/8 ] = 0xfd, /* 0x830, 0x832 - 0x837 */
+               [  0x838/8 ...  0x83f/8 ] = 0xc1, /* 0x838, 0x83e, 0x83f */
+               [  0x840/8 ... 0x1fff/8 ] = 0,
+       },
+       [ VMX_MSR_BITMAP_C000_WRITE ] = {
+               [      0/8 ... 0x1fff/8 ] = 0,
+       },
+};
+/*
+ * Page-aligned page whose physical address is written to APIC_ACCESS_ADDR
+ * (vmcs_setup) and mapped at XAPIC_BASE in each cell's EPT (vmx_cell_init).
+ */
+static u8 __attribute__((aligned(PAGE_SIZE))) apic_access_page[PAGE_SIZE];
+
+/*
+ * Offset added to the VMX control capability MSR numbers. It stays 0 unless
+ * vmx_cpu_init finds bit 55 of MSR_IA32_VMX_BASIC set, in which case it
+ * redirects the reads to the corresponding "TRUE" capability MSRs.
+ */
+static unsigned int vmx_true_msr_offs;
+
+/*
+ * Execute VMXON with the physical address of cpu_data->vmxon_page.
+ *
+ * Returns true on success. "seta" yields 1 only if both CF and ZF are clear,
+ * i.e. the instruction reported neither VMfailInvalid nor VMfailValid.
+ */
+static bool vmxon(struct per_cpu *cpu_data)
+{
+       unsigned long vmxon_addr = page_map_hvirt2phys(cpu_data->vmxon_page);
+       u8 ok;
+
+       /*
+        * The instruction takes a memory operand holding the physical
+        * address: %1 points at the local variable, and the extra "m" input
+        * tells the compiler that the variable is read by the asm.
+        */
+       asm volatile(
+               "vmxon (%1)\n\t"
+               "seta %0"
+               : "=rm" (ok)
+               : "r" (&vmxon_addr), "m" (vmxon_addr)
+               : "memory", "cc");
+       return ok;
+}
+
+/*
+ * Execute VMCLEAR on the physical address of cpu_data->vmcs_page.
+ *
+ * Returns true on success ("seta": CF and ZF both clear).
+ */
+static bool vmcs_clear(struct per_cpu *cpu_data)
+{
+       unsigned long vmcs_addr = page_map_hvirt2phys(cpu_data->vmcs_page);
+       u8 ok;
+
+       /* %1 points at vmcs_addr; the "m" input marks it as read by the asm */
+       asm volatile(
+               "vmclear (%1)\n\t"
+               "seta %0"
+               : "=qm" (ok)
+               : "r" (&vmcs_addr), "m" (vmcs_addr)
+               : "memory", "cc");
+       return ok;
+}
+
+/*
+ * Execute VMPTRLD on the physical address of cpu_data->vmcs_page, making it
+ * the current VMCS for the subsequent vmread/vmwrite accesses.
+ *
+ * Returns true on success ("seta": CF and ZF both clear).
+ */
+static bool vmcs_load(struct per_cpu *cpu_data)
+{
+       unsigned long vmcs_addr = page_map_hvirt2phys(cpu_data->vmcs_page);
+       u8 ok;
+
+       /* %1 points at vmcs_addr; the "m" input marks it as read by the asm */
+       asm volatile(
+               "vmptrld (%1)\n\t"
+               "seta %0"
+               : "=qm" (ok)
+               : "r" (&vmcs_addr), "m" (vmcs_addr)
+               : "memory", "cc");
+       return ok;
+}
+
+/*
+ * Read a field of the current VMCS via VMREAD and return its value.
+ *
+ * The flags set by the instruction are not evaluated, so a failing VMREAD
+ * is not reported to the caller.
+ */
+static inline unsigned long vmcs_read64(unsigned long field)
+{
+       unsigned long value;
+
+       asm volatile("vmread %1,%0" : "=r" (value) : "r" (field) : "cc");
+       return value;
+}
+
+/* Read a VMCS field and return its low 16 bits. */
+static inline u16 vmcs_read16(unsigned long field)
+{
+       return (u16)vmcs_read64(field);
+}
+
+/* Read a VMCS field and return its low 32 bits. */
+static inline u32 vmcs_read32(unsigned long field)
+{
+       return (u32)vmcs_read64(field);
+}
+
+/*
+ * Write val to the given field of the current VMCS via VMWRITE.
+ *
+ * Returns true on success. On failure, a message containing the
+ * VM-instruction error and the caller's return address is printed and false
+ * is returned.
+ */
+static bool vmcs_write64(unsigned long field, unsigned long val)
+{
+       u8 ok;
+
+       /*
+        * VMX instructions report failure through CF (VMfailInvalid) or ZF
+        * (VMfailValid). "seta" is 1 only if both are clear, as used by the
+        * other VMX instruction wrappers in this file; "setnz" would treat
+        * a CF-only failure as success.
+        */
+       asm volatile(
+               "vmwrite %1,%2\n\t"
+               "seta %0"
+               : "=qm" (ok)
+               : "r" (val), "r" (field)
+               : "cc");
+       if (!ok)
+               printk("FATAL: vmwrite failed, error %d, caller %p\n",
+                      vmcs_read32(VM_INSTRUCTION_ERROR),
+                      __builtin_return_address(0));
+       return ok;
+}
+
+/* Write a 16-bit value to a VMCS field; returns true on success. */
+static bool vmcs_write16(unsigned long field, u16 value)
+{
+       return vmcs_write64(field, (unsigned long)value);
+}
+
+/* Write a 32-bit value to a VMCS field; returns true on success. */
+static bool vmcs_write32(unsigned long field, u32 value)
+{
+       return vmcs_write64(field, (unsigned long)value);
+}
+
+/*
+ * One-time adjustment of the MSR bitmaps.
+ *
+ * When x2APIC is in use, the read and write bitmap bytes covering
+ * MSR_X2APIC_BASE..MSR_X2APIC_END are cleared and only bit 0 of the write
+ * bitmap byte containing MSR_X2APIC_ICR is set again. Without x2APIC the
+ * statically initialized bitmaps are left untouched.
+ */
+void vmx_init(void)
+{
+       const unsigned long x2apic_first = MSR_X2APIC_BASE / 8;
+       const unsigned long x2apic_bytes =
+               (MSR_X2APIC_END - MSR_X2APIC_BASE + 1) / 8;
+
+       if (using_x2apic) {
+               /* allow direct x2APIC access except for ICR writes */
+               memset(&msr_bitmap[VMX_MSR_BITMAP_0000_READ][x2apic_first],
+                      0, x2apic_bytes);
+               memset(&msr_bitmap[VMX_MSR_BITMAP_0000_WRITE][x2apic_first],
+                      0, x2apic_bytes);
+               msr_bitmap[VMX_MSR_BITMAP_0000_WRITE][MSR_X2APIC_ICR / 8] =
+                       0x01;
+       }
+}
+
+/*
+ * Set up the VMX-specific state of a cell from its configuration: the EPT
+ * root with all configured memory regions plus the APIC access page, and
+ * the I/O bitmap.
+ *
+ * The code expects the configuration to be laid out as the descriptor,
+ * followed by the CPU set (cpu_set_size bytes), the memory regions, the IRQ
+ * lines and finally the PIO bitmap.
+ *
+ * Returns 0 on success, -ENOMEM if the EPT root page cannot be allocated,
+ * or the error returned by page_map_create. On the page_map_create error
+ * paths the EPT root page is not released (see FIXMEs).
+ */
+int vmx_cell_init(struct cell *cell, struct jailhouse_cell_desc *config)
+{
+       struct jailhouse_memory *mem;
+       u32 page_flags, table_flags;
+       u32 pio_bitmap_size, size;
+       u8 *pio_bitmap;
+       int n, err;
+
+       /* build root cell EPT */
+       cell->vmx.ept = page_alloc(&mem_pool, 1);
+       if (!cell->vmx.ept)
+               return -ENOMEM;
+
+       mem = (void *)config + sizeof(struct jailhouse_cell_desc) +
+               config->cpu_set_size;
+
+       /* translate each region's access flags into EPT permissions */
+       for (n = 0; n < config->num_memory_regions; n++, mem++) {
+               page_flags = EPT_FLAG_WB_TYPE;
+               if (mem->access_flags & JAILHOUSE_MEM_READ)
+                       page_flags |= EPT_FLAG_READ;
+               if (mem->access_flags & JAILHOUSE_MEM_WRITE)
+                       page_flags |= EPT_FLAG_WRITE;
+               if (mem->access_flags & JAILHOUSE_MEM_EXECUTE)
+                       page_flags |= EPT_FLAG_EXECUTE;
+               /* table entries get the same permissions, without the type */
+               table_flags = page_flags & ~EPT_FLAG_WB_TYPE;
+
+               err = page_map_create(cell->vmx.ept, mem->phys_start,
+                                     mem->size, mem->virt_start, page_flags,
+                                     table_flags, PAGE_DIR_LEVELS);
+               if (err)
+                       /* FIXME: release vmx.ept */
+                       return err;
+       }
+
+       /* map the shared APIC access page at the xAPIC base address */
+       page_flags = EPT_FLAG_READ | EPT_FLAG_WRITE | EPT_FLAG_WB_TYPE;
+       table_flags = EPT_FLAG_READ | EPT_FLAG_WRITE;
+       err = page_map_create(cell->vmx.ept,
+                             page_map_hvirt2phys(apic_access_page),
+                             PAGE_SIZE, XAPIC_BASE, page_flags, table_flags,
+                             PAGE_DIR_LEVELS);
+       if (err)
+               /* FIXME: release vmx.ept */
+               return err;
+
+       /* mem now points behind the last region; skip the IRQ lines */
+       pio_bitmap = (void *)mem +
+               config->num_irq_lines * sizeof(struct jailhouse_irq_line);
+       pio_bitmap_size = config->pio_bitmap_size;
+
+       /* start with all bits set, then overlay the configured bitmap */
+       memset(cell->vmx.io_bitmap, -1, sizeof(cell->vmx.io_bitmap));
+
+       /* copy at most PAGE_SIZE bytes into each of the two bitmap pages */
+       for (n = 0; n < 2; n++) {
+               size = pio_bitmap_size <= PAGE_SIZE ?
+                       pio_bitmap_size : PAGE_SIZE;
+               memcpy(cell->vmx.io_bitmap + n * PAGE_SIZE, pio_bitmap, size);
+               pio_bitmap += size;
+               pio_bitmap_size -= size;
+       }
+
+       return 0;
+}
+
+/*
+ * Remove the resources described by config from the given cell: unmap the
+ * listed memory regions from the cell's EPT and, for every bit that is
+ * clear in the config's PIO bitmap, set the corresponding bit in the cell's
+ * I/O bitmap. Finally the EPT mappings are invalidated via vmx_invept.
+ *
+ * The number of PIO bitmap bytes processed is limited to the size of the
+ * cell's I/O bitmap so that an oversized configuration cannot cause writes
+ * beyond cell->vmx.io_bitmap.
+ */
+void vmx_cell_shrink(struct cell *cell, struct jailhouse_cell_desc *config)
+{
+       struct jailhouse_memory *mem;
+       u32 pio_bitmap_size;
+       u8 *pio_bitmap, *b;
+       int n;
+
+       mem = (void *)config + sizeof(struct jailhouse_cell_desc) +
+               config->cpu_set_size;
+
+       for (n = 0; n < config->num_memory_regions; n++, mem++)
+               /* FIXME: phys_start only works for the Linux cell. We need
+                * the original memory region, match phys_start and use
+                * virt_start from there. */
+               page_map_destroy(cell->vmx.ept, mem->phys_start, mem->size,
+                                PAGE_DIR_LEVELS);
+
+       /* mem now points behind the last region; skip the IRQ lines */
+       pio_bitmap = (void *)mem +
+               config->num_irq_lines * sizeof(struct jailhouse_irq_line);
+       pio_bitmap_size = config->pio_bitmap_size;
+       if (pio_bitmap_size > sizeof(cell->vmx.io_bitmap))
+               pio_bitmap_size = sizeof(cell->vmx.io_bitmap);
+
+       for (b = cell->vmx.io_bitmap; pio_bitmap_size > 0;
+            b++, pio_bitmap++, pio_bitmap_size--)
+               *b |= ~*pio_bitmap;
+
+       vmx_invept();
+}
+
+/*
+ * Invalidate EPT-derived translations using INVEPT.
+ *
+ * If the CPU reports EPT_INVEPT_SINGLE, the invalidation is restricted to
+ * the EPT pointer of the current VMCS; otherwise a global invalidation is
+ * issued. A failing INVEPT is fatal: a message is printed and panic_stop is
+ * called.
+ */
+void vmx_invept(void)
+{
+       unsigned long ept_cap = read_msr(MSR_IA32_VMX_EPT_VPID_CAP);
+       struct {
+               u64 eptp;
+               u64 reserved;
+       } descriptor;
+       u64 type;
+       u8 ok;
+
+       descriptor.reserved = 0;
+       if (ept_cap & EPT_INVEPT_SINGLE) {
+               type = VMX_INVEPT_SINGLE;
+               descriptor.eptp = vmcs_read64(EPT_POINTER);
+       } else {
+               type = VMX_INVEPT_GLOBAL;
+               descriptor.eptp = 0;
+       }
+       /* "seta" is 1 only if CF and ZF are both clear, i.e. on success */
+       asm volatile(
+               "invept (%1),%2\n\t"
+               "seta %0\n\t"
+               : "=qm" (ok)
+               : "r" (&descriptor), "r" (type)
+               : "memory", "cc");
+
+       if (!ok) {
+               panic_printk("FATAL: invept failed, error %d\n",
+                            vmcs_read32(VM_INSTRUCTION_ERROR));
+               panic_stop(NULL);
+       }
+}
+
+/*
+ * Program guest CR0 (cr == 0) or CR4 (any other cr value) in the current
+ * VMCS: the effective register value, the read shadow seen by the guest and
+ * the guest/host mask.
+ *
+ * The effective value is val restricted to the bits allowed by the FIXED1
+ * MSR, with all required bits forced on. Returns true if all VMCS writes
+ * succeeded.
+ */
+static bool vmx_set_guest_cr(int cr, unsigned long val)
+{
+       unsigned long fixed0, fixed1, required1;
+       bool ok = true;
+
+       fixed0 = read_msr(cr ? MSR_IA32_VMX_CR4_FIXED0
+                            : MSR_IA32_VMX_CR0_FIXED0);
+       fixed1 = read_msr(cr ? MSR_IA32_VMX_CR4_FIXED1
+                            : MSR_IA32_VMX_CR0_FIXED1);
+       /* bits that must be 1 while in VMX operation */
+       required1 = fixed0 & fixed1;
+       if (cr == 0) {
+               /* never set NW/CD in the effective CR0 */
+               fixed1 &= ~(X86_CR0_NW | X86_CR0_CD);
+               /* PE and PG may be cleared by the guest, ET is always set */
+               required1 &= ~(X86_CR0_PE | X86_CR0_PG);
+               required1 |= X86_CR0_ET;
+       } else {
+               /* keeps the hypervisor visible */
+               val |= X86_CR4_VMXE;
+       }
+       ok &= vmcs_write64(cr ? GUEST_CR4 : GUEST_CR0,
+                          (val & fixed1) | required1);
+       ok &= vmcs_write64(cr ? CR4_READ_SHADOW : CR0_READ_SHADOW, val);
+       /* mask covers every bit that is forced to 1 or forced to 0 */
+       ok &= vmcs_write64(cr ? CR4_GUEST_HOST_MASK : CR0_GUEST_HOST_MASK,
+                          required1 | ~fixed1);
+
+       return ok;
+}
+
+/*
+ * Write the cell-specific fields into the current VMCS: both I/O bitmap
+ * pages and the EPT pointer of the cell.
+ *
+ * All three writes are always attempted; returns true only if every one of
+ * them succeeded.
+ */
+static bool vmx_set_cell_config(struct cell *cell)
+{
+       u8 *bitmap_a = cell->vmx.io_bitmap;
+       u8 *bitmap_b = bitmap_a + PAGE_SIZE;
+       bool ok = true;
+
+       if (!vmcs_write64(IO_BITMAP_A, page_map_hvirt2phys(bitmap_a)))
+               ok = false;
+       if (!vmcs_write64(IO_BITMAP_B, page_map_hvirt2phys(bitmap_b)))
+               ok = false;
+
+       if (!vmcs_write64(EPT_POINTER,
+                         page_map_hvirt2phys(cell->vmx.ept) |
+                         EPT_TYPE_WRITEBACK | EPT_PAGE_WALK_LEN))
+               ok = false;
+
+       return ok;
+}
+
+/*
+ * Fill the current VMCS for this CPU.
+ *
+ * Host state is taken from the hypervisor's current registers and
+ * descriptor tables. Guest state is taken from the Linux state saved in
+ * cpu_data->linux_*, so that the guest resumes at cpu_data->linux_ip. The
+ * execution, exit and entry controls start from the values of the
+ * capability MSRs, with the required features added.
+ *
+ * Every write is attempted; returns true only if all of them succeeded.
+ */
+static bool vmcs_setup(struct per_cpu *cpu_data)
+{
+       struct desc_table_reg dtr;
+       unsigned long val;
+       bool ok = true;
+
+       /* host state: registers the CPU loads on every VM exit */
+       ok &= vmcs_write64(HOST_CR0, read_cr0());
+       ok &= vmcs_write64(HOST_CR3, read_cr3());
+       ok &= vmcs_write64(HOST_CR4, read_cr4());
+
+       ok &= vmcs_write16(HOST_CS_SELECTOR, GDT_DESC_CODE * 8);
+       ok &= vmcs_write16(HOST_DS_SELECTOR, 0);
+       ok &= vmcs_write16(HOST_ES_SELECTOR, 0);
+       ok &= vmcs_write16(HOST_SS_SELECTOR, 0);
+       ok &= vmcs_write16(HOST_FS_SELECTOR, 0);
+       ok &= vmcs_write16(HOST_GS_SELECTOR, 0);
+       ok &= vmcs_write16(HOST_TR_SELECTOR, GDT_DESC_TSS * 8);
+
+       ok &= vmcs_write64(HOST_FS_BASE, 0);
+       ok &= vmcs_write64(HOST_GS_BASE, 0);
+       ok &= vmcs_write64(HOST_TR_BASE, 0);
+
+       read_gdtr(&dtr);
+       ok &= vmcs_write64(HOST_GDTR_BASE, dtr.base);
+       read_idtr(&dtr);
+       ok &= vmcs_write64(HOST_IDTR_BASE, dtr.base);
+
+       ok &= vmcs_write64(HOST_IA32_EFER, EFER_LMA | EFER_LME);
+
+       ok &= vmcs_write32(HOST_IA32_SYSENTER_CS, 0);
+       ok &= vmcs_write64(HOST_IA32_SYSENTER_EIP, 0);
+       ok &= vmcs_write64(HOST_IA32_SYSENTER_ESP, 0);
+
+       /* VM exits start at vm_exit on the top of the per-CPU stack */
+       ok &= vmcs_write64(HOST_RSP, (unsigned long)cpu_data->stack +
+                          sizeof(cpu_data->stack));
+       ok &= vmcs_write64(HOST_RIP, (unsigned long)vm_exit);
+
+       /* guest state: based on the saved Linux state */
+       ok &= vmx_set_guest_cr(0, read_cr0());
+       ok &= vmx_set_guest_cr(4, read_cr4());
+
+       ok &= vmcs_write64(GUEST_CR3, cpu_data->linux_cr3);
+
+       ok &= vmcs_write16(GUEST_CS_SELECTOR, cpu_data->linux_cs);
+       ok &= vmcs_write64(GUEST_CS_BASE, 0);
+       ok &= vmcs_write32(GUEST_CS_LIMIT, 0xffffffff);
+       ok &= vmcs_write32(GUEST_CS_AR_BYTES, 0x0a09b);
+
+       ok &= vmcs_write16(GUEST_DS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_DS_BASE, 0);
+       ok &= vmcs_write32(GUEST_DS_LIMIT, 0);
+       ok &= vmcs_write32(GUEST_DS_AR_BYTES, 0x10000);
+
+       ok &= vmcs_write16(GUEST_ES_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_ES_BASE, 0);
+       ok &= vmcs_write32(GUEST_ES_LIMIT, 0);
+       ok &= vmcs_write32(GUEST_ES_AR_BYTES, 0x10000);
+
+       ok &= vmcs_write16(GUEST_FS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_FS_BASE, cpu_data->linux_fs_base);
+       ok &= vmcs_write32(GUEST_FS_LIMIT, 0);
+       ok &= vmcs_write32(GUEST_FS_AR_BYTES, 0x10000);
+
+       ok &= vmcs_write16(GUEST_GS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_GS_BASE, cpu_data->linux_gs_base);
+       ok &= vmcs_write32(GUEST_GS_LIMIT, 0);
+       ok &= vmcs_write32(GUEST_GS_AR_BYTES, 0x10000);
+
+       ok &= vmcs_write16(GUEST_SS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_SS_BASE, 0);
+       ok &= vmcs_write32(GUEST_SS_LIMIT, 0);
+       ok &= vmcs_write32(GUEST_SS_AR_BYTES, 0x10000);
+
+       ok &= vmcs_write16(GUEST_TR_SELECTOR, cpu_data->linux_tr);
+       ok &= vmcs_write64(GUEST_TR_BASE, cpu_data->linux_tr_base);
+       ok &= vmcs_write32(GUEST_TR_LIMIT, cpu_data->linux_tr_limit);
+       ok &= vmcs_write32(GUEST_TR_AR_BYTES, cpu_data->linux_tr_ar_bytes);
+
+       ok &= vmcs_write16(GUEST_LDTR_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_LDTR_BASE, 0);
+       ok &= vmcs_write32(GUEST_LDTR_LIMIT, 0);
+       ok &= vmcs_write32(GUEST_LDTR_AR_BYTES, 0x10000);
+
+       ok &= vmcs_write64(GUEST_GDTR_BASE, cpu_data->linux_gdtr.base);
+       ok &= vmcs_write32(GUEST_GDTR_LIMIT, cpu_data->linux_gdtr.limit);
+       ok &= vmcs_write64(GUEST_IDTR_BASE, cpu_data->linux_idtr.base);
+       ok &= vmcs_write32(GUEST_IDTR_LIMIT, cpu_data->linux_idtr.limit);
+
+       ok &= vmcs_write64(GUEST_RFLAGS, 0x02);
+       /* NOTE(review): presumably skips the registers and return address
+        * pushed on the Linux stack during entry - confirm against entry.S */
+       ok &= vmcs_write64(GUEST_RSP, cpu_data->linux_sp +
+                          (NUM_ENTRY_REGS + 1) * sizeof(unsigned long));
+       ok &= vmcs_write64(GUEST_RIP, cpu_data->linux_ip);
+
+       ok &= vmcs_write32(GUEST_SYSENTER_CS,
+                          read_msr(MSR_IA32_SYSENTER_CS));
+       ok &= vmcs_write64(GUEST_SYSENTER_EIP,
+                          read_msr(MSR_IA32_SYSENTER_EIP));
+       ok &= vmcs_write64(GUEST_SYSENTER_ESP,
+                          read_msr(MSR_IA32_SYSENTER_ESP));
+
+       ok &= vmcs_write64(GUEST_DR7, 0x00000400);
+
+       ok &= vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+       ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+       ok &= vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+
+       ok &= vmcs_write64(GUEST_IA32_EFER, cpu_data->linux_efer);
+
+       /* TODO: switch PAT, PERF */
+
+       ok &= vmcs_write64(VMCS_LINK_POINTER, -1UL);
+       ok &= vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
+
+       /* control fields: capability MSR value plus the features we need;
+        * vmcs_write32 keeps only the low 32 bits of val */
+       val = read_msr(MSR_IA32_VMX_PINBASED_CTLS + vmx_true_msr_offs);
+       val |= PIN_BASED_NMI_EXITING;
+       ok &= vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, val);
+
+       ok &= vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+       val = read_msr(MSR_IA32_VMX_PROCBASED_CTLS + vmx_true_msr_offs);
+       val |= CPU_BASED_USE_IO_BITMAPS | CPU_BASED_USE_MSR_BITMAPS |
+               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+       ok &= vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, val);
+
+       ok &= vmcs_write64(MSR_BITMAP, page_map_hvirt2phys(msr_bitmap));
+
+       val = read_msr(MSR_IA32_VMX_PROCBASED_CTLS2);
+       val |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_UNRESTRICTED_GUEST;
+       ok &= vmcs_write32(SECONDARY_VM_EXEC_CONTROL, val);
+
+       ok &= vmcs_write64(APIC_ACCESS_ADDR,
+                          page_map_hvirt2phys(apic_access_page));
+
+       ok &= vmx_set_cell_config(cpu_data->cell);
+
+       ok &= vmcs_write32(EXCEPTION_BITMAP, 0);
+
+       val = read_msr(MSR_IA32_VMX_EXIT_CTLS + vmx_true_msr_offs);
+       val |= VM_EXIT_HOST_ADDR_SPACE_SIZE | VM_EXIT_SAVE_IA32_EFER |
+               VM_EXIT_LOAD_IA32_EFER;
+       ok &= vmcs_write32(VM_EXIT_CONTROLS, val);
+
+       ok &= vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+       ok &= vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
+       ok &= vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
+
+       val = read_msr(MSR_IA32_VMX_ENTRY_CTLS + vmx_true_msr_offs);
+       val |= VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER;
+       ok &= vmcs_write32(VM_ENTRY_CONTROLS, val);
+
+       /* NOTE(review): this overwrites the CR4 guest/host mask that
+        * vmx_set_guest_cr(4, ...) programmed above - confirm intent */
+       ok &= vmcs_write64(CR4_GUEST_HOST_MASK, 0);
+
+       ok &= vmcs_write32(CR3_TARGET_COUNT, 0);
+
+       return ok;
+}
+
+/*
+ * Check that this CPU provides all VMX features the hypervisor relies on,
+ * enter VMX operation and prepare the per-CPU VMCS.
+ *
+ * Returns 0 on success, -ENODEV if VMX is not available or locked off in
+ * MSR_IA32_FEATURE_CONTROL, -EBUSY if CR4.VMXE is already set, and -EIO if
+ * a required feature is missing or a VMX instruction fails.
+ *
+ * When a step after a successful VMXON fails, vmx_state remains VMXON so
+ * that a later vmx_cpu_exit still leaves VMX operation.
+ */
+int vmx_cpu_init(struct per_cpu *cpu_data)
+{
+       unsigned long vmx_proc_ctrl, vmx_proc_ctrl2, ept_cap;
+       unsigned long vmx_pin_ctrl, feature_ctrl, mask;
+       unsigned long vmx_basic;
+       unsigned long cr4;
+       u32 revision_id;
+
+       if (!(cpuid_ecx(1) & X86_FEATURE_VMX))
+               return -ENODEV;
+
+       /* VMXE already set: VMX is considered in use by someone else */
+       cr4 = read_cr4();
+       if (cr4 & X86_CR4_VMXE)
+               return -EBUSY;
+
+       vmx_basic = read_msr(MSR_IA32_VMX_BASIC);
+
+       /* require VMCS size <= PAGE_SIZE */
+       if (((vmx_basic >> 32) & 0x1fff) > PAGE_SIZE)
+               return -EIO;
+
+       /* require VMCS memory access type == write back */
+       if (((vmx_basic >> 50) & 0xf) != 6)
+               return -EIO;
+
+       /* bit 55 set: use the "TRUE" capability MSRs from now on */
+       if (vmx_basic & (1UL << 55))
+               vmx_true_msr_offs = MSR_IA32_VMX_TRUE_PINBASED_CTLS -
+                       MSR_IA32_VMX_PINBASED_CTLS;
+
+       /* the upper 32 bits of a capability MSR hold the allowed-1 bits */
+
+       /* require NMI exiting and preemption timer support */
+       vmx_pin_ctrl = read_msr(MSR_IA32_VMX_PINBASED_CTLS +
+                               vmx_true_msr_offs) >> 32;
+       if (!(vmx_pin_ctrl & PIN_BASED_NMI_EXITING) ||
+           !(vmx_pin_ctrl & PIN_BASED_VMX_PREEMPTION_TIMER))
+               return -EIO;
+
+       /* require I/O and MSR bitmap as well as secondary controls support */
+       vmx_proc_ctrl = read_msr(MSR_IA32_VMX_PROCBASED_CTLS +
+                                vmx_true_msr_offs) >> 32;
+       if (!(vmx_proc_ctrl & CPU_BASED_USE_IO_BITMAPS) ||
+           !(vmx_proc_ctrl & CPU_BASED_USE_MSR_BITMAPS) ||
+           !(vmx_proc_ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+               return -EIO;
+
+       /* require APIC access, EPT and unrestricted guest mode support */
+       vmx_proc_ctrl2 = read_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+       ept_cap = read_msr(MSR_IA32_VMX_EPT_VPID_CAP);
+       if (!(vmx_proc_ctrl2 & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) ||
+           !(vmx_proc_ctrl2 & SECONDARY_EXEC_ENABLE_EPT) ||
+           (ept_cap & EPT_MANDATORY_FEATURES) != EPT_MANDATORY_FEATURES ||
+           !(ept_cap & (EPT_INVEPT_SINGLE | EPT_INVEPT_GLOBAL)) ||
+           !(vmx_proc_ctrl2 & SECONDARY_EXEC_UNRESTRICTED_GUEST))
+               return -EIO;
+
+       /* both regions start with the VMCS revision identifier */
+       revision_id = (u32)vmx_basic;
+       *(u32 *)cpu_data->vmxon_page = revision_id;
+       *(u32 *)cpu_data->vmcs_page = revision_id;
+
+       // TODO: validate CR0
+
+       /* Note: We assume that TXT is off */
+       feature_ctrl = read_msr(MSR_IA32_FEATURE_CONTROL);
+       mask = FEATURE_CONTROL_LOCKED |
+               FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+
+       if ((feature_ctrl & mask) != mask) {
+               /* locked without VMXON permission: cannot be changed */
+               if (feature_ctrl & FEATURE_CONTROL_LOCKED)
+                       return -ENODEV;
+
+               feature_ctrl |= mask;
+               write_msr(MSR_IA32_FEATURE_CONTROL, feature_ctrl);
+       }
+
+       write_cr4(cr4 | X86_CR4_VMXE);
+       // TODO: validate CR4
+
+       if (!vmxon(cpu_data))  {
+               /* undo the CR4 change, VMX operation was not entered */
+               write_cr4(cr4);
+               return -EIO;
+       }
+
+       cpu_data->vmx_state = VMXON;
+
+       if (!vmcs_clear(cpu_data) ||
+           !vmcs_load(cpu_data) ||
+           !vmcs_setup(cpu_data))
+               return -EIO;
+
+       cpu_data->vmx_state = VMCS_READY;
+
+       return 0;
+}
+
+/*
+ * Leave VMX operation on this CPU: clear the VMCS, execute VMXOFF and
+ * reset CR4.VMXE. Does nothing if the CPU is already in state VMXOFF.
+ *
+ * The results of VMCLEAR and VMXOFF are not checked.
+ */
+void vmx_cpu_exit(struct per_cpu *cpu_data)
+{
+       if (cpu_data->vmx_state == VMXOFF)
+               return;
+
+       cpu_data->vmx_state = VMXOFF;
+       vmcs_clear(cpu_data);
+       asm volatile("vmxoff" : : : "cc");
+       write_cr4(read_cr4() & ~X86_CR4_VMXE);
+}
+
+/*
+ * Launch the guest described by the current VMCS.
+ *
+ * Six registers are loaded from cpu_data->linux_reg before VMLAUNCH. On
+ * success, execution continues in the guest and this function does not
+ * return; the code after the asm statement only runs if VMLAUNCH failed,
+ * which is treated as fatal.
+ */
+void vmx_cpu_activate_vmm(struct per_cpu *cpu_data)
+{
+       /* We enter Linux at the point arch_entry would return to as well.
+        * rax is cleared to signal success to the caller. */
+       /* rdi points to linux_reg[]: r15, r14, r13, r12, rbx, rbp are loaded
+        * from its first six 8-byte slots.
+        * NOTE(review): "pop %%rbp" is only reached when vmlaunch fails; its
+        * purpose is not evident from this file - confirm. */
+       asm volatile(
+               "mov (%%rdi),%%r15\n\t"
+               "mov 0x8(%%rdi),%%r14\n\t"
+               "mov 0x10(%%rdi),%%r13\n\t"
+               "mov 0x18(%%rdi),%%r12\n\t"
+               "mov 0x20(%%rdi),%%rbx\n\t"
+               "mov 0x28(%%rdi),%%rbp\n\t"
+               "vmlaunch\n\t"
+               "pop %%rbp"
+               : /* no output */
+               : "a" (0), "D" (cpu_data->linux_reg)
+               : "memory", "r15", "r14", "r13", "r12", "rbx", "rbp", "cc");
+
+       panic_printk("FATAL: vmlaunch failed, error %d\n",
+                    vmcs_read32(VM_INSTRUCTION_ERROR));
+       panic_stop(cpu_data);
+}
+
+/*
+ * Turn off the hypervisor on this CPU and continue running Linux natively.
+ *
+ * The current guest state is copied from the VMCS into cpu_data->linux_*,
+ * arch_cpu_restore then leaves VMX operation and reloads that state, and
+ * finally the guest's general-purpose registers are restored from
+ * guest_regs before jumping to the guest's RIP on the guest's stack with
+ * rax set to 0. Never returns.
+ */
+static void __attribute__((noreturn))
+vmx_cpu_deactivate_vmm(struct registers *guest_regs, struct per_cpu *cpu_data)
+{
+       unsigned long *stack = (unsigned long *)vmcs_read64(GUEST_RSP);
+       unsigned long linux_ip = vmcs_read64(GUEST_RIP);
+
+       /* capture the guest state while the VMCS is still accessible */
+       cpu_data->linux_cr3 = vmcs_read64(GUEST_CR3);
+
+       cpu_data->linux_gdtr.base = vmcs_read64(GUEST_GDTR_BASE);
+       cpu_data->linux_gdtr.limit = vmcs_read64(GUEST_GDTR_LIMIT);
+       cpu_data->linux_idtr.base = vmcs_read64(GUEST_IDTR_BASE);
+       cpu_data->linux_idtr.limit = vmcs_read64(GUEST_IDTR_LIMIT);
+
+       cpu_data->linux_cs = vmcs_read32(GUEST_CS_SELECTOR);
+
+       cpu_data->linux_tr = vmcs_read32(GUEST_TR_SELECTOR);
+
+       cpu_data->linux_efer = vmcs_read64(GUEST_IA32_EFER);
+       cpu_data->linux_fs_base = vmcs_read64(GUEST_FS_BASE);
+       cpu_data->linux_gs_base = vmcs_read64(GUEST_GS_BASE);
+
+       cpu_data->linux_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
+       cpu_data->linux_sysenter_eip = vmcs_read64(GUEST_SYSENTER_EIP);
+       cpu_data->linux_sysenter_esp = vmcs_read64(GUEST_SYSENTER_ESP);
+
+       arch_cpu_restore(cpu_data);
+
+       /* push the resume address onto the guest stack for the final ret */
+       stack--;
+       *stack = linux_ip;
+
+       /*
+        * rbx = guest_regs, rax = guest stack. Pop the saved registers in
+        * the order they are stored; "add $8" skips one slot without
+        * loading it (presumably the RSP placeholder, matching the register
+        * indexing in vmx_handle_cr - confirm against struct registers).
+        * Then switch to the guest stack, clear rax and return to linux_ip.
+        */
+       asm volatile (
+               "mov %%rbx,%%rsp\n\t"
+               "pop %%r15\n\t"
+               "pop %%r14\n\t"
+               "pop %%r13\n\t"
+               "pop %%r12\n\t"
+               "pop %%r11\n\t"
+               "pop %%r10\n\t"
+               "pop %%r9\n\t"
+               "pop %%r8\n\t"
+               "pop %%rdi\n\t"
+               "pop %%rsi\n\t"
+               "pop %%rbp\n\t"
+               "add $8,%%rsp\n\t"
+               "pop %%rbx\n\t"
+               "pop %%rdx\n\t"
+               "pop %%rcx\n\t"
+               "mov %%rax,%%rsp\n\t"
+               "xor %%rax,%%rax\n\t"
+               "ret"
+               : : "a" (stack), "b" (guest_regs));
+       __builtin_unreachable();
+}
+
+/*
+ * Put the guest CPU into a real-mode start state and let it begin
+ * execution at the address selected by sipi_vector.
+ *
+ * For a regular vector, CS gets selector vector << 8 and base vector << 12
+ * with RIP 0. APIC_BSP_PSEUDO_SIPI is translated to vector 0xf0 with RIP
+ * 0xfff0. All general-purpose registers in guest_regs are zeroed. Any
+ * failing VMCS write is fatal.
+ */
+static void vmx_cpu_reset(struct registers *guest_regs,
+                         struct per_cpu *cpu_data, unsigned int sipi_vector)
+{
+       unsigned long val;
+       bool ok = true;
+
+       ok &= vmx_set_guest_cr(0, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
+       ok &= vmx_set_guest_cr(4, 0);
+
+       ok &= vmcs_write64(GUEST_CR3, 0);
+
+       ok &= vmcs_write64(GUEST_RFLAGS, 0x02);
+       ok &= vmcs_write64(GUEST_RSP, 0);
+
+       /* val is the start RIP: 0 for a real SIPI, 0xfff0 for the pseudo one */
+       val = 0;
+       if (sipi_vector == APIC_BSP_PSEUDO_SIPI) {
+               val = 0xfff0;
+               sipi_vector = 0xf0;
+       }
+       ok &= vmcs_write64(GUEST_RIP, val);
+
+       ok &= vmcs_write16(GUEST_CS_SELECTOR, sipi_vector << 8);
+       ok &= vmcs_write64(GUEST_CS_BASE, sipi_vector << 12);
+       ok &= vmcs_write32(GUEST_CS_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_CS_AR_BYTES, 0x0009b);
+
+       ok &= vmcs_write16(GUEST_DS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_DS_BASE, 0);
+       ok &= vmcs_write32(GUEST_DS_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_DS_AR_BYTES, 0x00093);
+
+       ok &= vmcs_write16(GUEST_ES_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_ES_BASE, 0);
+       ok &= vmcs_write32(GUEST_ES_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_ES_AR_BYTES, 0x00093);
+
+       ok &= vmcs_write16(GUEST_FS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_FS_BASE, 0);
+       ok &= vmcs_write32(GUEST_FS_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_FS_AR_BYTES, 0x00093);
+
+       ok &= vmcs_write16(GUEST_GS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_GS_BASE, 0);
+       ok &= vmcs_write32(GUEST_GS_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_GS_AR_BYTES, 0x00093);
+
+       ok &= vmcs_write16(GUEST_SS_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_SS_BASE, 0);
+       ok &= vmcs_write32(GUEST_SS_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_SS_AR_BYTES, 0x00093);
+
+       ok &= vmcs_write16(GUEST_TR_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_TR_BASE, 0);
+       ok &= vmcs_write32(GUEST_TR_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_TR_AR_BYTES, 0x0008b);
+
+       ok &= vmcs_write16(GUEST_LDTR_SELECTOR, 0);
+       ok &= vmcs_write64(GUEST_LDTR_BASE, 0);
+       ok &= vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
+       ok &= vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
+
+       ok &= vmcs_write64(GUEST_GDTR_BASE, 0);
+       ok &= vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
+       ok &= vmcs_write64(GUEST_IDTR_BASE, 0);
+       ok &= vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+
+       ok &= vmcs_write64(GUEST_IA32_EFER, 0);
+
+       ok &= vmcs_write32(GUEST_SYSENTER_CS, 0);
+       ok &= vmcs_write64(GUEST_SYSENTER_EIP, 0);
+       ok &= vmcs_write64(GUEST_SYSENTER_ESP, 0);
+
+       ok &= vmcs_write64(GUEST_DR7, 0x00000400);
+
+       ok &= vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+       ok &= vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
+       ok &= vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+
+       /* the guest no longer starts in IA-32e mode */
+       val = vmcs_read32(VM_ENTRY_CONTROLS);
+       val &= ~VM_ENTRY_IA32E_MODE;
+       ok &= vmcs_write32(VM_ENTRY_CONTROLS, val);
+
+       /* reload I/O bitmaps and EPT pointer from the CPU's current cell */
+       ok &= vmx_set_cell_config(cpu_data->cell);
+
+       memset(guest_regs, 0, sizeof(*guest_regs));
+
+       if (!ok) {
+               panic_printk("FATAL: CPU reset failed\n");
+               panic_stop(cpu_data);
+       }
+}
+
+/*
+ * Request a VM exit on this CPU by enabling the VMX preemption timer in
+ * the pin-based execution controls of the current VMCS.
+ *
+ * Does nothing unless the CPU's VMCS is set up (vmx_state == VMCS_READY),
+ * because the VMCS must not be accessed before that.
+ */
+void vmx_schedule_vmexit(struct per_cpu *cpu_data)
+{
+       u32 pin_based_ctrl;
+
+       /*
+        * Note: "!cpu_data->vmx_state == VMCS_READY" would negate the state
+        * first and then compare the resulting 0/1 with VMCS_READY.
+        */
+       if (cpu_data->vmx_state != VMCS_READY)
+               return;
+
+       pin_based_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+       pin_based_ctrl |= PIN_BASED_VMX_PREEMPTION_TIMER;
+       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, pin_based_ctrl);
+}
+
+/*
+ * Clear the preemption timer bit in the pin-based execution controls of
+ * the current VMCS.
+ */
+static void vmx_disable_preemption_timer(void)
+{
+       u32 ctrl;
+
+       ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
+       ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+
+       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, ctrl);
+}
+
+/* Advance the guest's RIP by inst_len bytes. */
+static void skip_emulated_instruction(unsigned int inst_len)
+{
+       unsigned long next_rip = vmcs_read64(GUEST_RIP) + inst_len;
+
+       vmcs_write64(GUEST_RIP, next_rip);
+}
+
+/*
+ * If the guest's EFER has LME set but LMA still clear, set LMA and switch
+ * the VM-entry controls to IA-32e mode. In every other case nothing is
+ * changed.
+ */
+static void update_efer(void)
+{
+       unsigned long guest_efer = vmcs_read64(GUEST_IA32_EFER);
+       u32 entry_ctls;
+
+       if ((guest_efer & (EFER_LME | EFER_LMA)) == EFER_LME) {
+               vmcs_write64(GUEST_IA32_EFER, guest_efer | EFER_LMA);
+
+               entry_ctls = vmcs_read32(VM_ENTRY_CONTROLS);
+               vmcs_write32(VM_ENTRY_CONTROLS,
+                            entry_ctls | VM_ENTRY_IA32E_MODE);
+       }
+}
+
+/*
+ * Handle a VM exit caused by a control register access.
+ *
+ * Only "move to CR0" and "move to CR4" are emulated: the instruction is
+ * skipped and the new value is applied via vmx_set_guest_cr. Returns true
+ * if the access was handled; otherwise a message is printed and false is
+ * returned.
+ */
+static bool vmx_handle_cr(struct registers *guest_regs,
+                         struct per_cpu *cpu_data)
+{
+       u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+       unsigned long cr, reg, val;
+
+       /* bits 3:0: control register number, bits 11:8: source register */
+       cr = exit_qualification & 0xf;
+       reg = (exit_qualification >> 8) & 0xf;
+
+       /* bits 5:4: access type */
+       switch ((exit_qualification >> 4) & 3) {
+       case 0: /* move to cr */
+               /* register 4 is RSP, which lives in the VMCS; all others are
+                * taken from guest_regs, indexed in reverse register order */
+               if (reg == 4)
+                       val = vmcs_read64(GUEST_RSP);
+               else
+                       val = ((unsigned long *)guest_regs)[15 - reg];
+
+               if (cr == 0 || cr == 4) {
+                       skip_emulated_instruction(X86_INST_LEN_MOV_TO_CR);
+                       /* TODO: check for #GP reasons */
+                       vmx_set_guest_cr(cr, val);
+                       /* enabling paging may activate long mode */
+                       if (cr == 0 && val & X86_CR0_PG)
+                               update_efer();
+                       return true;
+               }
+               break;
+       default:
+               break;
+       }
+       panic_printk("FATAL: Unhandled CR access, qualification %x\n",
+                    exit_qualification);
+       return false;
+}
+
+/*
+ * Handle a VM exit caused by a guest access to the APIC access page.
+ *
+ * Only linear read and write accesses to offsets that are multiples of 16
+ * are forwarded to apic_mmio_access. Returns true if the access was
+ * emulated and the instruction skipped. Returns false if apic_mmio_access
+ * reported an instruction length of 0, or - after printing a message - if
+ * the access type or offset is not supported.
+ */
+static bool vmx_handle_apic_access(struct registers *guest_regs,
+                                  struct per_cpu *cpu_data)
+{
+       unsigned int inst_len, offset;
+       unsigned long page_table_addr;
+       u64 qualification;
+       bool is_write;
+
+       qualification = vmcs_read64(EXIT_QUALIFICATION);
+
+       switch (qualification & APIC_ACCESS_TYPE_MASK) {
+       case APIC_ACCESS_TYPE_LINEAR_READ:
+       case APIC_ACCESS_TYPE_LINEAR_WRITE:
+               is_write = !!(qualification & APIC_ACCESS_TYPE_LINEAR_WRITE);
+               offset = qualification & APIC_ACCESS_OFFET_MASK;
+               /* reject offsets that are not 16-byte aligned */
+               if (offset & 0x00f)
+                       break;
+
+               page_table_addr = vmcs_read64(GUEST_CR3) & PAGE_ADDR_MASK;
+
+               /* offset >> 4 is the index of the 16-byte register slot */
+               inst_len = apic_mmio_access(guest_regs, cpu_data,
+                                           vmcs_read64(GUEST_RIP),
+                                           page_table_addr, offset >> 4,
+                                           is_write);
+               if (!inst_len)
+                       return false;
+
+               skip_emulated_instruction(inst_len);
+               return true;
+       }
+       panic_printk("FATAL: Unhandled APIC access, "
+                    "qualification %x\n", qualification);
+       return false;
+}
+
+/*
+ * Print details about the current VM exit: exit qualification, vectoring
+ * and interrupt information and, for EPT violations or misconfigurations,
+ * the guest physical and linear addresses.
+ */
+static void dump_vm_exit_details(u32 reason)
+{
+       panic_printk("qualification %x\n", vmcs_read64(EXIT_QUALIFICATION));
+       panic_printk("vectoring info: %x interrupt info: %x\n",
+                    vmcs_read32(IDT_VECTORING_INFO_FIELD),
+                    vmcs_read32(VM_EXIT_INTR_INFO));
+
+       switch (reason) {
+       case EXIT_REASON_EPT_VIOLATION:
+       case EXIT_REASON_EPT_MISCONFIG:
+               panic_printk("guest phys addr %p guest linear addr: %p\n",
+                            vmcs_read64(GUEST_PHYSICAL_ADDRESS),
+                            vmcs_read64(GUEST_LINEAR_ADDRESS));
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * Print the guest's register state: RIP, RSP and RFLAGS from the VMCS,
+ * selected general-purpose registers from guest_regs, plus CS, the control
+ * registers and EFER from the VMCS.
+ */
+static void dump_guest_regs(struct registers *guest_regs)
+{
+       panic_printk("RIP: %p RSP: %p FLAGS: %x\n", vmcs_read64(GUEST_RIP),
+                    vmcs_read64(GUEST_RSP), vmcs_read64(GUEST_RFLAGS));
+       panic_printk("RAX: %p RBX: %p RCX: %p\n", guest_regs->rax,
+                    guest_regs->rbx, guest_regs->rcx);
+       panic_printk("RDX: %p RSI: %p RDI: %p\n", guest_regs->rdx,
+                    guest_regs->rsi, guest_regs->rdi);
+       /* the value printed as "EFER.LMA" is the IA-32e mode bit of the
+        * VM-entry controls, not a bit read from the guest EFER field */
+       panic_printk("CS: %x BASE: %p AR-BYTES: %x EFER.LMA %d\n",
+                    vmcs_read64(GUEST_CS_SELECTOR),
+                    vmcs_read64(GUEST_CS_BASE),
+                    vmcs_read32(GUEST_CS_AR_BYTES),
+                    !!(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE));
+       panic_printk("CR0: %p CR3: %p CR4: %p\n", vmcs_read64(GUEST_CR0),
+                    vmcs_read64(GUEST_CR3), vmcs_read64(GUEST_CR4));
+       panic_printk("EFER: %p\n", vmcs_read64(GUEST_IA32_EFER));
+}
+
+/*
+ * Central VM-exit dispatcher.
+ *
+ * Reads the exit reason from the VMCS and handles it. Every case that
+ * handles its exit returns directly. Every path that leaves the switch via
+ * "break" (or jumps to dump_and_stop) is treated as fatal: the guest
+ * registers are dumped and panic_stop() is called for this CPU.
+ *
+ * @guest_regs: general-purpose registers of the guest; read for hypercall
+ *              and MSR arguments and updated with results
+ * @cpu_data:   per-CPU data of the CPU that took the exit
+ */
+void vmx_handle_exit(struct registers *guest_regs, struct per_cpu *cpu_data)
+{
+       u32 reason = vmcs_read32(VM_EXIT_REASON);
+       int sipi_vector;
+
+       /* a failed VM entry is flagged by a bit inside the exit reason; only
+        * the low 16 bits are reported as the reason number */
+       if (reason & EXIT_REASONS_FAILED_VMENTRY) {
+               panic_printk("FATAL: VM-Entry failure, reason %d\n",
+                            (u16)reason);
+               goto dump_and_stop;
+       }
+
+       switch (reason) {
+       case EXIT_REASON_EXCEPTION_NMI:
+               /* raise NMI_VECTOR via a software interrupt, then share the
+                * event processing with the preemption timer exit.
+                * NOTE(review): presumably this forwards the NMI to the
+                * hypervisor's own handler - confirm */
+               asm volatile("int %0" : : "i" (NMI_VECTOR));
+               /* fall through */
+       case EXIT_REASON_PREEMPTION_TIMER:
+               vmx_disable_preemption_timer();
+               /* a non-negative return value is treated as a SIPI vector
+                * and causes a reset of this CPU's guest state */
+               sipi_vector = apic_handle_events(cpu_data);
+               if (sipi_vector >= 0) {
+                       printk("CPU %d received SIPI, vector %x\n",
+                              cpu_data->cpu_id, sipi_vector);
+                       vmx_cpu_reset(guest_regs, cpu_data, sipi_vector);
+               }
+               return;
+       case EXIT_REASON_CPUID:
+               skip_emulated_instruction(X86_INST_LEN_CPUID);
+               /* clear the upper 32 bits of each register first; __cpuid()
+                * is then given u32 pointers into the same 64-bit slots */
+               guest_regs->rax &= 0xffffffff;
+               guest_regs->rbx &= 0xffffffff;
+               guest_regs->rcx &= 0xffffffff;
+               guest_regs->rdx &= 0xffffffff;
+               __cpuid((u32 *)&guest_regs->rax, (u32 *)&guest_regs->rbx,
+                       (u32 *)&guest_regs->rcx, (u32 *)&guest_regs->rdx);
+               return;
+       case EXIT_REASON_VMCALL:
+               /* hypercall: number in RAX, result written back to RAX */
+               skip_emulated_instruction(X86_INST_LEN_VMCALL);
+               switch (guest_regs->rax) {
+               case JAILHOUSE_HC_DISABLE:
+                       guest_regs->rax = shutdown(cpu_data);
+                       if (guest_regs->rax == 0)
+                               vmx_cpu_deactivate_vmm(guest_regs, cpu_data);
+                       break;
+               case JAILHOUSE_HC_CELL_CREATE:
+                       /* RDI carries the address of the cell configuration */
+                       guest_regs->rax = cell_create(cpu_data,
+                                                     guest_regs->rdi);
+                       break;
+               default:
+                       /* the instruction was already skipped above, so the
+                        * instruction length is subtracted again for the
+                        * reported RIP */
+                       printk("CPU %d: Unknown vmcall %d, RIP: %p\n",
+                              cpu_data->cpu_id, guest_regs->rax,
+                              vmcs_read64(GUEST_RIP) - X86_INST_LEN_VMCALL);
+                       guest_regs->rax = -ENOSYS;
+                       break;
+               }
+               return;
+       case EXIT_REASON_CR_ACCESS:
+               if (vmx_handle_cr(guest_regs, cpu_data))
+                       return;
+               break;
+       case EXIT_REASON_MSR_READ:
+               /* RCX holds the MSR index; only the x2APIC range is handled.
+                * Note that the instruction is skipped before the range
+                * check, so the RIP dumped on failure is the advanced one. */
+               skip_emulated_instruction(X86_INST_LEN_RDMSR);
+               if (guest_regs->rcx >= MSR_X2APIC_BASE &&
+                   guest_regs->rcx <= MSR_X2APIC_END) {
+                       x2apic_handle_read(guest_regs);
+                       return;
+               }
+               panic_printk("FATAL: Unhandled MSR read: %08x\n",
+                            guest_regs->rcx);
+               break;
+       case EXIT_REASON_MSR_WRITE:
+               skip_emulated_instruction(X86_INST_LEN_WRMSR);
+               /* ICR writes get their own handler, taking RAX and RDX as
+                * the two halves of the written value; they are checked
+                * before the generic x2APIC range */
+               if (guest_regs->rcx == MSR_X2APIC_ICR) {
+                       apic_handle_icr_write(cpu_data, guest_regs->rax,
+                                             guest_regs->rdx);
+                       return;
+               }
+               if (guest_regs->rcx >= MSR_X2APIC_BASE &&
+                   guest_regs->rcx <= MSR_X2APIC_END) {
+                       x2apic_handle_write(guest_regs);
+                       return;
+               }
+               panic_printk("FATAL: Unhandled MSR write: %08x\n",
+                            guest_regs->rcx);
+               break;
+       case EXIT_REASON_APIC_ACCESS:
+               if (vmx_handle_apic_access(guest_regs, cpu_data))
+                       return;
+               break;
+       default:
+               /* only this path prints the extra exit details */
+               panic_printk("FATAL: Unhandled VM-Exit, reason %d, ",
+                            (u16)reason);
+               dump_vm_exit_details(reason);
+               break;
+       }
+dump_and_stop:
+       dump_guest_regs(guest_regs);
+       panic_stop(cpu_data);
+}
+
+/*
+ * Report a failed vmresume: print the VM-instruction error field of the
+ * VMCS and stop this CPU via panic_stop().
+ */
+void vmx_entry_failure(struct per_cpu *cpu_data)
+{
+       panic_printk("FATAL: vmresume failed, error %d\n",
+                    vmcs_read32(VM_INSTRUCTION_ERROR));
+       panic_stop(cpu_data);
+}
diff --git a/hypervisor/control.c b/hypervisor/control.c
new file mode 100644 (file)
index 0000000..7efa6bb
--- /dev/null
@@ -0,0 +1,283 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <jailhouse/control.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/string.h>
+#include <asm/bitops.h>
+#include <asm/spinlock.h>
+
+/* system configuration; defined here but not accessed within this file */
+struct jailhouse_system *system_config;
+/* head of the singly linked list of cells (chained via cell->next);
+ * shutdown() refers to the first entry as the Linux cell */
+struct cell *cell_list;
+
+/* taken by shutdown() around its shutdown_started check and cell teardown */
+static DEFINE_SPINLOCK(shutdown_lock);
+
+/*
+ * Return the first CPU id greater than cpu whose bit is set in cpu_set and
+ * which differs from exception. If there is none, a value larger than
+ * cpu_set->max_cpu_id is returned. Passing (unsigned int)-1 as cpu starts
+ * the search at CPU 0.
+ */
+unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set, int exception)
+{
+       for (cpu++; cpu <= cpu_set->max_cpu_id; cpu++)
+               if (cpu != exception && test_bit(cpu, cpu_set->bitmap))
+                       break;
+       return cpu;
+}
+
+/*
+ * Suspend every CPU of the caller's cell except the calling CPU itself,
+ * then log the cell name.
+ */
+static void cell_suspend(struct per_cpu *cpu_data)
+{
+       unsigned int cpu;
+
+       for_each_cpu_except(cpu, cpu_data->cell->cpu_set, cpu_data->cpu_id)
+               arch_suspend_cpu(cpu);
+       printk("Suspended cell \"%s\"\n", cpu_data->cell->name);
+}
+
+/*
+ * Counterpart of cell_suspend(): resume every CPU of the caller's cell
+ * except the calling CPU.
+ */
+static void cell_resume(struct per_cpu *cpu_data)
+{
+       struct cell *cell = cpu_data->cell;
+       unsigned int cpu;
+
+       for_each_cpu_except(cpu, cell->cpu_set, cpu_data->cpu_id)
+               arch_resume_cpu(cpu);
+}
+
+/*
+ * Return the smallest id that is not used by any cell on cell_list.
+ *
+ * Whenever the current candidate collides with an existing cell, the
+ * candidate is incremented and the list walk restarts from the head.
+ */
+static unsigned int get_free_cell_id(void)
+{
+       unsigned int candidate = 0;
+       struct cell *cell = cell_list;
+
+       while (cell) {
+               if (cell->id == candidate) {
+                       /* collision: try the next id against all cells */
+                       candidate++;
+                       cell = cell_list;
+               } else {
+                       cell = cell->next;
+               }
+       }
+
+       return candidate;
+}
+
+/*
+ * Initialize the generic part of a cell from its configuration.
+ *
+ * The configuration descriptor is followed in memory by the CPU bitmap
+ * (config->cpu_set_size bytes) and then by the memory region array, whose
+ * first entry provides cell->page_offset.
+ *
+ * @cell:         cell to set up; name, id, cpu_set and page_offset are
+ *                written
+ * @config:       cell configuration descriptor
+ * @copy_cpu_set: if true, the CPU bitmap is copied from the configuration;
+ *                otherwise the bitmap content is left untouched
+ *
+ * Returns 0 on success, -EINVAL if the configured CPU bitmap does not fit
+ * into a page-backed cpu_set, -ENOMEM if the page for a large cpu_set
+ * cannot be allocated.
+ */
+int cell_init(struct cell *cell, struct jailhouse_cell_desc *config,
+             bool copy_cpu_set)
+{
+       unsigned long *config_cpu_set =
+               (unsigned long *)(((void *)config) +
+                                 sizeof(struct jailhouse_cell_desc));
+       unsigned long cpu_set_size = config->cpu_set_size;
+       struct jailhouse_memory *config_ram =
+               (struct jailhouse_memory *)(((void *)config_cpu_set) +
+                                           cpu_set_size);
+       struct cpu_set *cpu_set;
+
+       memcpy(cell->name, config->name, sizeof(cell->name));
+       cell->id = get_free_cell_id();
+
+       /*
+        * A page-backed cpu_set only offers PAGE_SIZE - sizeof(unsigned long)
+        * bytes of bitmap (see the max_cpu_id calculation below). Anything
+        * larger would make the memcpy below run past the allocated page.
+        */
+       if (cpu_set_size > PAGE_SIZE - sizeof(unsigned long))
+               return -EINVAL;
+       else if (cpu_set_size > sizeof(cell->small_cpu_set.bitmap)) {
+               cpu_set = page_alloc(&mem_pool, 1);
+               if (!cpu_set)
+                       return -ENOMEM;
+               cpu_set->max_cpu_id =
+                       ((PAGE_SIZE - sizeof(unsigned long)) * 8) - 1;
+       } else {
+               /* small sets live inside struct cell itself */
+               cpu_set = &cell->small_cpu_set;
+               cpu_set->max_cpu_id =
+                       (sizeof(cell->small_cpu_set.bitmap) * 8) - 1;
+       }
+       cell->cpu_set = cpu_set;
+       if (copy_cpu_set)
+               memcpy(cell->cpu_set->bitmap, config_cpu_set, cpu_set_size);
+
+       cell->page_offset = config_ram->phys_start;
+
+       return 0;
+}
+
+/*
+ * Release the cell's cpu_set if it was allocated as a separate page.
+ * The set embedded in struct cell (small_cpu_set) needs no freeing.
+ */
+static void destroy_cpu_set(struct cell *cell)
+{
+       if (cell->cpu_set == &cell->small_cpu_set)
+               return;
+
+       page_free(&mem_pool, cell->cpu_set, 1);
+}
+
+/*
+ * Validate the memory regions of a cell configuration.
+ *
+ * The region array is located behind the descriptor and the CPU bitmap.
+ * A region is rejected if its physical start, virtual start or size is not
+ * page-aligned, or if it carries access flags outside
+ * JAILHOUSE_MEM_VALID_FLAGS.
+ *
+ * Returns 0 if all regions pass, -EINVAL on the first invalid one.
+ */
+int check_mem_regions(struct jailhouse_cell_desc *config)
+{
+       struct jailhouse_memory *regions, *region;
+       unsigned int idx;
+
+       regions = (void *)config + sizeof(struct jailhouse_cell_desc) +
+               config->cpu_set_size;
+
+       for (idx = 0; idx < config->num_memory_regions; idx++) {
+               region = &regions[idx];
+
+               /* any low bit set in one of the three values means that
+                * value is not page-aligned */
+               if (!((region->phys_start | region->virt_start |
+                      region->size) & ~PAGE_MASK) &&
+                   !(region->access_flags & ~JAILHOUSE_MEM_VALID_FLAGS))
+                       continue;
+
+               printk("FATAL: Invalid memory bar (%p, %p, %p, %x)\n",
+                      region->phys_start, region->virt_start, region->size,
+                      region->access_flags);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+/*
+ * Create a new cell from the configuration at config_address.
+ *
+ * The CPUs of the calling cell (except the caller) are suspended for the
+ * duration of the call. The configuration is mapped read-only at
+ * FOREIGN_MAPPING_BASE, first only the descriptor header, then the full
+ * configuration once its size is known. The CPUs listed in the new cell's
+ * set are removed from the caller's cell, handed to the new cell and reset.
+ *
+ * @cpu_data:       per-CPU data of the calling CPU
+ * @config_address: address of the cell configuration as passed by the
+ *                  caller; it need not be page-aligned
+ *
+ * Returns 0 on success or a negative error code:
+ *   -ENOMEM  configuration too large or cell allocation failed
+ *   -EBUSY   the calling CPU is part of the new cell's CPU set
+ *   -EINVAL  the new CPU set is not a subset of the caller's set
+ *   or whatever page_map_create(), check_mem_regions(), cell_init() or
+ *   arch_cell_create() report.
+ */
+int cell_create(struct per_cpu *cpu_data, unsigned long config_address)
+{
+       /* offset of the configuration inside its first page */
+       unsigned long cfg_page_offs = config_address & ~PAGE_MASK;
+       unsigned long header_size, total_size;
+       struct jailhouse_cell_desc *cfg;
+       struct cpu_set *shrinking_set;
+       unsigned int cell_pages, cpu;
+       struct cell *cell, *last;
+       int err;
+
+       cell_suspend(cpu_data);
+
+       /* map just enough to be able to read the descriptor header */
+       header_size = cfg_page_offs + sizeof(struct jailhouse_cell_desc);
+
+       err = page_map_create(hv_page_table, config_address & PAGE_MASK,
+                             header_size, FOREIGN_MAPPING_BASE,
+                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+                             PAGE_DIR_LEVELS);
+       if (err)
+               goto resume_out;
+
+       cfg = (struct jailhouse_cell_desc *)(FOREIGN_MAPPING_BASE +
+                                            cfg_page_offs);
+       /*
+        * The mapping starts at the page boundary below config_address, so
+        * the in-page offset must be part of the mapped size. Otherwise the
+        * tail of the configuration may remain unmapped.
+        */
+       total_size = cfg_page_offs + jailhouse_cell_config_size(cfg);
+       if (total_size >
+           hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES * PAGE_SIZE) {
+               /* only the header mapping exists at this point */
+               total_size = header_size;
+               err = -ENOMEM;
+               goto unmap_out;
+       }
+
+       err = page_map_create(hv_page_table, config_address & PAGE_MASK,
+                             total_size, FOREIGN_MAPPING_BASE,
+                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+                             PAGE_DIR_LEVELS);
+       if (err)
+               goto unmap_out;
+
+       err = check_mem_regions(cfg);
+       if (err)
+               goto unmap_out;
+
+       cell_pages = PAGE_ALIGN(sizeof(*cell)) / PAGE_SIZE;
+       cell = page_alloc(&mem_pool, cell_pages);
+       if (!cell) {
+               err = -ENOMEM;
+               goto unmap_out;
+       }
+
+       err = cell_init(cell, cfg, true);
+       if (err)
+               goto err_free_cell;
+
+       /* don't assign the CPU we are currently running on */
+       if (cpu_data->cpu_id <= cell->cpu_set->max_cpu_id &&
+           test_bit(cpu_data->cpu_id, cell->cpu_set->bitmap)) {
+               err = -EBUSY;
+               goto err_free_cpu_set;
+       }
+
+       shrinking_set = cpu_data->cell->cpu_set;
+
+       /* shrinking set must be super-set of new cell's cpu set */
+       if (shrinking_set->max_cpu_id < cell->cpu_set->max_cpu_id) {
+               err = -EINVAL;
+               goto err_free_cpu_set;
+       }
+       for_each_cpu(cpu, cell->cpu_set)
+               if (!test_bit(cpu, shrinking_set->bitmap)) {
+                       err = -EINVAL;
+                       goto err_free_cpu_set;
+               }
+
+       /* take the CPUs away from the calling cell */
+       for_each_cpu(cpu, cell->cpu_set)
+               clear_bit(cpu, shrinking_set->bitmap);
+
+       err = arch_cell_create(cpu_data, cell, cfg);
+       if (err)
+               goto err_restore_cpu_set;
+
+       /* append the new cell to the end of the cell list */
+       last = cell_list;
+       while (last->next)
+               last = last->next;
+       last->next = cell;
+
+       /* update cell references and clean up before releasing the cpus of
+        * the new cell */
+       for_each_cpu(cpu, cell->cpu_set)
+               per_cpu(cpu)->cell = cell;
+
+       printk("Created cell \"%s\"\n", cell->name);
+
+       page_map_destroy(hv_page_table, FOREIGN_MAPPING_BASE, total_size,
+                        PAGE_DIR_LEVELS);
+       page_map_dump_stats("after cell creation");
+
+       for_each_cpu(cpu, cell->cpu_set)
+               arch_reset_cpu(cpu);
+
+resume_out:
+       cell_resume(cpu_data);
+
+       return err;
+
+       /* error unwinding, in reverse order of acquisition */
+err_restore_cpu_set:
+       for_each_cpu(cpu, cell->cpu_set)
+               set_bit(cpu, shrinking_set->bitmap);
+err_free_cpu_set:
+       destroy_cpu_set(cell);
+err_free_cell:
+       page_free(&mem_pool, cell, cell_pages);
+unmap_out:
+       page_map_destroy(hv_page_table, FOREIGN_MAPPING_BASE, total_size,
+                        PAGE_DIR_LEVELS);
+       goto resume_out;
+}
+
+/*
+ * Handle a hypervisor shutdown request from the given CPU.
+ *
+ * The first caller (tracked by the static shutdown_started flag, protected
+ * by shutdown_lock) walks all cells behind the head of cell_list and calls
+ * arch_shutdown_cpu() for each of their CPUs. Every caller, first or not,
+ * logs the release of its own CPU.
+ *
+ * Always returns 0.
+ */
+int shutdown(struct per_cpu *cpu_data)
+{
+       static bool shutdown_started;
+       struct cell *cell = cell_list->next;
+       unsigned int this_cpu = cpu_data->cpu_id;
+       unsigned int cpu;
+
+       // TODO: access control
+
+       spin_lock(&shutdown_lock);
+
+       if (!shutdown_started) {
+               shutdown_started = true;
+
+               printk("Shutting down hypervisor\n");
+
+               /* all cells except the list head are closed here */
+               for (; cell; cell = cell->next) {
+                       printk(" Closing cell \"%s\"\n", cell->name);
+
+                       for_each_cpu(cpu, cell->cpu_set) {
+                               printk("  Releasing CPU %d\n", cpu);
+                               arch_shutdown_cpu(cpu);
+                       }
+               }
+
+               printk(" Closing Linux cell \"%s\"\n", cell_list->name);
+       }
+       printk("  Releasing CPU %d\n", this_cpu);
+
+       spin_unlock(&shutdown_lock);
+
+       return 0;
+}
diff --git a/hypervisor/hypervisor.lds.S b/hypervisor/hypervisor.lds.S
new file mode 100644 (file)
index 0000000..b5f184f
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/paging.h>
+
+/*
+ * Image layout, linked at address 0:
+ *   .header, .text, .rodata, .data, .got, .bss, followed by the
+ *   page-aligned symbol __page_pool.
+ * The exported symbols bracket the image start (__start), the GOT
+ * (__got_start/__got_end) and the BSS (__bss_start/__bss_end).
+ */
+SECTIONS
+{
+       . = 0;
+       __start = .;
+       /* the header section is placed first, at offset 0 of the image */
+       .header         : { *(.header) }
+
+       . = ALIGN(16);
+       .text           : { *(.text) }
+
+       . = ALIGN(16);
+       .rodata         : { *(.rodata) }
+
+       . = ALIGN(16);
+       .data           : { *(.data) }
+
+       .got            : {
+               __got_start = .;
+               *(.got*)
+               __got_end = .;
+       }
+
+       . = ALIGN(16);
+       .bss            : {
+               __bss_start = .;
+               *(.bss)
+               __bss_end = .;
+       }
+
+       /* first page-aligned address behind the image */
+       . = ALIGN(PAGE_SIZE);
+       __page_pool = .;
+
+       /* exception-handling frame data is dropped from the image */
+       /DISCARD/ : {
+               *(.eh_frame*)
+       }
+}
diff --git a/hypervisor/include/jailhouse/acpi.h b/hypervisor/include/jailhouse/acpi.h
new file mode 100644 (file)
index 0000000..1a854de
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+
+/*
+ * Common header found at the start of an ACPI table: 4-byte signature,
+ * table length, revision, checksum, OEM identification and the
+ * ASL compiler id/revision.
+ */
+struct acpi_table_header {
+       u32 signature;
+       u32 length;
+       u8 revision;
+       u8 checksum;
+       char oem_id[6];
+       char oem_table_id[8];
+       u32 oem_revision;
+       char asl_compiler_id[4];
+       u32 asl_compiler_revision;
+};
+
+/*
+ * Look up an ACPI table by its 4-character name, beginning at start.
+ * NOTE(review): search order and the return value on a miss are defined by
+ * the implementation, which is not part of this header - confirm there.
+ */
+const struct acpi_table_header *
+acpi_find_table(char name[4], const struct acpi_table_header *start);
diff --git a/hypervisor/include/jailhouse/cell-config.h b/hypervisor/include/jailhouse/cell-config.h
new file mode 100644 (file)
index 0000000..822c3d2
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_CELL_CONFIG_H
+#define _JAILHOUSE_CELL_CONFIG_H
+
+/* maximum cell name length; the name buffer has one additional byte */
+#define JAILHOUSE_CELL_NAME_MAXLEN     31
+
+/*
+ * Fixed-size descriptor at the start of a cell configuration.
+ *
+ * The descriptor is followed by variable-sized data whose sizes and counts
+ * are given by the fields below; jailhouse_cell_config_size() sums them up
+ * in this order: CPU set, memory regions, IRQ lines, PIO bitmap, PCI
+ * devices.
+ */
+struct jailhouse_cell_desc {
+       char name[JAILHOUSE_CELL_NAME_MAXLEN+1];
+
+       /* size of the CPU bitmap in bytes */
+       __u32 cpu_set_size;
+       /* number of struct jailhouse_memory entries */
+       __u32 num_memory_regions;
+       /* number of struct jailhouse_irq_line entries */
+       __u32 num_irq_lines;
+       /* size of the PIO bitmap in bytes */
+       __u32 pio_bitmap_size;
+
+       /* number of struct jailhouse_pci_device entries */
+       __u32 num_pci_devices;
+
+       __u32 padding[3];
+};
+
+/* access flag bits for struct jailhouse_memory.access_flags */
+#define JAILHOUSE_MEM_READ             0x0001
+#define JAILHOUSE_MEM_WRITE            0x0002
+#define JAILHOUSE_MEM_EXECUTE          0x0004
+#define JAILHOUSE_MEM_DMA              0x0008
+
+/* union of all flag bits defined above; check_mem_regions() rejects
+ * regions that carry any other bit */
+#define JAILHOUSE_MEM_VALID_FLAGS      (JAILHOUSE_MEM_READ | \
+                                        JAILHOUSE_MEM_WRITE | \
+                                        JAILHOUSE_MEM_EXECUTE | \
+                                        JAILHOUSE_MEM_DMA)
+
+/*
+ * One memory region of a cell: physical start, virtual start, size and
+ * JAILHOUSE_MEM_* access flags. check_mem_regions() requires the first
+ * three values to be page-aligned.
+ */
+struct jailhouse_memory {
+       __u64 phys_start;
+       __u64 virt_start;
+       __u64 size;
+       __u64 access_flags;
+};
+
+/*
+ * One interrupt line entry of a cell configuration.
+ * NOTE(review): presumably num is the line number on the interrupt
+ * controller selected by irqchip - no user is visible here, confirm.
+ */
+struct jailhouse_irq_line {
+       __u32 num;
+       __u32 irqchip;
+};
+
+
+/* PCI bridge description; still a placeholder (see TODO), and not part of
+ * the size computed by jailhouse_cell_config_size() */
+struct jailhouse_pci_bridge {
+       // TODO
+       __u32 num_device;
+};
+
+/* values for struct jailhouse_pci_device.type */
+#define JAILHOUSE_PCI_TYPE_DEVICE      0x01
+#define JAILHOUSE_PCI_TYPE_BRIDGE      0x02
+
+/*
+ * One PCI device entry of a cell configuration, identified by domain, bus
+ * and devfn. The structure is packed, so it has no padding between or
+ * after its members.
+ */
+struct jailhouse_pci_device {
+       // TODO
+       __u32 type;
+       __u16 domain;
+       __u8 bus;
+       __u8 devfn;
+} __attribute__((packed));
+
+
+/*
+ * System configuration: two memory region descriptors (hypervisor memory
+ * and config memory) followed by the cell descriptor named "system".
+ * The variable-sized data of that descriptor follows the structure, see
+ * jailhouse_system_config_size().
+ */
+struct jailhouse_system {
+       struct jailhouse_memory hypervisor_memory;
+       struct jailhouse_memory config_memory;
+       struct jailhouse_cell_desc system;
+};
+
+/*
+ * Compute the total size in bytes of a cell configuration: the fixed
+ * descriptor plus CPU set, memory regions, IRQ lines, PIO bitmap and PCI
+ * device entries as announced by the descriptor fields.
+ *
+ * The result is a __u32, so the sum is truncated to 32 bits.
+ */
+static inline __u32
+jailhouse_cell_config_size(struct jailhouse_cell_desc *cell)
+{
+       __u32 size = sizeof(struct jailhouse_cell_desc);
+
+       size += cell->cpu_set_size;
+       size += cell->num_memory_regions * sizeof(struct jailhouse_memory);
+       size += cell->num_irq_lines * sizeof(struct jailhouse_irq_line);
+       size += cell->pio_bitmap_size;
+       size += cell->num_pci_devices * sizeof(struct jailhouse_pci_device);
+
+       return size;
+}
+
+/*
+ * Compute the total size in bytes of a system configuration: the two
+ * leading memory descriptors plus the complete configuration of the
+ * embedded system cell. The result is truncated to 32 bits.
+ */
+static inline __u32
+jailhouse_system_config_size(struct jailhouse_system *system)
+{
+       __u32 fixed_part = sizeof(system->hypervisor_memory) +
+               sizeof(system->config_memory);
+
+       return fixed_part + jailhouse_cell_config_size(&system->system);
+}
+
+#endif /* !_JAILHOUSE_CELL_CONFIG_H */
diff --git a/hypervisor/include/jailhouse/control.h b/hypervisor/include/jailhouse/control.h
new file mode 100644 (file)
index 0000000..dc81aad
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+#include <asm/percpu.h>
+#include <jailhouse/cell-config.h>
+
+/* system configuration pointer, defined in control.c */
+extern struct jailhouse_system *system_config;
+
+/* Return the next CPU after cpu that is set in cpu_set and is not equal to
+ * exception; a result above cpu_set->max_cpu_id means "none left". */
+unsigned int next_cpu(unsigned int cpu, struct cpu_set *cpu_set,
+                     int exception);
+
+/*
+ * Iterate cpu over all CPUs contained in set, in ascending order.
+ * cpu starts at -1 so that the first next_cpu() call begins at CPU 0.
+ * Both arguments are evaluated multiple times.
+ */
+#define for_each_cpu(cpu, set)                                         \
+       for ((cpu) = -1;                                        \
+            (cpu) = next_cpu((cpu), (set), -1),                \
+            (cpu) <= (set)->max_cpu_id;                        \
+           )
+
+/*
+ * Like for_each_cpu(), but skip the CPU whose id equals exception.
+ * All arguments are evaluated multiple times.
+ */
+#define for_each_cpu_except(cpu, set, exception)               \
+       for ((cpu) = -1;                                        \
+            (cpu) = next_cpu((cpu), (set), (exception)),       \
+            (cpu) <= (set)->max_cpu_id;                        \
+           )
+
+/* generic cell management, implemented in control.c */
+int check_mem_regions(struct jailhouse_cell_desc *config);
+int cell_init(struct cell *cell, struct jailhouse_cell_desc *config,
+             bool copy_cpu_set);
+
+/* hypercall backend: create a cell from the configuration at
+ * config_address; returns 0 or a negative error code */
+int cell_create(struct per_cpu *cpu_data, unsigned long config_address);
+
+/* hypercall backend: shut the hypervisor down */
+int shutdown(struct per_cpu *cpu_data);
+
+/* per-CPU control hooks called by the generic code in control.c;
+ * no definition is visible in control.c itself */
+void arch_suspend_cpu(unsigned int cpu_id);
+void arch_resume_cpu(unsigned int cpu_id);
+void arch_reset_cpu(unsigned int cpu_id);
+void arch_shutdown_cpu(unsigned int cpu_id);
+
+/* hook called by cell_create() after the CPUs were removed from the
+ * calling cell; a non-zero return aborts the cell creation */
+int arch_cell_create(struct per_cpu *cpu_data, struct cell *new_cell,
+                    struct jailhouse_cell_desc *config);
diff --git a/hypervisor/include/jailhouse/entry.h b/hypervisor/include/jailhouse/entry.h
new file mode 100644 (file)
index 0000000..77ce18e
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef _JAILHOUSE_ENTRY_H
+#define _JAILHOUSE_ENTRY_H
+
+#include <jailhouse/header.h>
+#include <asm/percpu.h>
+
+#include <jailhouse/cell-config.h>
+
+/* error codes, returned negated (e.g. -EINVAL); the numeric values match
+ * the Linux errno values of the same names */
+#define EIO            5
+#define ENOMEM         12
+#define EBUSY          16
+#define ENODEV         19
+#define EINVAL         22
+#define ERANGE         34
+#define ENOSYS         38
+
+/* hypervisor image header, see struct jailhouse_header */
+extern struct jailhouse_header hypervisor_header;
+extern void *config_memory;
+
+/* low-level entry points; declared here, defined outside this header */
+int arch_entry(int cpu_id);
+void got_init(void);
+void vm_exit(void);
+
+int entry(struct per_cpu *cpu_data);
+
+/* initialization hooks.
+ * NOTE(review): the early/cpu/late naming suggests a fixed call order during
+ * start-up - confirm against the implementation of entry(). */
+int arch_init_early(struct cell *linux_cell,
+                   struct jailhouse_cell_desc *config);
+int arch_cpu_init(struct per_cpu *cpu_data);
+int arch_init_late(struct cell *linux_cell,
+                  struct jailhouse_cell_desc *config);
+/* declared noreturn: control does not come back to the caller */
+void __attribute__((noreturn)) arch_cpu_activate_vmm(struct per_cpu *cpu_data);
+void arch_cpu_restore(struct per_cpu *cpu_data);
+
+#endif /* !_JAILHOUSE_ENTRY_H */
diff --git a/hypervisor/include/jailhouse/header.h b/hypervisor/include/jailhouse/header.h
new file mode 100644 (file)
index 0000000..05599be
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+/* 8 characters, matching the size of jailhouse_header.signature */
+#define JAILHOUSE_SIGNATURE    "JAILHOUS"
+
+/*
+ * Header of the hypervisor image. The first group of fields is fixed when
+ * the image is built, the second group is written by the loader before the
+ * hypervisor is started.
+ */
+struct jailhouse_header {
+       /* filled at build time */
+       char signature[8];
+       unsigned long bss_start;
+       unsigned long bss_end;
+       unsigned long percpu_size;
+       unsigned long entry;
+
+       /* filled by loader */
+       unsigned long size;
+       /* difference between hypervisor-virtual and physical addresses, as
+        * applied by page_map_hvirt2phys()/page_map_phys2hvirt() */
+       unsigned long page_offset;
+       unsigned int possible_cpus;
+       unsigned int online_cpus;
+};
+
+/* Signature of an entry function taking an unsigned int argument.
+ * NOTE(review): arch_entry() in entry.h is declared with an int parameter;
+ * if this typedef is used to call it, the types should agree - confirm. */
+typedef int (*entry_func)(unsigned int);
diff --git a/hypervisor/include/jailhouse/hypercall.h b/hypervisor/include/jailhouse/hypercall.h
new file mode 100644 (file)
index 0000000..deddbb4
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/jailhouse.h>
+
+/* hypercall numbers, passed in RAX on x86 (see vmx_handle_exit()).
+ * JAILHOUSE_HC_CELL_DESTROY is defined but has no case in the VMCALL
+ * dispatcher, so it is answered with -ENOSYS. */
+#define JAILHOUSE_HC_DISABLE           0
+#define JAILHOUSE_HC_CELL_CREATE       1
+#define JAILHOUSE_HC_CELL_DESTROY      2
diff --git a/hypervisor/include/jailhouse/mmio.h b/hypervisor/include/jailhouse/mmio.h
new file mode 100644 (file)
index 0000000..b1b1563
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/percpu.h>
+
+/*
+ * Result of mmio_parse(): length of the parsed instruction, access size
+ * and the register involved.
+ * NOTE(review): units of size and encoding of reg are not visible here;
+ * callers treat inst_len == 0 as a parse failure - confirm for the rest.
+ */
+struct mmio_access {
+       unsigned int inst_len;
+       unsigned int size;
+       unsigned int reg;
+};
+
+/* Read a 32-bit value from address through a volatile-qualified pointer. */
+static inline u32 mmio_read32(void *address)
+{
+       volatile u32 *reg = address;
+
+       return *reg;
+}
+
+/* Read a 64-bit value from address through a volatile-qualified pointer. */
+static inline u64 mmio_read64(void *address)
+{
+       volatile u64 *reg = address;
+
+       return *reg;
+}
+
+/* Store a 32-bit value to address through a volatile-qualified pointer. */
+static inline void mmio_write32(void *address, u32 value)
+{
+       volatile u32 *reg = address;
+
+       *reg = value;
+}
+
+/* Store a 64-bit value to address through a volatile-qualified pointer. */
+static inline void mmio_write64(void *address, u64 value)
+{
+       volatile u64 *reg = address;
+
+       *reg = value;
+}
+
+/* Parse the memory-accessing instruction at guest address pc, using the
+ * guest page table at page_table_addr, and describe it as a struct
+ * mmio_access. is_write states the expected access direction. */
+struct mmio_access mmio_parse(struct per_cpu *cpu_data, unsigned long pc,
+                             unsigned long page_table_addr, bool is_write);
diff --git a/hypervisor/include/jailhouse/paging.h b/hypervisor/include/jailhouse/paging.h
new file mode 100644 (file)
index 0000000..b697a47
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/entry.h>
+#include <asm/types.h>
+#include <asm/paging.h>
+
+/* Round s up to the next multiple of PAGE_SIZE. The argument is
+ * parenthesized so that expressions containing lower-precedence operators
+ * (e.g. a ? b : c, or a | b) expand correctly. */
+#define PAGE_ALIGN(s)          (((s) + PAGE_SIZE-1) & PAGE_MASK)
+
+/*
+ * Page allocator state: a contiguous range of pages starting at
+ * base_address, tracked by one bit per page in used_bitmap.
+ */
+struct page_pool {
+       /* address of page 0 of the pool */
+       void *base_address;
+       /* total number of pages managed by the pool */
+       unsigned long pages;
+       /* number of pages currently marked as used */
+       unsigned long used_pages;
+       /* bit n set: page n is in use */
+       unsigned long *used_bitmap;
+       unsigned long flags;
+};
+
+/* the two page pools, defined in paging.c */
+extern struct page_pool mem_pool;
+extern struct page_pool remap_pool;
+
+/* page table of the hypervisor itself */
+extern pgd_t *hv_page_table;
+
+/* Allocate num contiguous pages from pool; returns NULL on failure. */
+void *page_alloc(struct page_pool *pool, unsigned int num);
+/* Return num pages starting at first_page to pool. */
+void page_free(struct page_pool *pool, void *first_page, unsigned int num);
+
+/* Translate a hypervisor-virtual address into a physical address by
+ * subtracting the page offset recorded in the hypervisor header. */
+static inline unsigned long page_map_hvirt2phys(void *hvirt)
+{
+       unsigned long virt_addr = (unsigned long)hvirt;
+
+       return virt_addr - hypervisor_header.page_offset;
+}
+
+/* Translate a physical address into a hypervisor-virtual address by adding
+ * the page offset recorded in the hypervisor header. */
+static inline void *page_map_phys2hvirt(unsigned long phys)
+{
+       unsigned long virt_addr = phys + hypervisor_header.page_offset;
+
+       return (void *)virt_addr;
+}
+
+/* Translate virt via the given page table.
+ * NOTE(review): meaning of page_table_offset and the return value for
+ * unmapped addresses are defined by the implementation - confirm there. */
+unsigned long page_map_virt2phys(pgd_t *page_table,
+                                unsigned long page_table_offset,
+                                unsigned long virt);
+
+/* Map size bytes of physical memory starting at phys to virt in
+ * page_table; returns 0 on success (callers treat non-zero as error).
+ * size need not be a multiple of the page size (see cell_create()). */
+int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
+                   unsigned long virt, unsigned long flags,
+                   unsigned long table_flags, unsigned int levels);
+/* Remove the mapping of size bytes starting at virt from page_table. */
+void page_map_destroy(pgd_t *page_table, unsigned long virt,
+                     unsigned long size, unsigned int levels);
+
+/* Temporarily map a page of a foreign address space into the given
+ * mapping region, and release such a mapping again. */
+void *page_map_get_foreign_page(unsigned int mapping_region,
+                               unsigned long page_table_paddr,
+                               unsigned long page_table_offset,
+                               unsigned long virt, unsigned long flags);
+void page_map_release_foreign_page(unsigned int mapping_region);
+
+int paging_init(void);
+/* Print page pool statistics; when is a label for the report, e.g.
+ * "after cell creation". */
+void page_map_dump_stats(const char *when);
diff --git a/hypervisor/include/jailhouse/printk.h b/hypervisor/include/jailhouse/printk.h
new file mode 100644 (file)
index 0000000..38423ad
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/types.h>
+
+/* panic state shared between CPUs */
+extern volatile unsigned long panic_in_progress;
+extern unsigned int panic_cpu;
+
+/* printf-style console output */
+void printk(const char *fmt, ...);
+
+/* printf-style output used on fatal error paths */
+void panic_printk(const char *fmt, ...);
+
+/* architecture-specific debug output backend */
+void arch_dbg_write_init(void);
+void arch_dbg_write(const char *msg);
diff --git a/hypervisor/include/jailhouse/processor.h b/hypervisor/include/jailhouse/processor.h
new file mode 100644 (file)
index 0000000..fd910f5
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <asm/processor.h>
+
+/* Return the physical id of the calling processor.
+ * NOTE(review): inferred from the name only - confirm the id source. */
+int phys_processor_id(void);
diff --git a/hypervisor/include/jailhouse/string.h b/hypervisor/include/jailhouse/string.h
new file mode 100644 (file)
index 0000000..9e7740f
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+/* Minimal string.h replacement for the hypervisor. memset() is implemented
+ * in lib.c; memcpy() has no definition there and must be provided
+ * elsewhere. */
+void *memcpy(void *d, const void *s, unsigned long n);
+void *memset(void *s, int c, unsigned long n);
diff --git a/hypervisor/lib.c b/hypervisor/lib.c
new file mode 100644 (file)
index 0000000..9a8fbe9
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/string.h>
+#include <asm/types.h>
+
+/*
+ * Fill the first n bytes at s with the byte value of c (converted to u8)
+ * and return s. Plain byte-wise implementation.
+ */
+void *memset(void *s, int c, unsigned long n)
+{
+       u8 *dst = s;
+       unsigned long pos;
+
+       for (pos = 0; pos < n; pos++)
+               dst[pos] = c;
+       return s;
+}
diff --git a/hypervisor/paging.c b/hypervisor/paging.c
new file mode 100644 (file)
index 0000000..e5fede2
--- /dev/null
@@ -0,0 +1,390 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/paging.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/string.h>
+#include <jailhouse/control.h>
+#include <asm/bitops.h>
+
+#define BITS_PER_PAGE          (PAGE_SIZE * 8)
+
+#define PAGE_SCRUB_ON_FREE     0x1
+
+extern u8 __start[], __page_pool[];
+
+struct page_pool mem_pool;
+struct page_pool remap_pool = {
+       .base_address = (void *)REMAP_BASE_ADDR,
+       .pages = BITS_PER_PAGE * NUM_REMAP_BITMAP_PAGES,
+};
+
+pgd_t *hv_page_table;
+
+/*
+ * Allocate a single page from @pool.
+ *
+ * Scans the pool's usage bitmap word by word for the first clear bit, marks
+ * it as used, increments the pool's used_pages counter and returns the
+ * address of the corresponding page. Returns NULL if no page is free.
+ */
+static void *page_alloc_one(struct page_pool *pool)
+{
+       unsigned long word, page_nr;
+       unsigned long words;
+
+       /*
+        * Round up so that a trailing, only partially valid bitmap word is
+        * searched as well; bits at or beyond pool->pages are rejected by the
+        * range check below.
+        */
+       words = (pool->pages + BITS_PER_LONG - 1) / BITS_PER_LONG;
+
+       for (word = 0; word < words; word++)
+               if (pool->used_bitmap[word] != ~0UL) {
+                       page_nr = ffz(pool->used_bitmap[word]) +
+                               word * BITS_PER_LONG;
+                       if (page_nr >= pool->pages)
+                               break;
+                       set_bit(page_nr, pool->used_bitmap);
+                       pool->used_pages++;
+                       return pool->base_address + page_nr * PAGE_SIZE;
+               }
+
+       return NULL;
+}
+
+/*
+ * Allocate @num pages from @pool that are adjacent in the pool's address
+ * range and return the address of the first one.
+ *
+ * Pages are taken one by one via page_alloc_one(); if the pool runs empty or
+ * a page does not directly follow its predecessor, everything obtained so far
+ * is released again and NULL is returned. At least one page is always
+ * requested, even for @num == 0.
+ */
+void *page_alloc(struct page_pool *pool, unsigned int num)
+{
+       void *start, *last, *next;
+       unsigned int allocated;
+
+       start = page_alloc_one(pool);
+       if (!start)
+               return NULL;
+
+       for (allocated = 1, last = start; allocated < num;
+            allocated++, last = next) {
+               next = page_alloc_one(pool);
+               if (next != last + PAGE_SIZE) {
+                       /*
+                        * next is either NULL (pool exhausted) or a page that
+                        * does not adjoin the run. Hand it back too so that
+                        * it is not leaked; page_free() ignores NULL.
+                        */
+                       page_free(pool, next, 1);
+                       page_free(pool, start, allocated);
+                       return NULL;
+               }
+       }
+
+       return start;
+}
+
+/*
+ * Give @num consecutive pages, starting at @page, back to @pool.
+ *
+ * A NULL @page is ignored. If the pool has PAGE_SCRUB_ON_FREE set, each page
+ * is zeroed before its bitmap bit is cleared.
+ */
+void page_free(struct page_pool *pool, void *page, unsigned int num)
+{
+       unsigned long bit;
+
+       if (!page)
+               return;
+
+       for (; num > 0; num--, page += PAGE_SIZE) {
+               if (pool->flags & PAGE_SCRUB_ON_FREE)
+                       memset(page, 0, PAGE_SIZE);
+
+               bit = (page - pool->base_address) / PAGE_SIZE;
+               clear_bit(bit, pool->used_bitmap);
+               pool->used_pages--;
+       }
+}
+
+unsigned long page_map_virt2phys(pgd_t *page_table,
+                                unsigned long page_table_offset,
+                                unsigned long virt)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+#if PAGE_DIR_LEVELS == 4
+       pgd = pgd_offset(page_table, virt);
+       if (!pgd_valid(pgd))
+               return INVALID_PHYS_ADDR;
+
+       pud = pud4l_offset(pgd, page_table_offset, virt);
+#elif PAGE_DIR_LEVELS == 3
+       pud = pud3l_offset(pgd, page_table_offset, virt);
+#else
+# error Unsupported paging level
+#endif
+       if (!pud_valid(pud))
+               return INVALID_PHYS_ADDR;
+
+       pmd = pmd_offset(pud, page_table_offset, virt);
+       if (!pmd_valid(pud))
+               return INVALID_PHYS_ADDR;
+
+       if (pmd_is_hugepage(pmd))
+               return phys_address_hugepage(pmd, virt);
+
+       pte = pte_offset(pmd, page_table_offset, virt);
+       if (!pte_valid(pte))
+               return INVALID_PHYS_ADDR;
+
+       return phys_address(pte, virt);
+}
+
+int page_map_create(pgd_t *page_table, unsigned long phys, unsigned long size,
+                   unsigned long virt, unsigned long flags,
+                   unsigned long table_flags, unsigned int levels)
+{      /* Map size bytes at virt to phys in page_table, page by page; 0, -ENOMEM or -EINVAL. */
+       unsigned long offs = hypervisor_header.page_offset;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       for (size = PAGE_ALIGN(size); size > 0; /* size is rounded via PAGE_ALIGN() first */
+            phys += PAGE_SIZE, virt += PAGE_SIZE, size -= PAGE_SIZE) {
+               switch (levels) {
+               case 4: /* 4 levels: look up the PGD entry, creating the PUD table if missing */
+                       pgd = pgd_offset(page_table, virt);
+                       if (!pgd_valid(pgd)) {
+                               pud = page_alloc(&mem_pool, 1);
+                               if (!pud)
+                                       return -ENOMEM;
+                               set_pgd(pgd, page_map_hvirt2phys(pud),
+                                       table_flags);
+                       }
+                       pud = pud4l_offset(pgd, offs, virt);
+                       break;
+               case 3: /* 3 levels: page_table itself is indexed at PUD level */
+                       pud = pud3l_offset(page_table, virt);
+                       break;
+               default:        /* any other level count is rejected */
+                       return -EINVAL;
+               }
+
+               if (!pud_valid(pud)) {  /* no PMD table yet: allocate one from mem_pool */
+                       pmd = page_alloc(&mem_pool, 1);
+                       if (!pmd)
+                               return -ENOMEM;
+                       set_pud(pud, page_map_hvirt2phys(pmd), table_flags);
+               }
+
+               pmd = pmd_offset(pud, offs, virt);
+               if (!pmd_valid(pmd)) {  /* no PTE table yet: allocate one from mem_pool */
+                       pte = page_alloc(&mem_pool, 1);
+                       if (!pte)
+                               return -ENOMEM;
+                       set_pmd(pmd, page_map_hvirt2phys(pte), table_flags);
+               }
+
+               pte = pte_offset(pmd, offs, virt);
+               set_pte(pte, phys, flags);      /* final entry gets the caller's flags, tables got table_flags */
+       }
+
+       return 0;
+}
+
+void page_map_destroy(pgd_t *page_table, unsigned long virt,
+                     unsigned long size, unsigned int levels)
+{      /* Clear the PTEs for size bytes at virt and free page-table pages found empty afterwards. */
+       unsigned long offs = hypervisor_header.page_offset;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       for (size = PAGE_ALIGN(size); size > 0; /* one iteration per page */
+            virt += PAGE_SIZE, size -= PAGE_SIZE) {
+               switch (levels) {
+               case 4:
+                       pgd = pgd_offset(page_table, virt);
+                       if (!pgd_valid(pgd))
+                               continue;       /* nothing mapped at this address */
+
+                       pud = pud4l_offset(pgd, offs, virt);
+                       break;
+               case 3:
+                       pgd = 0; /* silence compiler warning */
+                       pud = pud3l_offset(page_table, virt);
+                       break;
+               default:
+                       return; /* unsupported level count; note: returns without flush_tlb() */
+               }
+               if (!pud_valid(pud))
+                       continue;
+
+               pmd = pmd_offset(pud, offs, virt);
+               if (!pmd_valid(pmd))
+                       continue;
+
+               pte = pte_offset(pmd, offs, virt);
+               clear_pte(pte);
+
+               if (!pt_empty(pmd, offs))       /* presumably: PTE table still holds entries - TODO confirm helper */
+                       continue;
+               page_free(&mem_pool, pte_offset(pmd, offs, 0), 1);      /* offset for address 0 yields the table's first entry */
+               clear_pmd(pmd);
+
+               if (!pmd_empty(pud, offs))
+                       continue;
+               page_free(&mem_pool, pmd_offset(pud, offs, 0), 1);
+               clear_pud(pud);
+
+               if (levels < 4 || !pud_empty(pgd, offs))        /* pgd is only meaningful with 4 levels */
+                       continue;
+               page_free(&mem_pool, pud4l_offset(pgd, offs, 0), 1);
+               clear_pgd(pgd);
+       }
+
+       flush_tlb();    /* flush once after all entries were processed */
+}
+
+void *page_map_get_foreign_page(unsigned int mapping_region,
+                               unsigned long page_table_paddr,
+                               unsigned long page_table_offset,
+                               unsigned long virt, unsigned long flags)
+{      /* Map the page that a foreign page table assigns to virt into hv_page_table; NULL on failure. */
+       unsigned long page_virt, pt_virt, phys;
+#if PAGE_DIR_LEVELS == 4
+       pgd_t *pgd;
+#endif
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+       int err;
+
+       page_virt = FOREIGN_MAPPING_BASE +      /* each region owns NUM_FOREIGN_PAGES pages; the first receives the target page */
+               mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES;
+
+       pt_virt = page_virt + PAGE_SIZE;        /* following pages receive the foreign page-table pages, one per level */
+       phys = page_table_paddr + page_table_offset;
+       err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+                             PAGE_DIR_LEVELS);
+       if (err)
+               goto error_release;
+
+#if PAGE_DIR_LEVELS == 4
+       pgd = pgd_offset((pgd_t *)pt_virt, virt);
+       if (!pgd_valid(pgd))
+               goto error_release;
+       pt_virt += PAGE_SIZE;
+       phys = (unsigned long)pud4l_offset(pgd, page_table_offset, 0);  /* address of the next-level table as computed by the helper */
+       err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+                             PAGE_DIR_LEVELS);
+       if (err)
+               goto error_release;
+
+       pud = pud4l_offset((pgd_t *)&pt_virt, page_table_offset, virt); /* NOTE(review): &pt_virt makes the helper read pt_virt as if it were the parent entry - confirm against pud4l_offset() */
+#elif PAGE_DIR_LEVELS == 3
+       pud = pud3l_offset((pgd_t *)pt_virt, virt);
+#else
+# error Unsupported paging level
+#endif
+       if (!pud_valid(pud))
+               goto error_release;
+       pt_virt += PAGE_SIZE;
+       phys = (unsigned long)pmd_offset(pud, page_table_offset, 0);
+       err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+                             PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+                             PAGE_DIR_LEVELS);
+       if (err)
+               goto error_release;
+
+       pmd = pmd_offset((pud_t *)&pt_virt, page_table_offset, virt);   /* same &pt_virt construct as above */
+       if (!pmd_valid(pmd))
+               goto error_release;
+       if (pmd_is_hugepage(pmd))
+               phys = phys_address_hugepage(pmd, virt);        /* huge page: no PTE level to walk */
+       else {
+               pt_virt += PAGE_SIZE;
+               phys = (unsigned long)pte_offset(pmd, page_table_offset, 0);
+               err = page_map_create(hv_page_table, phys, PAGE_SIZE, pt_virt,
+                                     PAGE_READONLY_FLAGS,
+                                     PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
+               if (err)
+                       goto error_release;
+
+               pte = pte_offset((pmd_t *)&pt_virt, page_table_offset, virt);
+               if (!pte_valid(pte))
+                       goto error_release;
+               phys = phys_address(pte, 0) + page_table_offset;
+       }
+
+       err = page_map_create(hv_page_table, phys, PAGE_SIZE, page_virt,        /* finally map the target page with the caller's flags */
+                             flags, PAGE_DEFAULT_FLAGS, PAGE_DIR_LEVELS);
+       if (err)
+               goto error_release;
+
+       return (void *)page_virt;
+
+error_release: /* drop whatever part of the region was mapped so far */
+       page_map_release_foreign_page(mapping_region);
+       return NULL;
+}
+
+/*
+ * Remove all NUM_FOREIGN_PAGES mappings that belong to the foreign mapping
+ * region @mapping_region from the hypervisor page table.
+ */
+void page_map_release_foreign_page(unsigned int mapping_region)
+{
+       unsigned long region_virt, region_size;
+
+       region_virt = FOREIGN_MAPPING_BASE +
+               mapping_region * PAGE_SIZE * NUM_FOREIGN_PAGES;
+       region_size = NUM_FOREIGN_PAGES * PAGE_SIZE;
+
+       page_map_destroy(hv_page_table, region_virt, region_size,
+                        PAGE_DIR_LEVELS);
+}
+
+/*
+ * Set up the hypervisor page pools and its initial page table.
+ *
+ * The memory starting at __page_pool is laid out as: per-CPU data, the
+ * system configuration, then the usage bitmap of mem_pool. Those pages are
+ * marked as used, the bitmap of remap_pool and the root page table are
+ * allocated from mem_pool, and the hypervisor's own address range
+ * (__start .. __start + hypervisor_header.size) is mapped page by page.
+ *
+ * Returns 0 on success or -ENOMEM if the pool is too small or any
+ * allocation or mapping fails.
+ */
+int paging_init(void)
+{
+       unsigned long per_cpu_pages, config_pages, bitmap_pages;
+       unsigned long n;
+       u8 *addr;
+       int err;
+
+       mem_pool.pages =
+               (hypervisor_header.size - (__page_pool - __start)) / PAGE_SIZE;
+       per_cpu_pages = hypervisor_header.possible_cpus *
+               sizeof(struct per_cpu) / PAGE_SIZE;
+       bitmap_pages = (mem_pool.pages + BITS_PER_PAGE - 1) / BITS_PER_PAGE;
+
+       /* the system configuration directly follows the per-CPU data */
+       system_config = (struct jailhouse_system *)
+               (__page_pool + per_cpu_pages * PAGE_SIZE);
+       config_pages = (jailhouse_system_config_size(system_config) +
+                       PAGE_SIZE - 1) / PAGE_SIZE;
+
+       if (mem_pool.pages <= per_cpu_pages + config_pages + bitmap_pages)
+               goto error_nomem;
+
+       mem_pool.base_address = __page_pool;
+       mem_pool.used_bitmap =
+               (unsigned long *)(__page_pool + per_cpu_pages * PAGE_SIZE +
+                                 config_pages * PAGE_SIZE);
+       /* per-CPU data, configuration and the bitmap itself are in use */
+       mem_pool.used_pages = per_cpu_pages + config_pages + bitmap_pages;
+       for (n = 0; n < mem_pool.used_pages; n++)
+               set_bit(n, mem_pool.used_bitmap);
+       mem_pool.flags = PAGE_SCRUB_ON_FREE;
+
+       remap_pool.used_bitmap = page_alloc(&mem_pool, NUM_REMAP_BITMAP_PAGES);
+       /* the bitmap is written right below, so a failed allocation is fatal */
+       if (!remap_pool.used_bitmap)
+               goto error_nomem;
+       /*
+        * Reserve the first NUM_FOREIGN_PAGES pages per possible CPU in the
+        * remap pool (presumably the foreign-mapping windows - TODO confirm).
+        */
+       remap_pool.used_pages =
+               hypervisor_header.possible_cpus * NUM_FOREIGN_PAGES;
+       for (n = 0; n < remap_pool.used_pages; n++)
+               set_bit(n, remap_pool.used_bitmap);
+
+       hv_page_table = page_alloc(&mem_pool, 1);
+       if (!hv_page_table)
+               goto error_nomem;
+
+       /* Replicate hypervisor mapping of Linux */
+       for (addr = __start; addr < __start + hypervisor_header.size;
+            addr += PAGE_SIZE) {
+               err = page_map_create(hv_page_table, page_map_hvirt2phys(addr),
+                                     PAGE_SIZE, (unsigned long)addr,
+                                     PAGE_DEFAULT_FLAGS, PAGE_DEFAULT_FLAGS,
+                                     PAGE_DIR_LEVELS);
+               if (err)
+                       goto error_nomem;
+       }
+
+       return 0;
+
+error_nomem:
+       printk("FATAL: page pool much too small\n");
+       return -ENOMEM;
+}
+
+void page_map_dump_stats(const char *when)
+{      /* Print used/total page counts of mem_pool and remap_pool; when is inserted into the message. */
+       printk("Page pool usage %s: mem %d/%d, remap %d/%d\n", when,
+              mem_pool.used_pages, mem_pool.pages,
+              remap_pool.used_pages, remap_pool.pages);
+}
diff --git a/hypervisor/printk-core.c b/hypervisor/printk-core.c
new file mode 100644 (file)
index 0000000..68c8ed0
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#if BITS_PER_LONG < 64
+
+/*
+ * Software 64-bit unsigned division (shift-and-subtract), returning
+ * dividend / divisor. The divisor must not be zero.
+ */
+static unsigned long long div_u64_u64(unsigned long long dividend,
+                                     unsigned long long divisor)
+{
+       unsigned long long result = 0;
+       unsigned long long tmp_res, tmp_div;
+
+       while (dividend >= divisor) {
+               tmp_div = divisor;
+               tmp_res = 1;
+               /* "2 * tmp_div <= dividend", written so that nothing can overflow */
+               while (dividend - tmp_div >= tmp_div) {
+                       tmp_div <<= 1;
+                       tmp_res <<= 1;
+               }
+               dividend -= tmp_div;
+               result += tmp_res;
+       }
+       return result;
+}
+
+#else /* BITS_PER_LONG >= 64 */
+
+/* Native 64-bit division is available: just use it. */
+static inline unsigned long long div_u64_u64(unsigned long long dividend,
+                                            unsigned long long divisor)
+{
+       return dividend / divisor;
+}
+
+#endif /* BITS_PER_LONG >= 64 */
+
+/*
+ * Write @value in decimal to @buf, without leading zeros and without a
+ * terminator, and return the position following the last digit written.
+ */
+static char *uint2str(unsigned long long value, char *buf)
+{
+       unsigned long long scale, d;
+       int started = 0;
+
+       for (scale = 10000000000000000000ULL; scale > 0;
+            scale = div_u64_u64(scale, 10)) {
+               d = div_u64_u64(value, scale);
+               value -= d * scale;
+               /* suppress leading zeros, but always emit the ones digit */
+               if (started || d > 0 || scale == 1) {
+                       *buf++ = '0' + d;
+                       started = 1;
+               }
+       }
+
+       return buf;
+}
+
+/*
+ * Write the signed @value in decimal to @buf (with a leading '-' if it is
+ * negative) and return the position following the last character written.
+ */
+static char *int2str(long long value, char *buf)
+{
+       if (value < 0) {
+               *buf++ = '-';
+               /* negate as unsigned: -value overflows for the minimum value */
+               return uint2str(-(unsigned long long)value, buf);
+       }
+       return uint2str(value, buf);
+}
+
+/*
+ * Write @value in lower-case hexadecimal to @buf (no prefix, no terminator)
+ * and return the position following the last digit written.
+ *
+ * Leading zeros are dropped, except at digit positions whose place value
+ * has a bit in common with @leading_zero_mask; the last digit is always
+ * written.
+ */
+static char *hex2str(unsigned long long value, char *buf,
+                    unsigned long long leading_zero_mask)
+{
+       const char hexdigit[] = "0123456789abcdef";
+       unsigned long long place, nibble;
+       int started = 0;
+
+       for (place = 0x1000000000000000ULL; place > 0; place >>= 4) {
+               nibble = div_u64_u64(value, place);
+               value -= nibble * place;
+
+               /* skip a leading zero unless it is forced or the last digit */
+               if (!started && nibble == 0 && place != 1 &&
+                   !(place & leading_zero_mask))
+                       continue;
+
+               *buf++ = hexdigit[nibble];
+               started = 1;
+       }
+
+       return buf;
+}
+
+static char *align(char *p1, char *p0, unsigned long width)
+{      /* Right-align the text in [p0, p1) within width characters, padding with spaces on the left. */
+       unsigned int n;
+
+       if (p1 - p0 >= width)   /* text already fills the field: leave it untouched */
+               return p1;
+
+       for (n = 1; p1 - n >= p0; n++)  /* move the text to the end of the field, last character first */
+               *(p0 + width - n) = *(p1 - n);
+       memset(p0, ' ', width - (p1 - p0));     /* blank the gap in front of the moved text */
+       return p0 + width;      /* new end of the output */
+}
+
+/*
+ * Minimal printf-style formatter that emits its output via console_write().
+ *
+ * Supported conversions: %d, %u, %x (each optionally with a decimal field
+ * width and/or an 'l' length modifier), %p and %s. Anything else is echoed
+ * as '%' followed by the conversion character. After every '\n' a '\r' is
+ * queued. Text is collected in a local buffer that is flushed before each
+ * conversion, after each newline and whenever it is about to fill up.
+ */
+static void __vprintk(const char *fmt, va_list ap)
+{
+       char buf[128];
+       char *p, *p0;
+       char c;
+       unsigned long long v;
+       unsigned int width;
+       bool longmode;
+
+       p = buf;
+
+       while (1) {
+               c = *fmt++;
+               if (c == 0)
+                       break;
+               else if (c == '%') {
+                       /* flush pending text so the conversion starts at buf[0] */
+                       *p = 0;
+                       console_write(buf);
+                       p = buf;
+
+                       c = *fmt++;
+
+                       width = 0;
+                       p0 = p;
+                       while (c >= '0' && c <= '9') {
+                               width = width * 10 + c - '0';
+                               c = *fmt++;
+                               /* a width that would not fit the buffer is dropped */
+                               if (width >= sizeof(buf) - 1)
+                                       width = 0;
+                       }
+
+                       longmode = false;
+                       if (c == 'l') {
+                               longmode = true;
+                               c = *fmt++;
+                       }
+
+                       switch (c) {
+                       case 'd':
+                               if (longmode)
+                                       v = va_arg(ap, long);
+                               else
+                                       v = va_arg(ap, int);
+                               p = int2str(v, p);
+                               p = align(p, p0, width);
+                               break;
+                       case 'p':
+                               *p++ = '0';
+                               *p++ = 'x';
+                               v = va_arg(ap, unsigned long);
+                               p = hex2str(v, p, (unsigned long)-1);
+                               break;
+                       case 's':
+                               /* strings bypass the buffer entirely */
+                               console_write(va_arg(ap, const char *));
+                               break;
+                       case 'u':
+                               if (longmode)
+                                       v = va_arg(ap, unsigned long);
+                               else
+                                       v = va_arg(ap, unsigned int);
+                               p = uint2str(v, p);
+                               p = align(p, p0, width);
+                               break;
+                       case 'x':
+                               if (longmode)
+                                       v = va_arg(ap, unsigned long);
+                               else
+                                       v = va_arg(ap, unsigned int);
+                               p = hex2str(v, p, 0);
+                               p = align(p, p0, width);
+                               break;
+                       default:
+                               *p++ = '%';
+                               if (c == 0) {
+                                       /*
+                                        * The format ended inside the
+                                        * conversion. Step back so that the
+                                        * main loop sees the terminator
+                                        * instead of reading past it.
+                                        */
+                                       fmt--;
+                                       break;
+                               }
+                               *p++ = c;
+                               break;
+                       }
+               } else if (c == '\n') {
+                       *p++ = c;
+                       *p = 0;
+                       console_write(buf);
+                       p = buf;
+                       *p++ = '\r';
+               } else
+                       *p++ = c;
+
+               /* keep room for the terminator written before each flush */
+               if (p >= &buf[sizeof(buf) - 1]) {
+                       *p = 0;
+                       console_write(buf);
+                       p = buf;
+               }
+       }
+
+       *p = 0;
+       console_write(buf);
+}
diff --git a/hypervisor/printk.c b/hypervisor/printk.c
new file mode 100644 (file)
index 0000000..603bb83
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/processor.h>
+#include <jailhouse/string.h>
+#include <asm/spinlock.h>
+
+volatile unsigned long panic_in_progress;
+unsigned int panic_cpu = -1;
+
+static DEFINE_SPINLOCK(printk_lock);
+
+#define console_write(msg)     arch_dbg_write(msg)
+#include "printk-core.c"
+
+void printk(const char *fmt, ...)
+{      /* Format and emit a message through __vprintk() while holding printk_lock. */
+       va_list ap;
+
+       va_start(ap, fmt);
+
+       spin_lock(&printk_lock);        /* only one caller at a time runs the formatter */
+       __vprintk(fmt, ap);
+       spin_unlock(&printk_lock);
+
+       va_end(ap);
+}
+
+void panic_printk(const char *fmt, ...)
+{      /* printk variant for panic output: does not take printk_lock. */
+       unsigned int cpu_id = phys_processor_id();
+       va_list ap;
+
+       if (test_and_set_bit(0, &panic_in_progress) && panic_cpu != cpu_id)
+               return; /* bit was already set and a different CPU is recorded in panic_cpu: stay silent */
+       panic_cpu = cpu_id;     /* record this CPU so that its later calls pass the check above */
+
+       va_start(ap, fmt);
+
+       __vprintk(fmt, ap);
+
+       va_end(ap);
+}
diff --git a/hypervisor/setup.c b/hypervisor/setup.c
new file mode 100644 (file)
index 0000000..71b153f
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <jailhouse/processor.h>
+#include <jailhouse/printk.h>
+#include <jailhouse/entry.h>
+#include <jailhouse/paging.h>
+#include <jailhouse/control.h>
+#include <jailhouse/string.h>
+#include <asm/spinlock.h>
+
+extern u8 __start[];
+extern u8 __bss_start[], __bss_end[];
+
+void *config_memory;
+
+static DEFINE_SPINLOCK(init_lock);
+static unsigned int master_cpu_id = -1;
+static volatile unsigned int initialized_cpus;
+static volatile int error;
+static struct cell linux_cell;
+
+static int register_linux_cpu(struct per_cpu *cpu_data)
+{      /* Assign the CPU to linux_cell if the system configuration lists it; else -EINVAL. */
+       unsigned long *system_cpu_set = /* CPU bitmap is read from directly behind the system cell descriptor */
+               (unsigned long *)(((void *)&system_config->system) +
+                                 sizeof(struct jailhouse_cell_desc));
+
+       if (cpu_data->cpu_id >= system_config->system.cpu_set_size * 8 ||       /* cpu_set_size * 8 = number of bits in the set */
+           !test_bit(cpu_data->cpu_id, system_cpu_set))
+               return -EINVAL;
+
+       cpu_data->cell = &linux_cell;
+       set_bit(cpu_data->cpu_id, linux_cell.cpu_set->bitmap);
+       return 0;
+}
+
+static void init_early(unsigned int cpu_id)
+{      /* One-time early setup, run by the first CPU entering; failures are reported via the file-level error variable. */
+       unsigned long size;
+
+       master_cpu_id = cpu_id; /* from now on entry() no longer sees master_cpu_id == -1 */
+
+       /* must be first, printk/arch_dbg_write uses the GOT */
+       got_init();
+
+       arch_dbg_write_init();
+
+       printk("\nInitializing Jailhouse hypervisor on CPU %d\n", cpu_id);
+       printk("Code location: %p\n",
+              __start + sizeof(struct jailhouse_header));
+
+       error = paging_init();
+       if (error)
+               return;
+
+       if (system_config->config_memory.size > 0) {    /* optional: map the configured config memory read-only */
+               size = PAGE_ALIGN(system_config->config_memory.size);
+
+               config_memory = page_alloc(&remap_pool, size / PAGE_SIZE);      /* reserves virtual pages in the remap pool */
+               if (!config_memory) {
+                       error = -ENOMEM;
+                       return;
+               }
+
+               error = page_map_create(hv_page_table,
+                               system_config->config_memory.phys_start,
+                               size, (unsigned long)config_memory,
+                               PAGE_READONLY_FLAGS, PAGE_DEFAULT_FLAGS,
+                               PAGE_DIR_LEVELS);
+               if (error)
+                       return;
+       }
+
+       error = check_mem_regions(&system_config->system);
+       if (error)
+               return;
+
+       error = arch_init_early(&linux_cell, &system_config->system);
+       if (error)
+               return;
+
+       error = cell_init(&linux_cell, &system_config->system, false);
+       if (error)
+               return;
+
+       cell_list = &linux_cell;        /* the Linux cell becomes the head of the cell list */
+
+       page_map_dump_stats("after early setup");
+       printk("Initializing first processor:\n");
+}
+
+static void cpu_init(struct per_cpu *cpu_data)
+{      /* Per-CPU setup: register the CPU with the Linux cell, then run arch_cpu_init(). */
+       int err;
+
+       printk(" CPU %d... ", cpu_data->cpu_id);
+
+       err = register_linux_cpu(cpu_data);
+       if (err)
+               goto failed;
+
+       err = arch_cpu_init(cpu_data);
+       if (err)
+               goto failed;
+
+       printk("OK\n");
+       initialized_cpus++;     /* entry() waits on this counter */
+       return;
+
+failed:
+       printk("FAILED\n");
+       if (!error)     /* keep the first error that was recorded */
+               error = err;
+}
+
+static void init_late(void)
+{      /* Called by the master CPU after its own cpu_init() succeeded. */
+       error = arch_init_late(&linux_cell, &system_config->system);
+       if (error)
+               return;
+
+       page_map_dump_stats("after late setup");
+       printk("Initializing remaining processors:\n");
+}
+
+int entry(struct per_cpu *cpu_data)
+{      /* Per-CPU hypervisor entry: initialize under init_lock, wait for the other CPUs, then activate or bail out. */
+       bool master = false;
+
+       spin_lock(&init_lock);  /* initialization of the CPUs is serialized */
+
+       if (master_cpu_id == -1) {      /* first CPU to get here becomes the master and runs the early init */
+               master = true;
+               init_early(cpu_data->cpu_id);
+       }
+
+       if (!error) {   /* skipped once any CPU has recorded an error */
+               cpu_init(cpu_data);
+
+               if (master && !error)
+                       init_late();
+       }
+
+       spin_unlock(&init_lock);
+
+       while (!error && initialized_cpus < hypervisor_header.online_cpus)      /* wait until all online CPUs are initialized or one failed */
+               cpu_relax();
+
+       if (error) {
+               arch_cpu_restore(cpu_data);
+               return error;
+       }
+
+       if (master)
+               printk("Activating hypervisor\n");
+
+       /* point of no return */
+       arch_cpu_activate_vmm(cpu_data);        /* NOTE(review): no return statement follows - presumably this call never returns; confirm */
+}
+
+struct jailhouse_header __attribute__((section(".header")))    /* emitted into the ".header" section */
+hypervisor_header = {
+       .signature = JAILHOUSE_SIGNATURE,
+       .bss_start = (unsigned long)__bss_start,        /* linker-provided BSS bounds */
+       .bss_end = (unsigned long)__bss_end,
+       .percpu_size = sizeof(struct per_cpu),
+       .entry = (unsigned long)arch_entry,
+};     /* fields read elsewhere in this file (size, page_offset, possible_cpus, online_cpus) are not set here */
diff --git a/inmate/Makefile b/inmate/Makefile
new file mode 100644 (file)
index 0000000..c9c414b
--- /dev/null
@@ -0,0 +1,51 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+LINUXINCLUDE := -I$(src)
+KBUILD_CFLAGS := -g -Os -Wall -Wstrict-prototypes -Wtype-limits \
+                -Wmissing-declarations -Wmissing-prototypes \
+                -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+                -fno-common -I.
+ifneq ($(wildcard $(src)/../hypervisor/include/jailhouse/config.h),)
+KBUILD_CFLAGS += -include $(src)/../hypervisor/include/jailhouse/config.h
+endif
+
+OBJCOPYFLAGS := -O binary
+LDFLAGS := -T
+
+ifeq ($(SRCARCH), x86)
+always := tiny-demo.bin apic-demo.bin
+endif
+
+tiny-demo-y := tiny-demo.o header.o printk.o pm-timer.o
+targets += $(tiny-demo-y)
+
+TINY_DEMO_OBJS = $(addprefix $(obj)/,$(tiny-demo-y))
+
+targets += tiny-demo-linked.o
+$(obj)/tiny-demo-linked.o: $(src)/inmate.lds $(TINY_DEMO_OBJS)
+       $(call if_changed,ld)
+
+
+apic-demo-y := apic-demo.o header.o printk.o pm-timer.o
+targets += $(apic-demo-y)
+
+APIC_DEMO_OBJS = $(addprefix $(obj)/,$(apic-demo-y))
+
+targets += apic-demo-linked.o
+$(obj)/apic-demo-linked.o: $(src)/inmate.lds $(APIC_DEMO_OBJS)
+       $(call if_changed,ld)
+
+
+targets += tiny-demo.bin apic-demo.bin
+$(obj)/%.bin: $(obj)/%-linked.o
+       $(call if_changed,objcopy)
diff --git a/inmate/apic-demo.c b/inmate/apic-demo.c
new file mode 100644 (file)
index 0000000..3a1044d
--- /dev/null
@@ -0,0 +1,125 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+#define NS_PER_MSEC            1000000UL
+#define NS_PER_SEC             1000000000UL
+
+#define NUM_IDT_DESC           33
+#define APIC_TIMER_VECTOR      32
+
+#define X2APIC_EOI             0x80b
+#define X2APIC_LVTT            0x832
+#define X2APIC_TMICT           0x838
+#define X2APIC_TMCCT           0x839
+#define X2APIC_TDCR            0x83e
+
+#define APIC_EOI_ACK           0
+
+static u32 idt[NUM_IDT_DESC * 4];
+static unsigned long apic_frequency;
+static unsigned long expected_time;
+static unsigned long min = -1, max;
+
+struct desc_table_reg {        /* memory operand handed to lidt by write_idtr() */
+       u16 limit;      /* set to the table size in bytes minus one by init_apic() */
+       u64 base;       /* address of the table */
+} __attribute__((packed));     /* packed: no padding between limit and base */
+
+/* Read model-specific register @msr and return its 64-bit value. */
+static inline unsigned long read_msr(unsigned int msr)
+{
+       u32 eax, edx;
+       unsigned long value;
+
+       asm volatile("rdmsr" : "=a" (eax), "=d" (edx) : "c" (msr));
+
+       /* rdmsr delivers the upper half in EDX, the lower half in EAX */
+       value = (unsigned long)edx << 32;
+       value |= eax;
+       return value;
+}
+
+static inline void write_msr(unsigned int msr, unsigned long val)
+{      /* Write val to model-specific register msr. */
+       asm volatile("wrmsr"
+               : /* no output */
+               : "c" (msr), "a" (val), "d" (val >> 32) /* register number in ECX, value split over EAX (low) and EDX (high) */
+               : "memory");    /* compiler barrier: memory accesses are not moved across the write */
+}
+
+/*
+ * Load the IDT register from the descriptor at @val.
+ *
+ * lidt only reads its memory operand, so the descriptor is passed as an
+ * input. Declaring it as an output would tell the compiler that its previous
+ * contents are dead and allow it to drop the stores that fill it in.
+ */
+static inline void write_idtr(struct desc_table_reg *val)
+{
+       asm volatile("lidtq %0" : : "m" (*val));
+}
+
+void irq_handler(void)
+{      /* Timer interrupt: acknowledge it, report the jitter and re-arm the timer for the next period. */
+       unsigned long delta;
+
+       write_msr(X2APIC_EOI, APIC_EOI_ACK);
+
+       delta = read_pm_timer() - expected_time;        /* how late the interrupt is relative to its target time */
+       if (delta < min)        /* min starts at -1, i.e. the largest unsigned long value */
+               min = delta;
+       if (delta > max)
+               max = delta;
+       printk("Timer fired, jitter: %6ld ns, min: %6ld ns, max: %6ld ns\n",
+              delta, min, max);
+
+       expected_time += 100 * NS_PER_MSEC;     /* next target: 100 ms after the previous target */
+       write_msr(X2APIC_TMICT, /* remaining time until the target, converted to timer ticks */
+                 (expected_time - read_pm_timer()) * apic_frequency / NS_PER_SEC);
+}
+
+static void init_apic(void)
+{      /* Calibrate the APIC timer against the PM timer, install the timer vector in the IDT and start the timer. */
+       unsigned long entry = (unsigned long)irq_entry + FSEGMENT_BASE;
+       struct desc_table_reg dtr;
+       unsigned long start, end;
+       unsigned long tmr;
+
+       write_msr(X2APIC_TDCR, 3);      /* divide configuration; presumably divide-by-16, matching the "* 16" in the printout below - TODO confirm */
+
+       start = read_pm_timer();
+       write_msr(X2APIC_TMICT, 0xffffffff);    /* start counting down from the maximum initial count */
+
+       while (read_pm_timer() - start < 100 * NS_PER_MSEC)     /* let it run for 100 ms; read_pm_timer() values are treated as ns here */
+               cpu_relax();
+
+       end = read_pm_timer();
+       tmr = read_msr(X2APIC_TMCCT);   /* current count after the measurement interval */
+
+       apic_frequency = (0xffffffff - tmr) * NS_PER_SEC / (end - start);       /* elapsed ticks scaled to one second */
+
+       printk("Calibrated APIC frequency: %lu kHz\n",
+              (apic_frequency * 16 + 500) / 1000);
+
+       idt[APIC_TIMER_VECTOR * 4] = (entry & 0xffff) | (INMATE_CS << 16);      /* handler bits 0-15 and code segment selector */
+       idt[APIC_TIMER_VECTOR * 4 + 1] = 0x8e00 | (entry & 0xffff0000); /* 0x8e00: present interrupt gate, DPL 0; handler bits 16-31 */
+       idt[APIC_TIMER_VECTOR * 4 + 2] = entry >> 32;   /* handler bits 32-63 */
+
+       dtr.limit = NUM_IDT_DESC * 16 - 1;      /* 16 bytes per descriptor */
+       dtr.base = (u64)&idt;
+       write_idtr(&dtr);
+
+       write_msr(X2APIC_LVTT, APIC_TIMER_VECTOR);
+       expected_time = read_pm_timer();
+       write_msr(X2APIC_TMICT, 1);     /* minimal initial count: the first interrupt fires almost immediately */
+
+       asm volatile("sti");    /* enable interrupts */
+}
+
+/*
+ * Demo entry point: set up the APIC timer if the PM timer initialized
+ * successfully, then halt in a loop and let the interrupt handler do the work.
+ */
+void inmate_main(void)
+{
+       if (init_pm_timer())
+               init_apic();
+
+       for (;;)
+               asm volatile("hlt");
+}
diff --git a/inmate/header.S b/inmate/header.S
new file mode 100644 (file)
index 0000000..fd613e5
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+#define X86_CR0_PE     0x00000001
+#define X86_CR0_WP     0x00010000
+#define X86_CR0_PG     0x80000000
+
+#define X86_CR4_PAE    0x00000020
+
+#define MSR_EFER       0xc0000080
+#define EFER_LME       0x00000100
+
+       .code16gcc
+       .section ".boot", "ax" /* placed at offset 0xfff0 by inmate.lds */
+
+       ljmp $0xf000,$start16 /* far jump to the 16-bit startup code in segment 0xf000 */
+
+
+       .section ".startup", "ax"
+
+start16: /* 16-bit entry: load the GDT and switch to protected mode */
+       cs,lgdtl gdt_ptr /* gdt_ptr is addressed through %cs */
+
+       mov %cr0,%eax
+       or $X86_CR0_PE,%al /* set CR0.PE */
+       mov %eax,%cr0
+
+       ljmpl $LOADER_CS32,$start32 + FSEGMENT_BASE /* continue in the 32-bit code segment */
+
+
+       .code32
+start32: /* 32-bit stage: enable PAE, long mode and paging */
+       mov %cr4,%eax
+       or $X86_CR4_PAE,%eax /* set CR4.PAE */
+       mov %eax,%cr4
+
+       mov $pml4 + FSEGMENT_BASE,%eax
+       mov %eax,%cr3 /* CR3 = address of the top-level page table */
+
+       movl $MSR_EFER,%ecx
+       rdmsr
+       or $EFER_LME,%eax /* set EFER.LME */
+       wrmsr
+
+       mov $(X86_CR0_PG | X86_CR0_WP | X86_CR0_PE),%eax
+       mov %eax,%cr0 /* turn on paging; CR0 is overwritten, not or'ed */
+
+       ljmpl $INMATE_CS,$start64 + FSEGMENT_BASE /* continue in the 64-bit code segment */
+
+       .code64
+start64: /* 64-bit stage: set up the stack and enter the C code */
+       mov $stack_top,%rsp /* stack_top is defined in inmate.lds */
+
+       mov $inmate_main,%rax
+       jmpq *%rax /* jump, not call: no return address is pushed for inmate_main */
+
+
+       .align(16)
+gdt:
+       .quad   0 /* mandatory null descriptor */
+       .quad   0x00c09b000000ffff /* selector LOADER_CS32: flat 32-bit code segment */
+       .quad   0x00af9b000000ffff /* selector INMATE_CS: 64-bit code segment */
+
+gdt_ptr:
+       .short  gdt_ptr - gdt - 1 /* limit: size of the table minus one */
+       .long   gdt + FSEGMENT_BASE /* base: address of the table */
+
+       .align(4096)
+pml4:
+       .quad   pdpt + FSEGMENT_BASE + 0x003 /* flags 0x003: present, writable */
+
+       .align(4096)
+pdpt:
+       .quad   pd + FSEGMENT_BASE + 0x003 /* flags 0x003: present, writable */
+
+       .align(4096)
+pd:
+       .quad   0x0000000000000083 /* present, writable, large page: maps physical address 0 */
+
+
+       .global irq_entry
+       .balign 16
+irq_entry: /* interrupt stub: save registers, call irq_handler, restore, return */
+       push %rax
+       push %rcx
+       push %rdx
+       push %rsi
+       push %rdi
+       push %r8
+       push %r9
+       push %r10
+       push %r11
+
+       call irq_handler - FSEGMENT_BASE /* NOTE(review): offset presumably compensates for .startup being linked at 0 - confirm */
+
+       pop %r11 /* restore in reverse push order */
+       pop %r10
+       pop %r9
+       pop %r8
+       pop %rdi
+       pop %rsi
+       pop %rdx
+       pop %rcx
+       pop %rax
+
+       iretq
+
+
+/* to please linker if irq_entry remains unused */
+       .weak irq_handler
+irq_handler:
diff --git a/inmate/inmate.h b/inmate/inmate.h
new file mode 100644 (file)
index 0000000..e25070a
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#define FSEGMENT_BASE  0xf0000 /* added to startup-code symbol addresses in header.S */
+
+#define LOADER_CS32    0x8 /* code segment selector used for the jump to start32 */
+#define INMATE_CS      0x10 /* code segment selector used for start64 and the IDT gate */
+
+#ifndef __ASSEMBLY__
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long s64; /* assumes long is 64 bits wide on the target - TODO confirm */
+typedef unsigned long u64; /* same assumption as s64 */
+
+typedef enum { true=1, false=0 } bool; /* local replacement for a boolean type */
+
+static inline void cpu_relax(void) /* spin-wait hint for busy loops */
+{
+       asm volatile("rep; nop"); /* "rep; nop" is the encoding of the PAUSE instruction */
+}
+
+static inline void outb(u8 v, u16 port) /* write byte v to I/O port 'port' */
+{
+       asm volatile("outb %0,%1" : : "a" (v), "dN" (port)); /* v in %al; port in %dx or as 8-bit immediate */
+}
+
+static inline u8 inb(u16 port) /* read one byte from I/O port 'port' */
+{
+       u8 v;
+       asm volatile("inb %1,%0" : "=a" (v) : "dN" (port)); /* result in %al */
+       return v;
+}
+
+static inline u32 inl(u16 port) /* read a 32-bit value from I/O port 'port' */
+{
+       u32 v;
+       asm volatile("inl %1,%0" : "=a" (v) : "dN" (port)); /* result in %eax */
+       return v;
+}
+
+void printk(const char *fmt, ...);
+
+void *memset(void *s, int c, unsigned long n);
+
+extern u8 irq_entry[];
+void irq_handler(void);
+
+void inmate_main(void);
+
+bool init_pm_timer(void);
+unsigned long read_pm_timer(void);
+#endif
diff --git a/inmate/inmate.lds b/inmate/inmate.lds
new file mode 100644 (file)
index 0000000..171bb80
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+SECTIONS
+{
+       /* 16-bit sections */
+       . = 0;
+       .startup        : { *(.startup) } /* startup code and tables from header.S */
+
+       . = 0xfff0;
+       .boot           : { /* holds the initial far jump from header.S */
+               *(.boot)
+               . = ALIGN(16);
+       }
+
+       /* 32-bit sections */
+       . = 0xe0000;
+       stack_top = .; /* loaded into %rsp by start64 */
+       .bss            : { *(.bss) }
+
+       . = 0xf0000 + SIZEOF(.startup); /* continue right behind the startup code */
+       .text           : AT (ADDR(.text) & 0xffff) { /* load address: low 16 bits of the link address */
+               *(.text)
+       }
+
+       . = ALIGN(16);
+       .rodata         : AT (ADDR(.rodata) & 0xffff) {
+               *(.rodata)
+       }
+
+       . = ALIGN(16);
+       .data           : AT (ADDR(.data) & 0xffff) {
+               *(.data)
+       }
+
+       /DISCARD/ : {
+               *(.eh_frame*)
+       }
+}
diff --git a/inmate/pm-timer.c b/inmate/pm-timer.c
new file mode 100644 (file)
index 0000000..cf010db
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+#define NS_PER_MSEC            1000000UL
+#define NS_PER_SEC             1000000000UL
+
+#define PM_TIMER_HZ            3579545
+#define PM_TIMER_OVERFLOW      ((0x1000000 * 1000000000ULL) / PM_TIMER_HZ)
+
+static const unsigned int pm_timer_list[] = { 0x408, 0x1808, 0xb008, 0 };
+static unsigned int pm_timer;
+
+/*
+ * Read the PM timer and return its value converted to nanoseconds.
+ *
+ * A reading smaller than the previous one is treated as a wrap-around of the
+ * counter and accounted for by adding PM_TIMER_OVERFLOW to a running offset.
+ */
+unsigned long read_pm_timer(void)
+{
+       static unsigned long last, overflows;
+       unsigned long now_ns = (inl(pm_timer) * NS_PER_SEC) / PM_TIMER_HZ;
+
+       if (last > now_ns)
+               overflows += PM_TIMER_OVERFLOW;
+       last = now_ns;
+
+       return now_ns + overflows;
+}
+
+/*
+ * Probe the I/O ports in pm_timer_list for a running PM timer.
+ *
+ * A port is accepted if two readings separated by a short delay differ.
+ * Returns true and leaves pm_timer set to that port on success; returns false
+ * if no candidate port showed a changing value.
+ */
+bool init_pm_timer(void)
+{
+       const unsigned int *port;
+       unsigned long before, spin;
+
+       for (port = pm_timer_list; *port; port++) {
+               pm_timer = *port;
+               before = read_pm_timer();
+               for (spin = 0; spin < 10; spin++)
+                       cpu_relax();
+               if (read_pm_timer() == before)
+                       continue;
+
+               printk("Found PM Timer at %x\n", pm_timer);
+               return true;
+       }
+
+       printk("Could not find PM Timer\n");
+       return false;
+}
diff --git a/inmate/printk.c b/inmate/printk.c
new file mode 100644 (file)
index 0000000..59c7a38
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include <inmate.h>
+
+#ifdef CONFIG_UART_OXPCIE952
+#define UART_BASE              0xe010
+#else
+#define UART_BASE              0x3f8
+#endif
+#define  UART_TX               0x0
+#define  UART_LSR              0x5
+#define  UART_LSR_THRE         0x20
+
+/*
+ * Write the NUL-terminated string msg to the UART, polling the line status
+ * register until the transmit holding register is empty before each byte.
+ */
+static void uart_write(const char *msg)
+{
+       for (; *msg; msg++) {
+               while (!(inb(UART_BASE + UART_LSR) & UART_LSR_THRE))
+                       cpu_relax();
+               outb(*msg, UART_BASE + UART_TX);
+       }
+}
+
+#define console_write(msg)     uart_write(msg)
+#include "../hypervisor/printk-core.c"
+
+/*
+ * Formatted console output: forwards the variable arguments to __vprintk()
+ * from the shared printk core.
+ */
+void printk(const char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       __vprintk(fmt, args);
+       va_end(args);
+}
diff --git a/inmate/tiny-demo.c b/inmate/tiny-demo.c
new file mode 100644 (file)
index 0000000..d5d60cd
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <inmate.h>
+
+/*
+ * Demo entry point: print a greeting and, if a PM timer is found, print its
+ * value ten times at intervals of 1000000000 timer units, then halt.
+ */
+void inmate_main(void)
+{
+       unsigned long long start, now;
+       int n;
+
+       printk("Hello from this tiny cell!\n");
+
+       if (init_pm_timer()) {
+               start = read_pm_timer();
+               n = 0;
+               while (n < 10) {
+                       /* busy-wait until the interval since 'start' has passed */
+                       do {
+                               now = read_pm_timer();
+                               cpu_relax();
+                       } while (now - start < 1000000000ULL);
+
+                       start += 1000000000ULL;
+                       printk("PM Timer: %11lu\n", now);
+                       n++;
+               }
+       }
+
+       asm volatile("hlt");
+}
diff --git a/jailhouse.h b/jailhouse.h
new file mode 100644 (file)
index 0000000..e318ec4
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <jailhouse/cell-config.h>
+
+struct jailhouse_preload_image { /* one image to copy into a new cell's memory */
+       __u64 source_address; /* user-space address the image is copied from */
+       __u64 size; /* image size in bytes */
+       __u64 target_address; /* offset into the cell's first memory region */
+       __u64 padding; /* not read by the driver code visible here */
+};
+
+struct jailhouse_new_cell { /* argument of JAILHOUSE_CELL_CREATE */
+       __u64 config_address; /* user-space address of the cell configuration */
+       __u32 config_size; /* size of the cell configuration in bytes */
+       __u32 num_preload_images; /* number of entries in image[]; the driver currently requires 1 */
+       struct jailhouse_preload_image image[];
+};
+
+#define JAILHOUSE_ENABLE               _IOW(0, 0, struct jailhouse_system) /* load and start the hypervisor */
+#define JAILHOUSE_DISABLE              _IO(0, 1) /* shut the hypervisor down */
+#define JAILHOUSE_CELL_CREATE          _IOW(0, 2, struct jailhouse_new_cell) /* create a cell and preload its image */
+#define JAILHOUSE_CELL_DESTROY         _IOW(0, 3, const char *) /* the driver currently returns -ENOSYS */
diff --git a/main.c b/main.c
new file mode 100644 (file)
index 0000000..25e68d1
--- /dev/null
+++ b/main.c
@@ -0,0 +1,397 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/firmware.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/uaccess.h>
+#include <asm/smp.h>
+#include <asm/cacheflush.h>
+
+#include <jailhouse.h>
+#include <jailhouse/header.h>
+#include <jailhouse/hypercall.h>
+
+#define JAILHOUSE_FW_NAME      "jailhouse.bin"
+
+MODULE_DESCRIPTION("Loader for Jailhouse partitioning hypervisor");
+MODULE_LICENSE("GPL");
+MODULE_FIRMWARE(JAILHOUSE_FW_NAME);
+
+static struct device *jailhouse_dev;
+static DEFINE_MUTEX(lock);
+static bool enabled;
+static void *hypervisor_mem;
+static cpumask_t offlined_cpus;
+static atomic_t call_done;
+static int error_code;
+
+#ifdef CONFIG_X86
+
+/*
+ * Map the physical range [start, start + size) cached and mark the mapping
+ * executable. Returns the kernel virtual address or NULL on failure.
+ */
+static void *jailhouse_ioremap(phys_addr_t start, unsigned long size)
+{
+       void *mapping = (__force void *)ioremap_cache(start, size);
+
+       if (!mapping)
+               return NULL;
+
+       set_memory_x((unsigned long)mapping, size / PAGE_SIZE);
+       return mapping;
+}
+
+#elif defined(CONFIG_ARM)
+
+#include <asm/mach/map.h>
+
+/*
+ * Map the physical range [start, start + size) with memory type MT_MEMORY.
+ * Returns the kernel virtual address or NULL on failure.
+ */
+static void *jailhouse_ioremap(phys_addr_t start, unsigned long size)
+{
+       void __iomem *mapping = __arm_ioremap(start, size, MT_MEMORY);
+
+       return (__force void *)mapping;
+}
+
+#else
+#error Unsupported architecture
+#endif
+
+/*
+ * Per-CPU callback for on_each_cpu(): call the hypervisor entry point for the
+ * current CPU and signal completion through call_done.
+ *
+ * @info: the struct jailhouse_header at the start of the hypervisor memory
+ */
+static void enter_hypervisor(void *info)
+{
+       struct jailhouse_header *header = info;
+       entry_func entry = (entry_func)(hypervisor_mem + header->entry);
+       int result;
+
+       /* either returns 0 or the same error code across all CPUs */
+       result = entry(smp_processor_id());
+       if (result != 0)
+               error_code = result;
+
+       atomic_inc(&call_done);
+}
+
+/*
+ * Handle JAILHOUSE_ENABLE: load the hypervisor firmware into the memory
+ * region described by the user-supplied system configuration, append that
+ * configuration and enter the hypervisor on all online CPUs.
+ *
+ * @arg: user-space pointer to the system configuration
+ *
+ * Returns 0 on success or a negative error code. On failure the mapping, the
+ * firmware and the module reference are released again.
+ */
+static int jailhouse_enable(struct jailhouse_system __user *arg)
+{
+       unsigned long hv_core_size, percpu_size, config_size;
+       const struct firmware *hypervisor;
+       struct jailhouse_system config_header;
+       struct jailhouse_memory *hv_mem = &config_header.hypervisor_memory;
+       struct jailhouse_header *header;
+       int err;
+
+       if (copy_from_user(&config_header, arg, sizeof(config_header)))
+               return -EFAULT;
+
+       if (mutex_lock_interruptible(&lock) != 0)
+               return -EINTR;
+
+       err = -EBUSY;
+       if (enabled || !try_module_get(THIS_MODULE))
+               goto error_unlock;
+
+       err = request_firmware(&hypervisor, JAILHOUSE_FW_NAME, jailhouse_dev);
+       if (err)
+               goto error_put_module;
+
+       err = -EINVAL;
+
+       /* the image must contain a complete header before we may read it */
+       if (hypervisor->size < sizeof(*header))
+               goto error_release_fw;
+
+       header = (struct jailhouse_header *)hypervisor->data;
+
+       if (memcmp(header->signature, JAILHOUSE_SIGNATURE,
+                  sizeof(header->signature)) != 0)
+               goto error_release_fw;
+
+       hv_core_size = PAGE_ALIGN(header->bss_end);
+       percpu_size = num_possible_cpus() * header->percpu_size;
+       config_size = jailhouse_system_config_size(&config_header);
+       if (hv_mem->size <= hv_core_size + percpu_size + config_size)
+               goto error_release_fw;
+
+       /*
+        * The whole image is copied and the remainder of the region zeroed
+        * below; an image larger than the region would overflow the mapping
+        * and make the memset length wrap around.
+        */
+       if (hypervisor->size > hv_mem->size)
+               goto error_release_fw;
+
+       /* CMA would be better... */
+       hypervisor_mem = jailhouse_ioremap(hv_mem->phys_start, hv_mem->size);
+       if (!hypervisor_mem)
+               goto error_release_fw;
+
+       memcpy(hypervisor_mem, hypervisor->data, hypervisor->size);
+       memset(hypervisor_mem + hypervisor->size, 0,
+              hv_mem->size - hypervisor->size);
+
+       /* from here on, work on the copy inside the hypervisor memory */
+       header = (struct jailhouse_header *)hypervisor_mem;
+       header->size = hv_mem->size;
+       header->page_offset =
+               (unsigned long)hypervisor_mem - hv_mem->phys_start;
+       header->possible_cpus = num_possible_cpus();
+
+       /* the configuration follows the core image and the per-CPU areas */
+       if (copy_from_user(hypervisor_mem + hv_core_size + percpu_size, arg,
+                          config_size)) {
+               err = -EFAULT;
+               goto error_unmap;
+       }
+
+       error_code = 0;
+
+       preempt_disable();
+
+       header->online_cpus = num_online_cpus();
+
+       /* asynchronous cross-call; wait until every CPU has reported back */
+       atomic_set(&call_done, 0);
+       on_each_cpu(enter_hypervisor, header, 0);
+       while (atomic_read(&call_done) != num_online_cpus())
+               cpu_relax();
+
+       preempt_enable();
+
+       if (error_code) {
+               err = error_code;
+               goto error_unmap;
+       }
+
+       release_firmware(hypervisor);
+
+       enabled = true;
+
+       mutex_unlock(&lock);
+
+       printk("The Jailhouse is opening.\n");
+
+       return 0;
+
+error_unmap:
+       iounmap((__force void __iomem *)hypervisor_mem);
+
+error_release_fw:
+       release_firmware(hypervisor);
+
+error_put_module:
+       module_put(THIS_MODULE);
+
+error_unlock:
+       mutex_unlock(&lock);
+       return err;
+}
+
+/*
+ * Per-CPU callback for on_each_cpu(): issue the JAILHOUSE_HC_DISABLE
+ * hypercall and signal completion through call_done. @info is unused.
+ */
+static void leave_hypervisor(void *info)
+{
+       /* either returns 0 or the same error code across all CPUs */
+       int result = jailhouse_call0(JAILHOUSE_HC_DISABLE);
+
+       if (result != 0)
+               error_code = result;
+
+       atomic_inc(&call_done);
+}
+
+/*
+ * Handle JAILHOUSE_DISABLE: leave the hypervisor on all online CPUs, unmap
+ * its memory and bring the CPUs that were offlined for cells back online.
+ *
+ * Returns 0 on success, -EINTR if waiting for the lock was interrupted,
+ * -EINVAL if the hypervisor is not enabled, or the error code reported by
+ * the disable hypercall.
+ */
+static int jailhouse_disable(void)
+{
+       unsigned int cpu;
+       int err;
+
+       if (mutex_lock_interruptible(&lock) != 0)
+               return -EINTR;
+
+       if (!enabled) {
+               mutex_unlock(&lock);
+               return -EINVAL;
+       }
+
+       error_code = 0;
+
+       preempt_disable();
+
+       /* asynchronous cross-call; wait until every CPU has reported back */
+       atomic_set(&call_done, 0);
+       on_each_cpu(leave_hypervisor, NULL, 0);
+       while (atomic_read(&call_done) != num_online_cpus())
+               cpu_relax();
+
+       preempt_enable();
+
+       err = error_code;
+       if (err)
+               goto unlock_out;
+
+       iounmap((__force void __iomem *)hypervisor_mem);
+
+       for_each_cpu_mask(cpu, offlined_cpus) {
+               if (cpu_up(cpu) != 0)
+                       printk("Jailhouse: failed to bring CPU %d back "
+                              "online\n", cpu);
+               /*
+                * Forget the CPU in any case so that a later disable does
+                * not try to online it a second time.
+                */
+               cpu_clear(cpu, offlined_cpus);
+       }
+
+       enabled = false;
+       module_put(THIS_MODULE);
+
+       printk("The Jailhouse was closed.\n");
+
+unlock_out:
+       mutex_unlock(&lock);
+
+       return err;
+}
+
+/*
+ * Handle JAILHOUSE_CELL_CREATE: copy the cell configuration from user space,
+ * offline the CPUs assigned to the cell, preload the image into the cell's
+ * first memory region and ask the hypervisor to create the cell.
+ *
+ * @arg: user-space pointer to a jailhouse_new_cell followed by exactly one
+ *       jailhouse_preload_image
+ *
+ * All sizes and offsets come from user space and are validated against
+ * config_size and the RAM region before they are used.
+ *
+ * Returns 0 on success or a negative error code. CPUs that were already
+ * offlined stay recorded in offlined_cpus even if a later step fails.
+ */
+static int jailhouse_cell_create(struct jailhouse_new_cell __user *arg)
+{
+       struct {
+               struct jailhouse_new_cell cell;
+               struct jailhouse_preload_image image;
+       } cell_buffer;
+       struct jailhouse_new_cell *cell = &cell_buffer.cell;
+       struct jailhouse_preload_image *image = &cell->image[0];
+       unsigned int mask_pos, bit_pos, cpu;
+       struct jailhouse_cell_desc *config;
+       struct jailhouse_memory *ram;
+       unsigned long cfg_tail;
+       void *cell_mem;
+       u8 *cpu_mask;
+       int err;
+
+       if (copy_from_user(cell, arg, sizeof(*cell)))
+               return -EFAULT;
+
+       if (cell->num_preload_images != 1)
+               return -EINVAL;
+
+       /* the fixed part of the descriptor is accessed below: require it */
+       if (cell->config_size < sizeof(*config))
+               return -EINVAL;
+
+       if (copy_from_user(cell->image, arg->image,
+                          sizeof(*cell->image) * cell->num_preload_images))
+               return -EFAULT;
+
+       config = kmalloc(cell->config_size, GFP_KERNEL | GFP_DMA);
+       if (!config)
+               return -ENOMEM;
+
+       if (copy_from_user(config, (void *)(unsigned long)cell->config_address,
+                          cell->config_size)) {
+               err = -EFAULT;
+               goto kfree_config_out;
+       }
+       config->name[JAILHOUSE_CELL_NAME_MAXLEN] = 0;
+
+       /* bytes available behind the fixed descriptor */
+       cfg_tail = cell->config_size - sizeof(*config);
+       if (config->cpu_set_size > cfg_tail) {
+               err = -EINVAL;
+               goto kfree_config_out;
+       }
+
+       /* the CPU bitmap directly follows the descriptor */
+       cpu_mask = ((void *)config) + sizeof(struct jailhouse_cell_desc);
+       for (mask_pos = 0; mask_pos < config->cpu_set_size; mask_pos++)
+               for (bit_pos = 0; bit_pos < 8; bit_pos++) {
+                       if (!(cpu_mask[mask_pos] & (1 << bit_pos)))
+                               continue;
+                       cpu = mask_pos * 8 + bit_pos;
+                       if (cpu_online(cpu)) {
+                               err = cpu_down(cpu);
+                               if (err)
+                                       goto kfree_config_out;
+                               cpu_set(cpu, offlined_cpus);
+                       }
+               }
+
+       /* the first memory region follows the CPU bitmap */
+       ram = ((void *)config) + sizeof(struct jailhouse_cell_desc) +
+               config->cpu_set_size;
+       if (config->num_memory_regions < 1 ||
+           cfg_tail - config->cpu_set_size < sizeof(*ram) ||
+           ram->size < 1024 * 1024 ||
+           image->target_address > ram->size ||
+           image->size > ram->size - image->target_address) {
+               /* the subtraction form cannot wrap, unlike target + size */
+               err = -EINVAL;
+               goto kfree_config_out;
+       }
+
+       cell_mem = jailhouse_ioremap(ram->phys_start, ram->size);
+       if (!cell_mem) {
+               err = -EBUSY;
+               goto kfree_config_out;
+       }
+       memset(cell_mem, 0, ram->size);
+
+       if (copy_from_user(cell_mem + image->target_address,
+                          (void *)(unsigned long)image->source_address,
+                          image->size)) {
+               err = -EFAULT;
+               goto iounmap_out;
+       }
+
+       if (mutex_lock_interruptible(&lock) != 0) {
+               err = -EINTR;
+               /* the cell memory is mapped at this point: unmap it, too */
+               goto iounmap_out;
+       }
+
+       if (!enabled) {
+               err = -EINVAL;
+               goto unlock_out;
+       }
+
+       err = jailhouse_call1(JAILHOUSE_HC_CELL_CREATE, __pa(config));
+       if (err)
+               goto unlock_out;
+
+       printk("Created Jailhouse cell \"%s\"\n", config->name);
+
+unlock_out:
+       mutex_unlock(&lock);
+
+iounmap_out:
+       iounmap((__force void __iomem *)cell_mem);
+kfree_config_out:
+       kfree(config);
+
+       return err;
+}
+
+/*
+ * ioctl entry point of /dev/jailhouse: dispatch the request to its handler.
+ *
+ * Returns the handler's result, -ENOSYS for JAILHOUSE_CELL_DESTROY and
+ * -EINVAL for unknown requests.
+ */
+static long jailhouse_ioctl(struct file *file, unsigned int ioctl,
+                           unsigned long arg)
+{
+       switch (ioctl) {
+       case JAILHOUSE_ENABLE:
+               return jailhouse_enable((struct jailhouse_system __user *)arg);
+       case JAILHOUSE_DISABLE:
+               return jailhouse_disable();
+       case JAILHOUSE_CELL_CREATE:
+               return jailhouse_cell_create(
+                       (struct jailhouse_new_cell __user *)arg);
+       case JAILHOUSE_CELL_DESTROY:
+               return -ENOSYS;
+       default:
+               return -EINVAL;
+       }
+}
+
+static const struct file_operations jailhouse_fops = { /* file operations of the jailhouse misc device */
+       .owner = THIS_MODULE,
+       .unlocked_ioctl = jailhouse_ioctl, /* all commands are issued via ioctl */
+       .llseek = noop_llseek,
+};
+
+static struct miscdevice jailhouse_misc_dev = { /* registered in jailhouse_init() */
+       .minor = MISC_DYNAMIC_MINOR, /* let the misc core pick the minor number */
+       .name = "jailhouse",
+       .fops = &jailhouse_fops,
+};
+
+/*
+ * Module init: register the root device used for the firmware request and
+ * the misc device that provides the ioctl interface.
+ *
+ * Returns 0 on success or a negative error code. If the misc device cannot
+ * be registered, the root device is unregistered again so that nothing is
+ * left behind by the failed module load.
+ */
+static int __init jailhouse_init(void)
+{
+       int err;
+
+       jailhouse_dev = root_device_register("jailhouse");
+       if (IS_ERR(jailhouse_dev))
+               return PTR_ERR(jailhouse_dev);
+
+       err = misc_register(&jailhouse_misc_dev);
+       if (err)
+               root_device_unregister(jailhouse_dev);
+
+       return err;
+}
+
+static void __exit jailhouse_exit(void) /* module exit: undo jailhouse_init() in reverse order */
+{
+       misc_deregister(&jailhouse_misc_dev);
+       root_device_unregister(jailhouse_dev);
+}
+
+module_init(jailhouse_init);
+module_exit(jailhouse_exit);
diff --git a/tools/Makefile b/tools/Makefile
new file mode 100644 (file)
index 0000000..bfddc3b
--- /dev/null
@@ -0,0 +1,22 @@
+#
+# Jailhouse, a Linux-based partitioning hypervisor
+#
+# Copyright (c) Siemens AG, 2013
+#
+# Authors:
+#  Jan Kiszka <jan.kiszka@siemens.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+
+CC = $(CROSS_COMPILE)gcc
+
+CFLAGS = -g -O3 -I.. -I../hypervisor/include \
+       -Wall -Wmissing-declarations -Wmissing-prototypes
+
+jailhouse: jailhouse.c ../jailhouse.h ../hypervisor/include/jailhouse/cell-config.h
+       $(CC) $(CFLAGS) -o $@ $<
+
+clean:
+       rm -f jailhouse
diff --git a/tools/jailhouse.c b/tools/jailhouse.c
new file mode 100644 (file)
index 0000000..3cc869a
--- /dev/null
@@ -0,0 +1,222 @@
+/*
+ * Jailhouse, a Linux-based partitioning hypervisor
+ *
+ * Copyright (c) Siemens AG, 2013
+ *
+ * Authors:
+ *  Jan Kiszka <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+
+#include <jailhouse.h>
+
+static void help(const char *progname) /* print the command overview to stdout */
+{
+       printf("%s <command> <args>\n"
+              "\nAvailable commands:\n"
+              "   enable CONFIGFILE\n"
+              "   disable\n"
+              "   cell create CONFIGFILE PRELOADIMAGE [-l ADDRESS]\n"
+              "   cell destroy NAME\n",
+              progname); /* progname: name the tool was invoked as */
+}
+
+/*
+ * Open the Jailhouse control device.
+ *
+ * Returns the file descriptor; prints an error and exits with status 1 if
+ * the device cannot be opened.
+ */
+static int open_dev(void)
+{
+       int fd = open("/dev/jailhouse", O_RDWR);
+
+       if (fd < 0) {
+               perror("opening /dev/jailhouse");
+               exit(1);
+       }
+       return fd;
+}
+
+/*
+ * Read the complete file 'name' into a freshly allocated buffer.
+ *
+ * @name: path of the file to read
+ * @size: if not NULL, receives the file size in bytes
+ *
+ * Returns the buffer, which the caller has to free(). Any failure is fatal:
+ * a message is printed to stderr and the program exits with status 1.
+ */
+static void *read_file(const char *name, size_t *size)
+{
+       struct stat stat;
+       size_t done = 0;
+       ssize_t chunk;
+       char *buffer;
+       int fd;
+
+       fd = open(name, O_RDONLY);
+       if (fd < 0) {
+               fprintf(stderr, "opening %s: %s\n", name, strerror(errno));
+               exit(1);
+       }
+
+       if (fstat(fd, &stat) < 0) {
+               perror("fstat");
+               exit(1);
+       }
+
+       /* malloc(0) may return NULL: never mistake an empty file for OOM */
+       buffer = malloc(stat.st_size > 0 ? stat.st_size : 1);
+       if (!buffer) {
+               fprintf(stderr, "insufficient memory\n");
+               exit(1);
+       }
+
+       /* read() may legitimately return less than requested: keep going */
+       while (done < (size_t)stat.st_size) {
+               chunk = read(fd, buffer + done, stat.st_size - done);
+               if (chunk < 0) {
+                       if (errno == EINTR)
+                               continue;
+                       fprintf(stderr, "reading %s: %s\n", name,
+                               strerror(errno));
+                       exit(1);
+               }
+               if (chunk == 0) {
+                       /* file shrank after fstat(); errno is not meaningful */
+                       fprintf(stderr, "reading %s: unexpected end of file\n",
+                               name);
+                       exit(1);
+               }
+               done += chunk;
+       }
+
+       close(fd);
+
+       if (size)
+               *size = stat.st_size;
+
+       return buffer;
+}
+
+/*
+ * "enable CONFIGFILE": load the system configuration and pass it to the
+ * driver via JAILHOUSE_ENABLE. Returns the ioctl result.
+ */
+static int enable(int argc, char *argv[])
+{
+       void *system_config;
+       int dev, result;
+
+       if (argc != 3) {
+               help(argv[0]);
+               exit(1);
+       }
+
+       system_config = read_file(argv[2], NULL);
+       dev = open_dev();
+
+       result = ioctl(dev, JAILHOUSE_ENABLE, system_config);
+       if (result != 0)
+               perror("JAILHOUSE_ENABLE");
+
+       close(dev);
+       free(system_config);
+
+       return result;
+}
+
+/*
+ * "cell create CONFIGFILE PRELOADIMAGE [-l ADDRESS]": load the cell
+ * configuration and the image and pass both to the driver via
+ * JAILHOUSE_CELL_CREATE. ADDRESS is the load address of the image inside the
+ * cell and defaults to 0. Returns the ioctl result.
+ */
+static int cell_create(int argc, char *argv[])
+{
+       struct {
+               struct jailhouse_new_cell cell;
+               struct jailhouse_preload_image image;
+       } params;
+       struct jailhouse_new_cell *cell = &params.cell;
+       struct jailhouse_preload_image *image = params.cell.image;
+       size_t size;
+       int err, fd;
+       char *endp;
+
+       if (argc != 5 && argc != 7) {
+               help(argv[0]);
+               exit(1);
+       }
+
+       /* do not hand uninitialized stack content (padding) to the kernel */
+       memset(&params, 0, sizeof(params));
+
+       cell->config_address = (unsigned long)read_file(argv[3], &size);
+       cell->config_size = size;
+       cell->num_preload_images = 1;
+
+       image->source_address = (unsigned long)read_file(argv[4], &size);
+       image->size = size;
+       image->target_address = 0;
+
+       if (argc == 7) {
+               errno = 0;
+               image->target_address = strtoll(argv[6], &endp, 0);
+               if (errno != 0 || *endp != 0 || strcmp(argv[5], "-l") != 0) {
+                       help(argv[0]);
+                       exit(1);
+               }
+       }
+
+       fd = open_dev();
+
+       err = ioctl(fd, JAILHOUSE_CELL_CREATE, &params);
+       if (err)
+               perror("JAILHOUSE_CELL_CREATE");
+
+       close(fd);
+       free((void *)(unsigned long)cell->config_address);
+       free((void *)(unsigned long)image->source_address);
+
+       return err;
+}
+
+/*
+ * "cell destroy NAME": pass the cell name to the driver via
+ * JAILHOUSE_CELL_DESTROY. Returns the ioctl result.
+ */
+static int cell_destroy(int argc, char *argv[])
+{
+       int dev, result;
+
+       if (argc != 4) {
+               help(argv[0]);
+               exit(1);
+       }
+
+       dev = open_dev();
+
+       result = ioctl(dev, JAILHOUSE_CELL_DESTROY, argv[3]);
+       if (result != 0)
+               perror("JAILHOUSE_CELL_DESTROY");
+
+       close(dev);
+
+       return result;
+}
+
+/*
+ * "cell <subcommand> ...": dispatch to the create or destroy handler.
+ * Prints the help text and exits on a missing or unknown subcommand.
+ */
+static int cell_management(int argc, char *argv[])
+{
+       if (argc < 3) {
+               help(argv[0]);
+               exit(1);
+       }
+
+       if (!strcmp(argv[2], "create"))
+               return cell_create(argc, argv);
+       if (!strcmp(argv[2], "destroy"))
+               return cell_destroy(argc, argv);
+
+       help(argv[0]);
+       exit(1);
+}
+
+/*
+ * Command line entry point: dispatch on the first argument.
+ * Exits with 0 on success and 1 on any error or usage problem.
+ */
+int main(int argc, char *argv[])
+{
+       const char *command;
+       int err, fd;
+
+       if (argc < 2) {
+               help(argv[0]);
+               exit(1);
+       }
+       command = argv[1];
+
+       if (!strcmp(command, "enable")) {
+               err = enable(argc, argv);
+       } else if (!strcmp(command, "cell")) {
+               err = cell_management(argc, argv);
+       } else if (!strcmp(command, "disable")) {
+               fd = open_dev();
+               err = ioctl(fd, JAILHOUSE_DISABLE);
+               if (err)
+                       perror("JAILHOUSE_DISABLE");
+               close(fd);
+       } else {
+               help(argv[0]);
+               exit(1);
+       }
+
+       return err != 0;
+}