4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
53 #include "exec/ram_addr.h"
54 #include "qemu/cache-utils.h"
56 #include "qemu/range.h"
58 //#define DEBUG_SUBPAGE
60 #if !defined(CONFIG_USER_ONLY)
61 static bool in_migration;
63 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
65 static MemoryRegion *system_memory;
66 static MemoryRegion *system_io;
68 AddressSpace address_space_io;
69 AddressSpace address_space_memory;
71 MemoryRegion io_mem_rom, io_mem_notdirty;
72 static MemoryRegion io_mem_unassigned;
76 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
77 /* current CPU in the current thread. It is only valid inside
79 DEFINE_TLS(CPUState *, current_cpu);
80 /* 0 = Do not count executed instructions.
81 1 = Precise instruction counting.
82 2 = Adaptive rate instruction counting. */
85 #if !defined(CONFIG_USER_ONLY)
87 typedef struct PhysPageEntry PhysPageEntry;
89 struct PhysPageEntry {
90 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
92 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
96 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
98 /* Size of the L2 (and L3, etc) page tables. */
99 #define ADDR_SPACE_BITS 64
102 #define P_L2_SIZE (1 << P_L2_BITS)
104 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
106 typedef PhysPageEntry Node[P_L2_SIZE];
108 typedef struct PhysPageMap {
109 unsigned sections_nb;
110 unsigned sections_nb_alloc;
112 unsigned nodes_nb_alloc;
114 MemoryRegionSection *sections;
117 struct AddressSpaceDispatch {
118 /* This is a multi-level map on the physical address space.
119 * The bottom level has pointers to MemoryRegionSections.
121 PhysPageEntry phys_map;
126 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
127 typedef struct subpage_t {
131 uint16_t sub_section[TARGET_PAGE_SIZE];
134 #define PHYS_SECTION_UNASSIGNED 0
135 #define PHYS_SECTION_NOTDIRTY 1
136 #define PHYS_SECTION_ROM 2
137 #define PHYS_SECTION_WATCH 3
139 static void io_mem_init(void);
140 static void memory_map_init(void);
142 static MemoryRegion io_mem_watch;
145 #if !defined(CONFIG_USER_ONLY)
/* Ensure map->nodes has room for @nodes more entries (doubling growth via g_renew). */
/* NOTE(review): inline numbering is non-contiguous — lines are missing from this extraction. */
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149     if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151         map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152         map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
/* Take the next free node index and initialize every entry as skip=1 pointing at NIL. */
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
161     ret = map->nodes_nb++;
162     assert(ret != PHYS_MAP_NODE_NIL);
163     assert(ret != map->nodes_nb_alloc);
164     for (i = 0; i < P_L2_SIZE; ++i) {
165         map->nodes[ret][i].skip = 1;
166         map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
/* Recursively populate one level of the multi-level phys map: allocate the child
 * node on demand, then either store @leaf directly (aligned, large enough span)
 * or recurse one level down.  Fragmentary — several lines dropped by extraction. */
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
177     hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179     if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180         lp->ptr = phys_map_node_alloc(map);
181         p = map->nodes[lp->ptr];
183             for (i = 0; i < P_L2_SIZE; i++) {
185                 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189         p = map->nodes[lp->ptr];
191     lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193     while (*nb && lp < &p[P_L2_SIZE]) {
194         if ((*index & (step - 1)) == 0 && *nb >= step) {
200             phys_page_set_level(map, lp, index, nb, leaf, level - 1);
/* Map @nb pages starting at @index to section @leaf, reserving worst-case nodes first. */
206 static void phys_page_set(AddressSpaceDispatch *d,
207                           hwaddr index, hwaddr nb,
210     /* Wildly overreserve - it doesn't matter much. */
211     phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213     phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217  * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221     unsigned valid_ptr = P_L2_SIZE;
226     if (lp->ptr == PHYS_MAP_NODE_NIL) {
/* Scan children; recursion compacts subtrees first.  Lines missing here —
 * the code that counts/records valid children is not visible in this extraction. */
231     for (i = 0; i < P_L2_SIZE; i++) {
232         if (p[i].ptr == PHYS_MAP_NODE_NIL) {
239         phys_page_compact(&p[i], nodes, compacted);
243     /* We can only compress if there's only one child. */
248     assert(valid_ptr < P_L2_SIZE);
250     /* Don't compress if it won't fit in the # of bits we have. */
/* skip is a 3-bit-wide field per the (1 << 3) bound checked here. */
251     if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255     lp->ptr = p[valid_ptr].ptr;
256     if (!p[valid_ptr].skip) {
257         /* If our only child is a leaf, make this a leaf. */
258         /* By design, we should have made this node a leaf to begin with so we
259          * should never reach here.
260          * But since it's so simple to handle this, let's do it just in case we
265         lp->skip += p[valid_ptr].skip;
/* Compact the whole dispatch tree; @compacted is a scratch bitmap of nodes_nb bits. */
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271     DECLARE_BITMAP(compacted, nodes_nb);
273     if (d->phys_map.skip) {
274         phys_page_compact(&d->phys_map, d->map.nodes, compacted);
/* Walk the skip-compressed page map and return the MemoryRegionSection covering
 * @addr, or the unassigned section when no mapping covers it.
 * NOTE(review): "§ions" below is mojibake — almost certainly "&sections" in the
 * original; preserved byte-for-byte because this extraction is not compilable anyway. */
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279                                            Node *nodes, MemoryRegionSection *sections)
282     hwaddr index = addr >> TARGET_PAGE_BITS;
285     for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286         if (lp.ptr == PHYS_MAP_NODE_NIL) {
287             return §ions[PHYS_SECTION_UNASSIGNED];
290         lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
/* A section with size.hi set is large enough to cover any addr; otherwise check range. */
293     if (sections[lp.ptr].size.hi ||
294         range_covers_byte(sections[lp.ptr].offset_within_address_space,
295                           sections[lp.ptr].size.lo, addr)) {
296         return §ions[lp.ptr];
298     return §ions[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
/* Look up the section for @addr; when it is a subpage container and
 * @resolve_subpage is set, descend into the per-page sub_section table. */
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
310                                                         bool resolve_subpage)
312     MemoryRegionSection *section;
315     section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316     if (resolve_subpage && section->mr->subpage) {
317         subpage = container_of(section->mr, subpage_t, iomem);
318         section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
/* Translate @addr to (section, offset-within-region in *xlat) and clamp *plen
 * so the access does not run past the end of the section's MemoryRegion. */
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325                                  hwaddr *plen, bool resolve_subpage)
327     MemoryRegionSection *section;
330     section = address_space_lookup_region(d, addr, resolve_subpage);
331     /* Compute offset within MemoryRegionSection */
332     addr -= section->offset_within_address_space;
334     /* Compute offset within MemoryRegion */
335     *xlat = addr + section->offset_within_region;
337     diff = int128_sub(section->mr->size, int128_make64(addr));
338     *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
/* True when the access can be performed directly on host memory: plain RAM
 * (unless writing to a readonly region), or ROMD regions.
 * NOTE(review): the return statements for the romd and fall-through cases are
 * missing from this extraction. */
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344     if (memory_region_is_ram(mr)) {
345         return !(is_write && mr->readonly);
347     if (memory_region_is_romd(mr)) {
/* Public translation entry point: resolve @addr in @as, iterating through any
 * IOMMU regions (iommu_ops->translate) until a terminal region is reached.
 * On a permission miss the result is forced to io_mem_unassigned.  For direct
 * RAM accesses the length is additionally clamped to the current page. */
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355                                       hwaddr *xlat, hwaddr *plen,
359     MemoryRegionSection *section;
364         section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
367         if (!mr->iommu_ops) {
371         iotlb = mr->iommu_ops->translate(mr, addr);
372         addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373                 | (addr & iotlb.addr_mask));
374         len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
/* iotlb.perm bit 0 = read permission, bit 1 = write permission. */
375         if (!(iotlb.perm & (1 << is_write))) {
376             mr = &io_mem_unassigned;
/* Follow the IOMMU into its target address space and translate again. */
380         as = iotlb.target_as;
383     if (memory_access_is_direct(mr, is_write)) {
384         hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385         len = MIN(page, len);
/* TLB-fill variant: no subpage resolution, and IOMMU regions are not allowed
 * here (asserted), since the CPU TLB cannot model them. */
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
397     MemoryRegionSection *section;
398     section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400     assert(!section->mr->iommu_ops);
/* One-time global init; system-mode builds also initialize the RAM list mutex.
 * NOTE(review): remaining body (memory/io init calls) missing from extraction. */
405 void cpu_exec_init_all(void)
407 #if !defined(CONFIG_USER_ONLY)
408     qemu_mutex_init(&ram_list.mutex);
414 #if !defined(CONFIG_USER_ONLY)
/* vmstate post_load hook: strip the legacy 0x01 interrupt bit and flush the TLB
 * so stale translations from the source host are dropped. */
416 static int cpu_common_post_load(void *opaque, int version_id)
418     CPUState *cpu = opaque;
420     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421        version_id is increased. */
422     cpu->interrupt_request &= ~0x01;
423     tlb_flush(cpu->env_ptr, 1);
/* Migration description for state common to all CPU models (halted flag and
 * pending interrupt_request); post_load fixes up legacy bits. */
428 const VMStateDescription vmstate_cpu_common = {
429     .name = "cpu_common",
431     .minimum_version_id = 1,
432     .minimum_version_id_old = 1,
433     .post_load = cpu_common_post_load,
434     .fields = (VMStateField []) {
435         VMSTATE_UINT32(halted, CPUState),
436         VMSTATE_UINT32(interrupt_request, CPUState),
437         VMSTATE_END_OF_LIST()
/* Return the CPUState whose cpu_index equals @index.
 * NOTE(review): the iteration and return lines are missing from this extraction. */
443 CPUState *qemu_get_cpu(int index)
448         if (cpu->cpu_index == index) {
/* Register a newly-created vCPU: assign cpu_index, init break/watchpoint lists,
 * link it into the global cpus list, and register its migration state
 * (generic vmstate_cpu_common plus optional per-class vmsd / legacy savevm). */
456 void cpu_exec_init(CPUArchState *env)
458     CPUState *cpu = ENV_GET_CPU(env);
459     CPUClass *cc = CPU_GET_CLASS(cpu);
463 #if defined(CONFIG_USER_ONLY)
/* cpu_index is derived from the current length of the cpus list. */
467     CPU_FOREACH(some_cpu) {
470     cpu->cpu_index = cpu_index;
472     QTAILQ_INIT(&env->breakpoints);
473     QTAILQ_INIT(&env->watchpoints);
474 #ifndef CONFIG_USER_ONLY
475     cpu->thread_id = qemu_get_thread_id();
477     QTAILQ_INSERT_TAIL(&cpus, cpu, node);
478 #if defined(CONFIG_USER_ONLY)
481     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
482         vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
484 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
485     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
486                     cpu_save, cpu_load, env);
/* Legacy savevm path excludes the modern vmsd mechanisms. */
487     assert(cc->vmsd == NULL);
488     assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
490     if (cc->vmsd != NULL) {
491         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
495 #if defined(TARGET_HAS_ICE)
496 #if defined(CONFIG_USER_ONLY)
/* User mode: pc is already a host-usable virtual address, invalidate directly. */
497 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
499     tb_invalidate_phys_page_range(pc, pc + 1, 0);
/* System mode: translate pc to a physical address first, then invalidate. */
502 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
504     hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
506         tb_invalidate_phys_addr(&address_space_memory,
507                                 phys | (pc & ~TARGET_PAGE_MASK));
511 #endif /* TARGET_HAS_ICE */
513 #if defined(CONFIG_USER_ONLY)
/* User-mode stubs: watchpoints are not supported without the softmmu. */
514 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
519 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
520                           int flags, CPUWatchpoint **watchpoint)
525 /* Add a watchpoint. */
526 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
527                           int flags, CPUWatchpoint **watchpoint)
529     target_ulong len_mask = ~(len - 1);
532     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
533     if ((len & (len - 1)) || (addr & ~len_mask) ||
534             len == 0 || len > TARGET_PAGE_SIZE) {
535         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
536                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
539     wp = g_malloc(sizeof(*wp));
542     wp->len_mask = len_mask;
545     /* keep all GDB-injected watchpoints in front */
547         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
549         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
/* Flush the page so the next access goes through the slow path and traps. */
551     tlb_flush_page(env, addr);
558 /* Remove a specific watchpoint. */
559 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
562     target_ulong len_mask = ~(len - 1);
/* Match on address, length mask and flags (ignoring the transient HIT bit). */
565     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
566         if (addr == wp->vaddr && len_mask == wp->len_mask
567                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
568             cpu_watchpoint_remove_by_ref(env, wp);
575 /* Remove a specific watchpoint by reference. */
576 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
578     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
/* Flush the affected page before the entry is freed. */
580     tlb_flush_page(env, watchpoint->vaddr);
585 /* Remove all matching watchpoints. */
586 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
588     CPUWatchpoint *wp, *next;
/* SAFE variant required: removal frees entries while iterating. */
590     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
591         if (wp->flags & mask)
592             cpu_watchpoint_remove_by_ref(env, wp);
597 /* Add a breakpoint. */
598 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
599                           CPUBreakpoint **breakpoint)
601 #if defined(TARGET_HAS_ICE)
604     bp = g_malloc(sizeof(*bp));
609     /* keep all GDB-injected breakpoints in front */
610     if (flags & BP_GDB) {
611         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
613         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
/* Throw away any TB containing pc so it is retranslated with the breakpoint. */
616     breakpoint_invalidate(ENV_GET_CPU(env), pc);
627 /* Remove a specific breakpoint. */
628 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
630 #if defined(TARGET_HAS_ICE)
633     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
634         if (bp->pc == pc && bp->flags == flags) {
635             cpu_breakpoint_remove_by_ref(env, bp);
645 /* Remove a specific breakpoint by reference. */
646 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
648 #if defined(TARGET_HAS_ICE)
649     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
/* Force retranslation of code at the old breakpoint address. */
651     breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
657 /* Remove all matching breakpoints. */
658 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
660 #if defined(TARGET_HAS_ICE)
661     CPUBreakpoint *bp, *next;
/* SAFE variant required: removal frees entries while iterating. */
663     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
664         if (bp->flags & mask)
665             cpu_breakpoint_remove_by_ref(env, bp);
670 /* enable or disable single step mode. EXCP_DEBUG is returned by the
671    CPU loop after each instruction */
672 void cpu_single_step(CPUState *cpu, int enabled)
674 #if defined(TARGET_HAS_ICE)
675     if (cpu->singlestep_enabled != enabled) {
676         cpu->singlestep_enabled = enabled;
/* Under KVM, guest-debug state changes are pushed into the kernel. */
678             kvm_update_guest_debug(cpu, 0);
680             /* must flush all the translated code to avoid inconsistencies */
681             /* XXX: only flush what is necessary */
682             CPUArchState *env = cpu->env_ptr;
/* Fatal-error exit: print the formatted message and a CPU state dump to stderr
 * (and to the log file when logging is enabled), then abort.  In user-mode the
 * SIGABRT handler is reset to SIG_DFL so a core dump is actually produced. */
689 void cpu_abort(CPUArchState *env, const char *fmt, ...)
691     CPUState *cpu = ENV_GET_CPU(env);
697     fprintf(stderr, "qemu: fatal: ");
698     vfprintf(stderr, fmt, ap);
699     fprintf(stderr, "\n");
700     cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
701     if (qemu_log_enabled()) {
702         qemu_log("qemu: fatal: ");
703         qemu_log_vprintf(fmt, ap2);
705         log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
711 #if defined(CONFIG_USER_ONLY)
713         struct sigaction act;
714         sigfillset(&act.sa_mask);
715         act.sa_handler = SIG_DFL;
716         sigaction(SIGABRT, &act, NULL);
722 #if !defined(CONFIG_USER_ONLY)
/* Find the RAMBlock containing ram_addr @addr: try the MRU cache first, then
 * scan the full list; the found block becomes the new MRU entry.  A miss is a
 * fatal error (bad ram offset). */
723 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
727     /* The list is protected by the iothread lock here. */
728     block = ram_list.mru_block;
729     if (block && addr - block->offset < block->length) {
732     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
733         if (addr - block->offset < block->length) {
738     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
742     ram_list.mru_block = block;
/* Convert the guest ram range [start, start+length) to a host-virtual range
 * within its (single) RAMBlock and reset TLB dirty tracking for it. */
746 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
752     end = TARGET_PAGE_ALIGN(start + length);
753     start &= TARGET_PAGE_MASK;
755     block = qemu_get_ram_block(start);
/* The range must not straddle two RAM blocks. */
756     assert(block == qemu_get_ram_block(end - 1));
757     start1 = (uintptr_t)block->host + (start - block->offset);
758     cpu_tlb_reset_dirty_all(start1, length);
761 /* Note: start and end must be within the same ram block. */
762 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
767     cpu_physical_memory_clear_dirty_range(start, length, client);
/* Also reset CPU TLB dirty state so writes fault again and re-mark pages. */
770         tlb_reset_dirty_range_all(start, length);
774 static void cpu_physical_memory_set_dirty_tracking(bool enable)
776 in_migration = enable;
/* Build the iotlb entry for a TLB fill: for RAM the entry is the ram_addr ORed
 * with NOTDIRTY (writable) or ROM (readonly) slow-path section indexes; for MMIO
 * it is the section's index in the dispatch map.  Pages with a matching
 * watchpoint are redirected through PHYS_SECTION_WATCH and marked TLB_MMIO. */
779 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
780                                        MemoryRegionSection *section,
782                                        hwaddr paddr, hwaddr xlat,
784                                        target_ulong *address)
789     if (memory_region_is_ram(section->mr)) {
791         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
793         if (!section->readonly) {
794             iotlb |= PHYS_SECTION_NOTDIRTY;
796             iotlb |= PHYS_SECTION_ROM;
799         iotlb = section - address_space_memory.dispatch->map.sections;
803     /* Make accesses to pages with watchpoints go via the
804        watchpoint trap routines.  */
805     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
806         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
807             /* Avoid trapping reads of pages with a write breakpoint. */
808             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
809                 iotlb = PHYS_SECTION_WATCH + paddr;
810                 *address |= TLB_MMIO;
818 #endif /* defined(CONFIG_USER_ONLY) */
820 #if !defined(CONFIG_USER_ONLY)
822 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
824 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
/* Pluggable guest-RAM allocator; defaults to anonymous mmap-backed allocation. */
826 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
829  * Set a custom physical guest memory alloator.
830  * Accelerators with unusual needs may need this.  Hopefully, we can
831  * get rid of it eventually.
833 void phys_mem_set_alloc(void *(*alloc)(size_t))
835     phys_mem_alloc = alloc;
/* Append a copy of @section to map->sections (growing the array as needed),
 * take a reference on its MemoryRegion, and return the new section index. */
838 static uint16_t phys_section_add(PhysPageMap *map,
839                                  MemoryRegionSection *section)
841     /* The physical section number is ORed with a page-aligned
842      * pointer to produce the iotlb entries.  Thus it should
843      * never overflow into the page-aligned value.
845     assert(map->sections_nb < TARGET_PAGE_SIZE);
847     if (map->sections_nb == map->sections_nb_alloc) {
848         map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
849         map->sections = g_renew(MemoryRegionSection, map->sections,
850                                 map->sections_nb_alloc);
852     map->sections[map->sections_nb] = *section;
853     memory_region_ref(section->mr);
854     return map->sections_nb++;
/* Drop the reference taken in phys_section_add; subpage containers also
 * destroy (and, per the visible code, free) their iomem region. */
857 static void phys_section_destroy(MemoryRegion *mr)
859     memory_region_unref(mr);
862         subpage_t *subpage = container_of(mr, subpage_t, iomem);
863         memory_region_destroy(&subpage->iomem);
/* Destroy every registered section in reverse order, then free the array. */
868 static void phys_sections_free(PhysPageMap *map)
870     while (map->sections_nb > 0) {
871         MemoryRegionSection *section = &map->sections[--map->sections_nb];
872         phys_section_destroy(section->mr);
874     g_free(map->sections);
/* Register a section smaller than a page: create the page's subpage container
 * on first use (mapping the page to it), then record the sub-range inside it. */
878 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
881     hwaddr base = section->offset_within_address_space
883     MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
884                                                    d->map.nodes, d->map.sections);
885     MemoryRegionSection subsection = {
886         .offset_within_address_space = base,
887         .size = int128_make64(TARGET_PAGE_SIZE),
/* The page must be either unassigned or already a subpage container. */
891     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
893     if (!(existing->mr->subpage)) {
894         subpage = subpage_init(d->as, base);
895         subsection.address_space = d->as;
896         subsection.mr = &subpage->iomem;
897         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
898                       phys_section_add(&d->map, &subsection));
900         subpage = container_of(existing->mr, subpage_t, iomem);
902     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
903     end = start + int128_get64(section->size) - 1;
904     subpage_register(subpage, start, end,
905                      phys_section_add(&d->map, section));
/* Register a page-aligned, multi-page section: one section index covering
 * num_pages consecutive pages in the dispatch map. */
909 static void register_multipage(AddressSpaceDispatch *d,
910                                MemoryRegionSection *section)
912     hwaddr start_addr = section->offset_within_address_space;
913     uint16_t section_index = phys_section_add(&d->map, section);
914     uint64_t num_pages = int128_get64(int128_rshift(section->size,
918     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
/* MemoryListener region_add hook: split @section into an unaligned head
 * (subpage), a page-aligned middle (multipage), and an unaligned/short tail
 * (subpage), registering each piece in the next dispatch map. */
921 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
923     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
924     AddressSpaceDispatch *d = as->next_dispatch;
925     MemoryRegionSection now = *section, remain = *section;
926     Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
928     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
929         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
930                         - now.offset_within_address_space;
932         now.size = int128_min(int128_make64(left), now.size);
933         register_subpage(d, &now);
935         now.size = int128_zero();
937     while (int128_ne(remain.size, now.size)) {
938         remain.size = int128_sub(remain.size, now.size);
939         remain.offset_within_address_space += int128_get64(now.size);
940         remain.offset_within_region += int128_get64(now.size);
942         if (int128_lt(remain.size, page_size)) {
943             register_subpage(d, &now);
944         } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
945             now.size = page_size;
946             register_subpage(d, &now);
/* Remaining chunk is page-aligned: round size down to whole pages. */
948             now.size = int128_and(now.size, int128_neg(page_size));
949             register_multipage(d, &now);
/* Drain KVM's coalesced-MMIO ring so buffered writes are applied. */
954 void qemu_flush_coalesced_mmio_buffer(void)
957         kvm_flush_coalesced_mmio_buffer();
960 void qemu_mutex_lock_ramlist(void)
962 qemu_mutex_lock(&ram_list.mutex);
965 void qemu_mutex_unlock_ramlist(void)
967 qemu_mutex_unlock(&ram_list.mutex);
974 #define HUGETLBFS_MAGIC 0x958458f6
/* statfs() the hugetlbfs mount at @path (retrying on EINTR) and warn when the
 * path is not actually on hugetlbfs; the fs block size is the hugepage size. */
976 static long gethugepagesize(const char *path)
982         ret = statfs(path, &fs);
983     } while (ret != 0 && errno == EINTR);
990     if (fs.f_type != HUGETLBFS_MAGIC)
991         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
996 static sigjmp_buf sigjump;
998 static void sigbus_handler(int signal)
1000 siglongjmp(sigjump, 1);
/* Allocate @memory bytes of guest RAM backed by a (huge)page file under @path:
 * create a uniquely-named temp file, size it with ftruncate, mmap it, then
 * pre-touch every hugepage under a temporary SIGBUS handler so allocation
 * failures are reported up front rather than faulting later. */
1003 static void *file_ram_alloc(RAMBlock *block,
1008     char *sanitized_name;
1012     unsigned long hpagesize;
1014     hpagesize = gethugepagesize(path);
1019     if (memory < hpagesize) {
1023     if (kvm_enabled() && !kvm_has_sync_mmu()) {
1024         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1028     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1029     sanitized_name = g_strdup(block->mr->name);
1030     for (c = sanitized_name; *c != '\0'; c++) {
1035     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1037     g_free(sanitized_name);
1039     fd = mkstemp(filename);
1041         perror("unable to create backing store for hugepages");
/* Round the size up to a whole number of hugepages. */
1048     memory = (memory+hpagesize-1) & ~(hpagesize-1);
1051      * ftruncate is not supported by hugetlbfs in older
1052      * hosts, so don't bother bailing out on errors.
1053      * If anything goes wrong with it under other filesystems,
1056     if (ftruncate(fd, memory))
1057         perror("ftruncate");
1059     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1060     if (area == MAP_FAILED) {
1061         perror("file_ram_alloc: can't mmap RAM pages");
1068         struct sigaction act, oldact;
1069         sigset_t set, oldset;
1071         memset(&act, 0, sizeof(act));
1072         act.sa_handler = &sigbus_handler;
1075         ret = sigaction(SIGBUS, &act, &oldact);
1077             perror("file_ram_alloc: failed to install signal handler");
1081         /* unblock SIGBUS */
1083         sigaddset(&set, SIGBUS);
1084         pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1086         if (sigsetjmp(sigjump, 1)) {
1087             fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1091         /* MAP_POPULATE silently ignores failures */
/* Touch one byte per hugepage to force real allocation now. */
1092         for (i = 0; i < (memory/hpagesize); i++) {
1093             memset(area + (hpagesize*i), 0, 1);
1096         ret = sigaction(SIGBUS, &oldact, NULL);
1098             perror("file_ram_alloc: failed to reinstall signal handler");
1102         pthread_sigmask(SIG_SETMASK, &oldset, NULL);
/* Fallback stub for hosts without hugetlbfs support: -mem-path is rejected. */
1109 static void *file_ram_alloc(RAMBlock *block,
1113     fprintf(stderr, "-mem-path not supported on this host\n");
/* Best-fit search for a free gap of @size bytes in the ram_addr_t space: for
 * each block, find the distance to the nearest following block and keep the
 * smallest gap that still fits.  RAM_ADDR_MAX signals "no gap found" (fatal). */
1118 static ram_addr_t find_ram_offset(ram_addr_t size)
1120     RAMBlock *block, *next_block;
1121     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1123     assert(size != 0); /* it would hand out same offset multiple times */
1125     if (QTAILQ_EMPTY(&ram_list.blocks))
1128     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1129         ram_addr_t end, next = RAM_ADDR_MAX;
1131         end = block->offset + block->length;
1133         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1134             if (next_block->offset >= end) {
1135                 next = MIN(next, next_block->offset);
1138         if (next - end >= size && next - end < mingap) {
1140             mingap = next - end;
1144     if (offset == RAM_ADDR_MAX) {
1145         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
/* Return the highest end address (offset + length) over all RAM blocks. */
1153 ram_addr_t last_ram_offset(void)
1156     ram_addr_t last = 0;
1158     QTAILQ_FOREACH(block, &ram_list.blocks, next)
1159         last = MAX(last, block->offset + block->length);
/* Honor the dump-guest-core machine option: when disabled, madvise the region
 * MADV_DONTDUMP so guest RAM is excluded from host core dumps. */
1164 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1168     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1169     if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1170                            "dump-guest-core", true)) {
1171         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1173             perror("qemu_madvise");
1174             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1175                             "but dump_guest_core=off specified\n");
/* Give the RAM block at @addr its migration identifier: optional device path
 * prefix plus @name; abort if another block already uses the same idstr. */
1180 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1182     RAMBlock *new_block, *block;
1185     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1186         if (block->offset == addr) {
1192     assert(!new_block->idstr[0]);
1195         char *id = qdev_get_dev_path(dev);
1197             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1201     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1203     /* This assumes the iothread lock is taken here too.  */
1204     qemu_mutex_lock_ramlist();
1205     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1206         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1207             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1212     qemu_mutex_unlock_ramlist();
/* Mark the region MADV_MERGEABLE (KSM) unless disabled via mem-merge option. */
1215 static int memory_try_enable_merging(void *addr, size_t len)
1217     if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1218         /* disabled by the user */
1222     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
/* Create a new RAMBlock of @size bytes and return its ram_addr_t offset.
 * Backing host memory comes from, in priority order: caller-supplied @host
 * (marked RAM_PREALLOC), Xen's allocator, -mem-path file allocation, or the
 * pluggable phys_mem_alloc.  The block is inserted into the list sorted by
 * descending size, dirty bitmaps are extended, and the new range is marked
 * dirty.  Assumes the iothread lock is held by the caller. */
1225 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1228     RAMBlock *block, *new_block;
1229     ram_addr_t old_ram_size, new_ram_size;
1231     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1233     size = TARGET_PAGE_ALIGN(size);
1234     new_block = g_malloc0(sizeof(*new_block));
1237     /* This assumes the iothread lock is taken here too.  */
1238     qemu_mutex_lock_ramlist();
1240     new_block->offset = find_ram_offset(size);
1242         new_block->host = host;
1243         new_block->flags |= RAM_PREALLOC_MASK;
1244     } else if (xen_enabled()) {
1246             fprintf(stderr, "-mem-path not supported with Xen\n");
1249         xen_ram_alloc(new_block->offset, size, mr);
1252             if (phys_mem_alloc != qemu_anon_ram_alloc) {
1254                  * file_ram_alloc() needs to allocate just like
1255                  * phys_mem_alloc, but we haven't bothered to provide
1259                         "-mem-path not supported with this accelerator\n");
1262             new_block->host = file_ram_alloc(new_block, size, mem_path);
1264         if (!new_block->host) {
1265             new_block->host = phys_mem_alloc(size);
1266             if (!new_block->host) {
1267                 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1268                         new_block->mr->name, strerror(errno));
1271             memory_try_enable_merging(new_block->host, size);
1274     new_block->length = size;
1276     /* Keep the list sorted from biggest to smallest block.  */
1277     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1278         if (block->length < new_block->length) {
1283         QTAILQ_INSERT_BEFORE(block, new_block, next);
1285         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1287     ram_list.mru_block = NULL;
1290     qemu_mutex_unlock_ramlist();
1292     new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1294     if (new_ram_size > old_ram_size) {
1296         for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1297             ram_list.dirty_memory[i] =
1298                 bitmap_zero_extend(ram_list.dirty_memory[i],
1299                                    old_ram_size, new_ram_size);
1302     cpu_physical_memory_set_dirty_range(new_block->offset, size);
1304     qemu_ram_setup_dump(new_block->host, size);
1305     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1306     qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1309         kvm_setup_guest_memory(new_block->host, size);
1311     return new_block->offset;
1314 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1316 return qemu_ram_alloc_from_ptr(size, NULL, mr);
/* Remove the block at @addr from the list without freeing its host memory —
 * counterpart of qemu_ram_alloc_from_ptr where the caller owns the backing. */
1319 void qemu_ram_free_from_ptr(ram_addr_t addr)
1323     /* This assumes the iothread lock is taken here too.  */
1324     qemu_mutex_lock_ramlist();
1325     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1326         if (addr == block->offset) {
1327             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1328             ram_list.mru_block = NULL;
1334     qemu_mutex_unlock_ramlist();
/* Fully free the block at @addr: unlink it, then release the host backing in
 * whatever way matches how it was allocated (preallocated: nothing; Xen:
 * mapcache invalidate; file-backed: munmap; otherwise anonymous-RAM free). */
1337 void qemu_ram_free(ram_addr_t addr)
1341     /* This assumes the iothread lock is taken here too.  */
1342     qemu_mutex_lock_ramlist();
1343     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1344         if (addr == block->offset) {
1345             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1346             ram_list.mru_block = NULL;
1348             if (block->flags & RAM_PREALLOC_MASK) {
1350             } else if (xen_enabled()) {
1351                 xen_invalidate_map_cache_entry(block->host);
1353                 if (block->fd >= 0) {
1354                     munmap(block->host, block->length);
1358                     qemu_anon_ram_free(block->host, block->length);
1364     qemu_mutex_unlock_ramlist();
/* Re-establish the mapping for [addr, addr+length) after e.g. a failed madvise
 * or hwpoison event: munmap the damaged range and mmap it back MAP_FIXED-style
 * at the same host address from its original backing (file fd or anonymous). */
1369 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1376     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1377         offset = addr - block->offset;
1378         if (offset < block->length) {
1379             vaddr = block->host + offset;
1380             if (block->flags & RAM_PREALLOC_MASK) {
1382             } else if (xen_enabled()) {
1386                 munmap(vaddr, length);
1387                 if (block->fd >= 0) {
1389                     flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1392                         flags |= MAP_PRIVATE;
1394                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1395                                 flags, block->fd, offset);
1398                      * Remap needs to match alloc.  Accelerators that
1399                      * set phys_mem_alloc never remap.  If they did,
1400                      * we'd need a remap hook here.
1402                     assert(phys_mem_alloc == qemu_anon_ram_alloc);
1404                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1405                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1408                 if (area != vaddr) {
1409                     fprintf(stderr, "Could not remap addr: "
1410                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1414                 memory_try_enable_merging(vaddr, length);
1415                 qemu_ram_setup_dump(vaddr, length);
1421 #endif /* !_WIN32 */
1423 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1424 With the exception of the softmmu code in this file, this should
1425 only be used for local memory (e.g. video ram) that the device owns,
1426 and knows it isn't going to access beyond the end of the block.
1428 It should not be used for general purpose DMA.
1429 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
/* Return a host pointer for guest ram_addr @addr.  Under Xen the address is
 * resolved through the map cache instead of a direct host mapping. */
1431 void *qemu_get_ram_ptr(ram_addr_t addr)
1433     RAMBlock *block = qemu_get_ram_block(addr);
1435     if (xen_enabled()) {
1436         /* We need to check if the requested address is in the RAM
1437          * because we don't want to map the entire memory in QEMU.
1438          * In that case just map until the end of the page.
1440         if (block->offset == 0) {
1441             return xen_map_cache(addr, 0, 0);
1442         } else if (block->host == NULL) {
1444                 xen_map_cache(block->offset, block->length, 1);
1447     return block->host + (addr - block->offset);
1450 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1451  * but takes a size argument */
1452 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1457     if (xen_enabled()) {
1458         return xen_map_cache(addr, *size, 1);
1462         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1463             if (addr - block->offset < block->length) {
/* Clamp *size so the returned pointer stays within this block. */
1464                 if (addr - block->offset + *size > block->length)
1465                     *size = block->length - addr + block->offset;
1466                 return block->host + (addr - block->offset);
1470         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1475 /* Some of the softmmu routines need to translate from a host pointer
1476    (typically a TLB entry) back to a ram offset.  */
1477 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1480     uint8_t *host = ptr;
1482     if (xen_enabled()) {
1483         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1484         return qemu_get_ram_block(*ram_addr)->mr;
/* Fast path: check the MRU block before scanning the whole list. */
1487     block = ram_list.mru_block;
1488     if (block && block->host && host - block->host < block->length) {
1492     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1493         /* This case append when the block is not mapped. */
1494         if (block->host == NULL) {
1497         if (host - block->host < block->length) {
1505     *ram_addr = block->offset + (host - block->host);
/* Write handler for the "notdirty" MMIO region: intercepts stores to RAM
 * pages that contain translated code, so TBs can be invalidated before
 * the actual store is performed. */
1509 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1510 uint64_t val, unsigned size)
/* If the CODE dirty flag is clear, TBs may still cover this page: zap them. */
1512 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1513 tb_invalidate_phys_page_fast(ram_addr, size);
/* Perform the store by access size (switch cases elided in this extract). */
1517 stb_p(qemu_get_ram_ptr(ram_addr), val);
1520 stw_p(qemu_get_ram_ptr(ram_addr), val);
1523 stl_p(qemu_get_ram_ptr(ram_addr), val);
/* Record the write for migration and VGA dirty-bitmap tracking. */
1528 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1529 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1530 /* we remove the notdirty callback only if the code has been flushed */
1532 if (!cpu_physical_memory_is_clean(ram_addr)) {
/* Page now fully dirty: re-enable direct writes via the current CPU's TLB. */
1533 CPUArchState *env = current_cpu->env_ptr;
1534 tlb_set_dirty(env, env->mem_io_vaddr);
/* Accepts callback for the notdirty region (body elided in this extract;
 * presumably accepts all writes — confirm against the full source). */
1538 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1539 unsigned size, bool is_write)
/* Region ops for io_mem_notdirty: write-only, native endianness. */
1544 static const MemoryRegionOps notdirty_mem_ops = {
1545 .write = notdirty_mem_write,
1546 .valid.accepts = notdirty_mem_accepts,
1547 .endianness = DEVICE_NATIVE_ENDIAN,
1550 /* Generate a debug exception if a watchpoint has been hit. */
1551 static void check_watchpoint(int offset, int len_mask, int flags)
1553 CPUArchState *env = current_cpu->env_ptr;
1554 target_ulong pc, cs_base;
1559 if (env->watchpoint_hit) {
1560 /* We re-entered the check after replacing the TB. Now raise
1561 * the debug interrupt so that it will trigger after the
1562 * current instruction. */
1563 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
/* Reconstruct the guest virtual address of the access: page base from
 * mem_io_vaddr plus the in-page offset passed by the caller. */
1566 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1567 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* Match if the access overlaps the watchpoint's range and the access
 * type (BP_MEM_READ/BP_MEM_WRITE) is among the flags it watches. */
1568 if ((vaddr == (wp->vaddr & len_mask) ||
1569 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1570 wp->flags |= BP_WATCHPOINT_HIT;
1571 if (!env->watchpoint_hit) {
1572 env->watchpoint_hit = wp;
1573 tb_check_watchpoint(env);
1574 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
/* Stop before the access: raise EXCP_DEBUG (cpu_loop_exit elided). */
1575 env->exception_index = EXCP_DEBUG;
/* Stop after the access: regenerate a single-insn TB and restart. */
1578 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1579 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1580 cpu_resume_from_signal(env, NULL);
/* Not a hit (or different access type): clear any stale hit marker. */
1584 wp->flags &= ~BP_WATCHPOINT_HIT;
1589 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1590 so these check for a hit then pass through to the normal out-of-line
phys routines. */
1592 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
/* Trap read watchpoints first, then forward the load by access size. */
1595 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1597 case 1: return ldub_phys(addr);
1598 case 2: return lduw_phys(addr);
1599 case 4: return ldl_phys(addr);
1604 static void watch_mem_write(void *opaque, hwaddr addr,
1605 uint64_t val, unsigned size)
/* Trap write watchpoints first, then forward the store by access size. */
1607 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1610 stb_phys(addr, val);
1613 stw_phys(addr, val);
1616 stl_phys(addr, val);
/* Region ops backing io_mem_watch. */
1622 static const MemoryRegionOps watch_mem_ops = {
1623 .read = watch_mem_read,
1624 .write = watch_mem_write,
1625 .endianness = DEVICE_NATIVE_ENDIAN,
/* Sub-page dispatch: a page split among several sections is fronted by a
 * subpage_t whose ops forward each access to the owning address space at
 * subpage->base + addr, staging the bytes through a small local buffer. */
1628 static uint64_t subpage_read(void *opaque, hwaddr addr,
1631 subpage_t *subpage = opaque;
1634 #if defined(DEBUG_SUBPAGE)
1635 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1636 subpage, len, addr);
1638 address_space_read(subpage->as, addr + subpage->base, buf, len);
/* Store mirror of subpage_read. */
1651 static void subpage_write(void *opaque, hwaddr addr,
1652 uint64_t value, unsigned len)
1654 subpage_t *subpage = opaque;
1657 #if defined(DEBUG_SUBPAGE)
1658 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1659 " value %"PRIx64"\n",
1660 __func__, subpage, len, addr, value);
1675 address_space_write(subpage->as, addr + subpage->base, buf, len);
/* Access validity also delegates to the target address space. */
1678 static bool subpage_accepts(void *opaque, hwaddr addr,
1679 unsigned len, bool is_write)
1681 subpage_t *subpage = opaque;
1682 #if defined(DEBUG_SUBPAGE)
1683 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1684 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1687 return address_space_access_valid(subpage->as, addr + subpage->base,
1691 static const MemoryRegionOps subpage_ops = {
1692 .read = subpage_read,
1693 .write = subpage_write,
1694 .valid.accepts = subpage_accepts,
1695 .endianness = DEVICE_NATIVE_ENDIAN,
/* Point the sub-page slots covering byte offsets [start, end] (within one
 * target page) at the given section index. Out-of-range offsets are
 * rejected (error return elided in this extract). */
1698 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1703 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1705 idx = SUBPAGE_IDX(start);
1706 eidx = SUBPAGE_IDX(end);
1707 #if defined(DEBUG_SUBPAGE)
1708 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1709 __func__, mmio, start, end, idx, eidx, section);
/* Fill every slot in the inclusive range with the section index. */
1711 for (; idx <= eidx; idx++) {
1712 mmio->sub_section[idx] = section;
/* Allocate and initialise a subpage container for the page at 'base';
 * every slot initially points at the unassigned section. */
1718 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1722 mmio = g_malloc0(sizeof(subpage_t));
1726 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1727 "subpage", TARGET_PAGE_SIZE);
/* Marked so the dispatch code knows to recurse into sub_section[]. */
1728 mmio->iomem.subpage = true;
1729 #if defined(DEBUG_SUBPAGE)
1730 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1731 mmio, base, TARGET_PAGE_SIZE);
1733 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
/* Register a section for 'mr' spanning the whole 64-bit address range in
 * 'map' and return its section index — used to pin the fixed
 * PHYS_SECTION_* slots created in mem_begin(). */
1738 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1740 MemoryRegionSection section = {
1741 .address_space = &address_space_memory,
1743 .offset_within_address_space = 0,
1744 .offset_within_region = 0,
/* int128_2_64() == 2^64: the section covers everything. */
1745 .size = int128_2_64(),
/* fixed: "&section" was mis-encoded as "§ion" (HTML entity corruption). */
1748 return phys_section_add(map, &section);
/* Recover the MemoryRegion for an IOTLB entry: the section index is
 * stored in the sub-page bits of the IOTLB value. */
1751 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1753 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
/* Create the fixed file-scope MMIO regions used by the dispatch fast
 * path — ROM, unassigned, notdirty and watchpoint — each spanning the
 * full 64-bit range. */
1756 static void io_mem_init(void)
1758 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1759 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1760 "unassigned", UINT64_MAX);
/* fixed: "&notdirty_mem_ops" was mis-encoded as "¬dirty_mem_ops"
 * (HTML "&not;" entity corruption). */
1761 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1762 "notdirty", UINT64_MAX);
1763 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1764 "watch", UINT64_MAX);
/* Listener 'begin' hook: build a fresh AddressSpaceDispatch with the four
 * fixed sections at their expected indices; it is published by mem_commit. */
1767 static void mem_begin(MemoryListener *listener)
1769 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1770 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
/* The asserts pin the well-known PHYS_SECTION_* index ordering. */
1773 n = dummy_section(&d->map, &io_mem_unassigned);
1774 assert(n == PHYS_SECTION_UNASSIGNED);
1775 n = dummy_section(&d->map, &io_mem_notdirty);
1776 assert(n == PHYS_SECTION_NOTDIRTY);
1777 n = dummy_section(&d->map, &io_mem_rom);
1778 assert(n == PHYS_SECTION_ROM);
1779 n = dummy_section(&d->map, &io_mem_watch);
1780 assert(n == PHYS_SECTION_WATCH);
/* Empty radix tree: one level that resolves to "no node". */
1782 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1784 as->next_dispatch = d;
/* Listener 'commit' hook: compact the newly built radix tree, publish it
 * as the current dispatch, and free the previous one (if any). */
1787 static void mem_commit(MemoryListener *listener)
1789 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1790 AddressSpaceDispatch *cur = as->dispatch;
1791 AddressSpaceDispatch *next = as->next_dispatch;
1793 phys_page_compact_all(next, next->map.nodes_nb);
1795 as->dispatch = next;
1798 phys_sections_free(&cur->map);
/* TCG listener commit: the RAM layout changed, so the per-CPU TLBs
 * (which cache ram addresses) must be reset. */
1803 static void tcg_commit(MemoryListener *listener)
1807 /* since each CPU stores ram addresses in its TLB cache, we must
1808 reset the modified entries */
1811 CPUArchState *env = cpu->env_ptr;
/* Global dirty-logging on/off, driven by the core memory listener. */
1817 static void core_log_global_start(MemoryListener *listener)
1819 cpu_physical_memory_set_dirty_tracking(true);
1822 static void core_log_global_stop(MemoryListener *listener)
1824 cpu_physical_memory_set_dirty_tracking(false);
1827 static MemoryListener core_memory_listener = {
1828 .log_global_start = core_log_global_start,
1829 .log_global_stop = core_log_global_stop,
/* Registered only when TCG is in use (see memory_map_init). */
1833 static MemoryListener tcg_memory_listener = {
1834 .commit = tcg_commit,
/* Attach the per-AddressSpace dispatch listener: mem_begin/mem_commit
 * (plus mem_add for regions) rebuild the dispatch tree on every
 * memory-topology transaction. */
1837 void address_space_init_dispatch(AddressSpace *as)
1839 as->dispatch = NULL;
1840 as->dispatch_listener = (MemoryListener) {
1842 .commit = mem_commit,
1843 .region_add = mem_add,
1844 .region_nop = mem_add,
1847 memory_listener_register(&as->dispatch_listener, as);
/* Tear-down counterpart: unregister the listener and drop the dispatch. */
1850 void address_space_destroy_dispatch(AddressSpace *as)
1852 AddressSpaceDispatch *d = as->dispatch;
1854 memory_listener_unregister(&as->dispatch_listener);
1856 as->dispatch = NULL;
/* One-time creation of the global "system" memory and I/O address spaces,
 * plus registration of the core (and, under TCG, the TCG) listeners. */
1859 static void memory_map_init(void)
1861 system_memory = g_malloc(sizeof(*system_memory))
1863 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1864 address_space_init(&address_space_memory, system_memory, "memory");
1866 system_io = g_malloc(sizeof(*system_io));
1867 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1869 address_space_init(&address_space_io, system_io, "I/O");
1871 memory_listener_register(&core_memory_listener, &address_space_memory);
1872 if (tcg_enabled()) {
1873 memory_listener_register(&tcg_memory_listener, &address_space_memory);
/* Accessors for the file-scope globals created above. */
1877 MemoryRegion *get_system_memory(void)
1879 return system_memory;
1882 MemoryRegion *get_system_io(void)
1887 #endif /* !defined(CONFIG_USER_ONLY) */
1889 /* physical memory access (slow version, mainly for debug) */
1890 #if defined(CONFIG_USER_ONLY)
/* User-mode variant: walks guest pages directly, honouring the
 * PAGE_VALID / PAGE_READ / PAGE_WRITE protection flags per chunk. */
1891 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1892 uint8_t *buf, int len, int is_write)
/* Clamp each iteration to the end of the current target page. */
1899 page = addr & TARGET_PAGE_MASK;
1900 l = (page + TARGET_PAGE_SIZE) - addr;
1903 flags = page_get_flags(page);
1904 if (!(flags & PAGE_VALID))
1907 if (!(flags & PAGE_WRITE))
1909 /* XXX: this code should not depend on lock_user */
1910 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1913 unlock_user(p, addr, l);
1915 if (!(flags & PAGE_READ))
1917 /* XXX: this code should not depend on lock_user */
1918 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
/* Read path: nothing written back to guest memory, hence len 0. */
1921 unlock_user(p, addr, 0);
/* After a direct RAM write: if the range was clean, invalidate any TBs
 * over [addr, addr+length); then mark it dirty for VGA and migration
 * tracking and notify Xen of the modification. */
1932 static void invalidate_and_set_dirty(hwaddr addr,
1935 if (cpu_physical_memory_is_clean(addr)) {
1936 /* invalidate code */
1937 tb_invalidate_phys_page_range(addr, addr + length, 0);
1939 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1940 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1942 xen_modified_memory(addr, length);
/* Compute the widest single access (a power of two <= l) that region
 * 'mr' permits for a transfer at 'addr'. */
1945 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1947 unsigned access_size_max = mr->ops->valid.max_access_size;
1949 /* Regions are assumed to support 1-4 byte accesses unless
1950 otherwise specified. */
1951 if (access_size_max == 0) {
1952 access_size_max = 4;
1955 /* Bound the maximum access by the alignment of the address. */
1956 if (!mr->ops->impl.unaligned) {
/* addr & -addr isolates the lowest set bit == natural alignment of addr. */
1957 unsigned align_size_max = addr & -addr;
1958 if (align_size_max != 0 && align_size_max < access_size_max) {
1959 access_size_max = align_size_max;
1963 /* Don't attempt accesses larger than the maximum. */
1964 if (l > access_size_max) {
1965 l = access_size_max;
/* Round l down to a power of two (qemu_fls = find last set bit). */
1968 l = 1 << (qemu_fls(l) - 1);
/* Core slow-path copy between a host buffer and an address space. MMIO is
 * split into 1/2/4/8-byte device accesses via memory_access_size(); RAM
 * is memcpy'd directly. Returns the accumulated error flag from the
 * io_mem_read/io_mem_write calls. */
1974 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1975 int len, bool is_write)
1986 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1989 if (!memory_access_is_direct(mr, is_write)) {
1990 l = memory_access_size(mr, l, addr1);
1991 /* XXX: could force current_cpu to NULL to avoid
potential bugs */
1995 /* 64 bit write access */
1997 error |= io_mem_write(mr, addr1, val, 8);
2000 /* 32 bit write access */
2002 error |= io_mem_write(mr, addr1, val, 4);
2005 /* 16 bit write access */
2007 error |= io_mem_write(mr, addr1, val, 2);
2010 /* 8 bit write access */
2012 error |= io_mem_write(mr, addr1, val, 1);
/* Direct RAM write path: copy and update dirty/TB state. */
2018 addr1 += memory_region_get_ram_addr(mr);
2020 ptr = qemu_get_ram_ptr(addr1);
2021 memcpy(ptr, buf, l);
2022 invalidate_and_set_dirty(addr1, l);
/* Read direction, mirrored by access size. */
2025 if (!memory_access_is_direct(mr, is_write)) {
2027 l = memory_access_size(mr, l, addr1);
2030 /* 64 bit read access */
2031 error |= io_mem_read(mr, addr1, &val, 8);
2035 /* 32 bit read access */
2036 error |= io_mem_read(mr, addr1, &val, 4);
2040 /* 16 bit read access */
2041 error |= io_mem_read(mr, addr1, &val, 2);
2045 /* 8 bit read access */
2046 error |= io_mem_read(mr, addr1, &val, 1);
/* Direct RAM read path. */
2054 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2055 memcpy(buf, ptr, l);
/* Convenience wrappers over address_space_rw. */
2066 bool address_space_write(AddressSpace *as, hwaddr addr,
2067 const uint8_t *buf, int len)
2069 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2072 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2074 return address_space_rw(as, addr, buf, len, false);
/* Legacy global-address-space entry point. */
2078 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2079 int len, int is_write)
2081 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* ROM/RAM writer used by the loader: WRITE_DATA copies bytes into the
 * target, FLUSH_CACHE only flushes the host instruction cache over it. */
2084 enum write_rom_type {
2089 static inline void cpu_physical_memory_write_rom_internal(
2090 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2099 mr = address_space_translate(&address_space_memory,
2100 addr, &addr1, &l, true);
/* Skip anything that is neither RAM nor ROMD — ROM loading ignores MMIO. */
2102 if (!(memory_region_is_ram(mr) ||
2103 memory_region_is_romd(mr))) {
2106 addr1 += memory_region_get_ram_addr(mr);
2108 ptr = qemu_get_ram_ptr(addr1);
/* WRITE_DATA case: copy and update dirty/TB state. */
2111 memcpy(ptr, buf, l);
2112 invalidate_and_set_dirty(addr1, l);
/* FLUSH_CACHE case: invalidate the host icache over the host range. */
2115 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2125 /* used for ROM loading : can write in RAM and ROM */
2126 void cpu_physical_memory_write_rom(hwaddr addr,
2127 const uint8_t *buf, int len)
2129 cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2132 void cpu_flush_icache_range(hwaddr start, int len)
2135 * This function should do the same thing as an icache flush that was
2136 * triggered from within the guest. For TCG we are always cache coherent,
2137 * so there is no need to flush anything. For KVM / Xen we need to flush
2138 * the host's instruction cache at least.
2140 if (tcg_enabled()) {
2144 cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
/* Single global bounce buffer used by address_space_map() when the target
 * is not directly-accessible RAM. MapClients are callbacks queued by
 * callers waiting for the bounce buffer to become free again. */
2154 static BounceBuffer bounce;
2156 typedef struct MapClient {
2158 void (*callback)(void *opaque);
2159 QLIST_ENTRY(MapClient) link;
2162 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2163 = QLIST_HEAD_INITIALIZER(map_client_list);
/* Register a callback to run when the bounce buffer frees up; the
 * returned handle (the client itself) can be used to unregister. */
2165 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2167 MapClient *client = g_malloc(sizeof(*client));
2169 client->opaque = opaque;
2170 client->callback = callback;
2171 QLIST_INSERT_HEAD(&map_client_list, client, link);
2175 static void cpu_unregister_map_client(void *_client)
2177 MapClient *client = (MapClient *)_client;
2179 QLIST_REMOVE(client, link);
/* Drain the list, invoking each callback once; called after unmap frees
 * the bounce buffer. */
2183 static void cpu_notify_map_clients(void)
2187 while (!QLIST_EMPTY(&map_client_list)) {
2188 client = QLIST_FIRST(&map_client_list);
2189 client->callback(client->opaque);
2190 cpu_unregister_map_client(client);
/* Check whether [addr, addr+len) can be accessed in 'as': direct RAM is
 * always valid; MMIO is validated chunk-by-chunk against the region's
 * declared access rules. */
2194 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2201 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2202 if (!memory_access_is_direct(mr, is_write)) {
2203 l = memory_access_size(mr, l, addr);
2204 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2215 /* Map a physical memory region into a host virtual address.
2216 * May map a subset of the requested range, given by and returned in *plen.
2217 * May return NULL if resources needed to perform the mapping are exhausted.
2218 * Use only for reads OR writes - not for read-modify-write operations.
2219 * Use cpu_register_map_client() to know when retrying the map operation is
2220 * likely to succeed.
2222 void *address_space_map(AddressSpace *as,
2229 hwaddr l, xlat, base;
2230 MemoryRegion *mr, *this_mr;
2238 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2239 if (!memory_access_is_direct(mr, is_write)) {
/* Not RAM: fall back to the single bounce buffer (busy => NULL). */
2240 if (bounce.buffer) {
2243 /* Avoid unbounded allocations */
2244 l = MIN(l, TARGET_PAGE_SIZE);
2245 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2249 memory_region_ref(mr);
/* For reads, pre-fill the bounce buffer from guest memory now. */
2252 address_space_read(as, addr, bounce.buffer, l);
2256 return bounce.buffer;
/* Direct RAM: grow the mapping while translation stays contiguous
 * within the same MemoryRegion. */
2260 raddr = memory_region_get_ram_addr(mr);
2271 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2272 if (this_mr != mr || xlat != base + done) {
/* Take a reference so the region outlives the mapping (dropped in unmap). */
2277 memory_region_ref(mr);
2279 return qemu_ram_ptr_length(raddr + base, plen);
2282 /* Unmaps a memory region previously mapped by address_space_map().
2283 * Will also mark the memory as dirty if is_write == 1. access_len gives
2284 * the amount of memory that was actually read or written by the caller.
2286 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2287 int is_write, hwaddr access_len)
2289 if (buffer != bounce.buffer) {
/* Direct-RAM case: mark written pages dirty page-by-page, invalidate
 * the Xen map-cache entry if needed, and drop the map-time reference. */
2293 mr = qemu_ram_addr_from_host(buffer, &addr1);
2296 while (access_len) {
2298 l = TARGET_PAGE_SIZE;
2301 invalidate_and_set_dirty(addr1, l);
2306 if (xen_enabled()) {
2307 xen_invalidate_map_cache_entry(buffer);
2309 memory_region_unref(mr);
/* Bounce-buffer case: flush writes back to the guest, free the buffer,
 * and wake anyone registered via cpu_register_map_client(). */
2313 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2315 qemu_vfree(bounce.buffer);
2316 bounce.buffer = NULL;
2317 memory_region_unref(bounce.mr);
2318 cpu_notify_map_clients();
/* System-memory convenience wrappers. */
2321 void *cpu_physical_memory_map(hwaddr addr,
2325 return address_space_map(&address_space_memory, addr, plen, is_write);
2328 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2329 int is_write, hwaddr access_len)
2331 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2334 /* warning: addr must be aligned */
2335 static inline uint32_t ldl_phys_internal(hwaddr addr,
2336 enum device_endian endian)
2344 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* I/O path: 4-byte device read, then byte-swap when the requested
 * endianness disagrees with the target's compile-time endianness. */
2346 if (l < 4 || !memory_access_is_direct(mr, false)) {
2348 io_mem_read(mr, addr1, &val, 4);
2349 #if defined(TARGET_WORDS_BIGENDIAN)
2350 if (endian == DEVICE_LITTLE_ENDIAN) {
2354 if (endian == DEVICE_BIG_ENDIAN) {
/* RAM path: load directly with the endian-specific helper. */
2360 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2364 case DEVICE_LITTLE_ENDIAN:
2365 val = ldl_le_p(ptr);
2367 case DEVICE_BIG_ENDIAN:
2368 val = ldl_be_p(ptr);
/* Public 32-bit physical loads: native / little / big endianness. */
2378 uint32_t ldl_phys(hwaddr addr)
2380 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2383 uint32_t ldl_le_phys(hwaddr addr)
2385 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2388 uint32_t ldl_be_phys(hwaddr addr)
2390 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2393 /* warning: addr must be aligned */
2394 static inline uint64_t ldq_phys_internal(hwaddr addr,
2395 enum device_endian endian)
2403 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* I/O path: 8-byte device read plus endian fixup when needed. */
2405 if (l < 8 || !memory_access_is_direct(mr, false)) {
2407 io_mem_read(mr, addr1, &val, 8);
2408 #if defined(TARGET_WORDS_BIGENDIAN)
2409 if (endian == DEVICE_LITTLE_ENDIAN) {
2413 if (endian == DEVICE_BIG_ENDIAN) {
/* RAM path: direct load with the endian-specific helper. */
2419 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2423 case DEVICE_LITTLE_ENDIAN:
2424 val = ldq_le_p(ptr);
2426 case DEVICE_BIG_ENDIAN:
2427 val = ldq_be_p(ptr);
/* Public 64-bit physical loads. */
2437 uint64_t ldq_phys(hwaddr addr)
2439 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2442 uint64_t ldq_le_phys(hwaddr addr)
2444 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2447 uint64_t ldq_be_phys(hwaddr addr)
2449 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
/* Single-byte load: endianness is irrelevant, use the generic rw path. */
2453 uint32_t ldub_phys(hwaddr addr)
2456 cpu_physical_memory_read(addr, &val, 1);
2460 /* warning: addr must be aligned */
2461 static inline uint32_t lduw_phys_internal(hwaddr addr,
2462 enum device_endian endian)
2470 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* I/O path: 2-byte device read plus endian fixup when needed. */
2472 if (l < 2 || !memory_access_is_direct(mr, false)) {
2474 io_mem_read(mr, addr1, &val, 2);
2475 #if defined(TARGET_WORDS_BIGENDIAN)
2476 if (endian == DEVICE_LITTLE_ENDIAN) {
2480 if (endian == DEVICE_BIG_ENDIAN) {
/* RAM path: direct load with the endian-specific helper. */
2486 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2490 case DEVICE_LITTLE_ENDIAN:
2491 val = lduw_le_p(ptr);
2493 case DEVICE_BIG_ENDIAN:
2494 val = lduw_be_p(ptr);
/* Public 16-bit physical loads. */
2504 uint32_t lduw_phys(hwaddr addr)
2506 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2509 uint32_t lduw_le_phys(hwaddr addr)
2511 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2514 uint32_t lduw_be_phys(hwaddr addr)
2516 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2519 /* warning: addr must be aligned. The ram page is not masked as dirty
2520 and the code inside is not invalidated. It is useful if the dirty
2521 bits are used to track modified PTEs */
2522 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2529 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2531 if (l < 4 || !memory_access_is_direct(mr, true)) {
2532 io_mem_write(mr, addr1, val, 4);
2534 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2535 ptr = qemu_get_ram_ptr(addr1);
/* During migration the write must not be lost: invalidate code and set
 * the dirty flags even on this otherwise "notdirty" path. */
2538 if (unlikely(in_migration)) {
2539 if (cpu_physical_memory_is_clean(addr1)) {
2540 /* invalidate code */
2541 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2543 cpu_physical_memory_set_dirty_flag(addr1,
2544 DIRTY_MEMORY_MIGRATION);
2545 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2551 /* warning: addr must be aligned */
2552 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2553 enum device_endian endian)
2560 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* I/O path: swap to the requested endianness before the 4-byte write. */
2562 if (l < 4 || !memory_access_is_direct(mr, true)) {
2563 #if defined(TARGET_WORDS_BIGENDIAN)
2564 if (endian == DEVICE_LITTLE_ENDIAN) {
2568 if (endian == DEVICE_BIG_ENDIAN) {
2572 io_mem_write(mr, addr1, val, 4);
/* RAM path: store directly, then update dirty/TB state. */
2575 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2576 ptr = qemu_get_ram_ptr(addr1);
2578 case DEVICE_LITTLE_ENDIAN:
2581 case DEVICE_BIG_ENDIAN:
2588 invalidate_and_set_dirty(addr1, 4);
/* Public 32-bit physical stores. */
2592 void stl_phys(hwaddr addr, uint32_t val)
2594 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2597 void stl_le_phys(hwaddr addr, uint32_t val)
2599 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2602 void stl_be_phys(hwaddr addr, uint32_t val)
2604 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
/* Single-byte store: truncated to one byte, via the generic rw path. */
2608 void stb_phys(hwaddr addr, uint32_t val)
2611 cpu_physical_memory_write(addr, &v, 1);
2614 /* warning: addr must be aligned */
2615 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2616 enum device_endian endian)
2623 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* I/O path: swap to the requested endianness before the 2-byte write. */
2625 if (l < 2 || !memory_access_is_direct(mr, true)) {
2626 #if defined(TARGET_WORDS_BIGENDIAN)
2627 if (endian == DEVICE_LITTLE_ENDIAN) {
2631 if (endian == DEVICE_BIG_ENDIAN) {
2635 io_mem_write(mr, addr1, val, 2);
/* RAM path: store directly, then update dirty/TB state. */
2638 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2639 ptr = qemu_get_ram_ptr(addr1);
2641 case DEVICE_LITTLE_ENDIAN:
2644 case DEVICE_BIG_ENDIAN:
2651 invalidate_and_set_dirty(addr1, 2);
/* Public 16-bit physical stores. */
2655 void stw_phys(hwaddr addr, uint32_t val)
2657 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2660 void stw_le_phys(hwaddr addr, uint32_t val)
2662 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2665 void stw_be_phys(hwaddr addr, uint32_t val)
2667 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
/* 64-bit physical stores: byte-swap to the requested endianness in place,
 * then go through the generic rw path (no direct-RAM fast path here). */
2671 void stq_phys(hwaddr addr, uint64_t val)
2674 cpu_physical_memory_write(addr, &val, 8);
2677 void stq_le_phys(hwaddr addr, uint64_t val)
2679 val = cpu_to_le64(val);
2680 cpu_physical_memory_write(addr, &val, 8);
2683 void stq_be_phys(hwaddr addr, uint64_t val)
2685 val = cpu_to_be64(val);
2686 cpu_physical_memory_write(addr, &val, 8);
2689 /* virtual memory access for debug (includes writing to ROM) */
2690 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2691 uint8_t *buf, int len, int is_write)
/* Translate each guest-virtual page through the CPU's MMU. */
2698 page = addr & TARGET_PAGE_MASK;
2699 phys_addr = cpu_get_phys_page_debug(cpu, page);
2700 /* if no physical page mapped, return an error */
2701 if (phys_addr == -1)
/* Clamp this chunk to the end of the current page. */
2703 l = (page + TARGET_PAGE_SIZE) - addr;
2706 phys_addr += (addr & ~TARGET_PAGE_MASK);
/* Debugger writes may target ROM too, hence the _rom variant. */
2708 cpu_physical_memory_write_rom(phys_addr, buf, l);
2710 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2719 #if !defined(CONFIG_USER_ONLY)
2722 * A helper function for the _utterly broken_ virtio device model to find out if
2723 * it's running on a big endian machine. Don't do this at home kids!
2725 bool virtio_is_big_endian(void);
2726 bool virtio_is_big_endian(void)
/* Compile-time answer based on the target's word endianness. */
2728 #if defined(TARGET_WORDS_BIGENDIAN)
2737 #ifndef CONFIG_USER_ONLY
/* True when the physical address resolves to something that is neither
 * RAM nor a ROM device — i.e. genuine MMIO. */
2738 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2743 mr = address_space_translate(&address_space_memory,
2744 phys_addr, &phys_addr, &l, false);
2746 return !(memory_region_is_ram(mr) ||
2747 memory_region_is_romd(mr));
2750 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2754 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2755 func(block->host, block->offset, block->length, opaque);