4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
53 #include "exec/ram_addr.h"
54 #include "qemu/cache-utils.h"
56 #include "qemu/range.h"
58 //#define DEBUG_SUBPAGE
60 #if !defined(CONFIG_USER_ONLY)
/* True while memory-dirty tracking is enabled for migration (see
 * cpu_physical_memory_set_dirty_tracking below). */
61 static bool in_migration;
/* Global registry of guest RAM blocks; .mutex guards list mutation. */
63 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
/* Root MemoryRegions for the two flat address spaces below. */
65 static MemoryRegion *system_memory;
66 static MemoryRegion *system_io;
68 AddressSpace address_space_io;
69 AddressSpace address_space_memory;
/* Catch-all regions used as IOTLB targets for ROM, not-dirty RAM and
 * unassigned addresses. */
71 MemoryRegion io_mem_rom, io_mem_notdirty;
72 static MemoryRegion io_mem_unassigned;
/* List of all created CPUs. */
76 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
77 /* current CPU in the current thread. It is only valid inside
79 DEFINE_TLS(CPUState *, current_cpu);
80 /* 0 = Do not count executed instructions.
81 1 = Precise instruction counting.
82 2 = Adaptive rate instruction counting. */
85 #if !defined(CONFIG_USER_ONLY)
87 typedef struct PhysPageEntry PhysPageEntry;
/* One entry of the radix tree mapping physical pages to sections. */
89 struct PhysPageEntry {
90 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
92 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
/* Sentinel "no node" value; must fit in the ptr bitfield. */
96 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
98 /* Size of the L2 (and L3, etc) page tables. */
99 #define ADDR_SPACE_BITS 64
102 #define P_L2_SIZE (1 << P_L2_BITS)
/* Number of radix-tree levels needed to cover ADDR_SPACE_BITS. */
104 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
106 typedef PhysPageEntry Node[P_L2_SIZE];
/* Backing storage for the radix tree: node pool + section table. */
108 typedef struct PhysPageMap {
109 unsigned sections_nb;
110 unsigned sections_nb_alloc;
112 unsigned nodes_nb_alloc;
114 MemoryRegionSection *sections;
117 struct AddressSpaceDispatch {
118 /* This is a multi-level map on the physical address space.
119  * The bottom level has pointers to MemoryRegionSections.
121 PhysPageEntry phys_map;
/* Offset of an address within its page, used to index sub_section. */
126 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
127 typedef struct subpage_t {
131 uint16_t sub_section[TARGET_PAGE_SIZE];
/* Well-known indices into the sections table. */
134 #define PHYS_SECTION_UNASSIGNED 0
135 #define PHYS_SECTION_NOTDIRTY 1
136 #define PHYS_SECTION_ROM 2
137 #define PHYS_SECTION_WATCH 3
139 static void io_mem_init(void);
140 static void memory_map_init(void);
142 static MemoryRegion io_mem_watch;
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
/* Recursively populate one level of the radix tree, pointing [*index,
 * *index + *nb) at section @leaf.  Allocates intermediate nodes lazily. */
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172                                 hwaddr *index, hwaddr *nb, uint16_t leaf,
/* Number of pages covered by one entry at this level. */
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
/* Lazily allocate the child node the first time this subtree is touched. */
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
/* NOTE(review): on the leaf level the fresh node's entries are reset to
 * the unassigned section — part of this branch is missing from this
 * extract; confirm against upstream before relying on details. */
183 for (i = 0; i < P_L2_SIZE; i++) {
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
/* Whole-entry aligned and large enough: set the leaf directly ... */
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
/* ... otherwise recurse into the next (finer) level. */
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217  * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
/* P_L2_SIZE acts as "no valid child found yet". */
221 unsigned valid_ptr = P_L2_SIZE;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
/* Depth-first: compact children before deciding about this node. */
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
/* skip is a 6-bit field minus flags; (1 << 3) is the hard cap here. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
/* Splice the single child into our entry, accumulating its skip. */
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259  * should never reach here.
260  * But since it's so simple to handle this, let's do it just in case we
265 lp->skip += p[valid_ptr].skip;
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
282 hwaddr index = addr >> TARGET_PAGE_BITS;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return §ions[PHYS_SECTION_UNASSIGNED];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return §ions[lp.ptr];
298 return §ions[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
347 if (memory_region_is_romd(mr)) {
/* Translate @addr in @as down through any IOMMUs to a terminal
 * MemoryRegion, returning it and filling *xlat/*plen like
 * address_space_translate_internal.  Faulting IOMMU translations are
 * redirected to io_mem_unassigned. */
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
359 MemoryRegionSection *section;
/* NOTE(review): the surrounding for(;;) loop that re-translates through
 * iotlb.target_as is missing from this extract. */
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
/* No IOMMU: translation is final. */
367 if (!mr->iommu_ops) {
371 iotlb = mr->iommu_ops->translate(mr, addr);
/* Splice the translated page address with the in-page offset. */
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
/* Permission bit 0 = read, bit 1 = write. */
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
380 as = iotlb.target_as;
/* Direct RAM accesses must not cross a target page boundary. */
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400 assert(!section->mr->iommu_ops);
/* One-time global init for the execution core; in softmmu builds this
 * sets up the RAM-list lock (and, in lines missing from this extract,
 * presumably the memory map and IO regions — confirm upstream). */
405 void cpu_exec_init_all(void)
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
414 #if !defined(CONFIG_USER_ONLY)
416 static int cpu_common_post_load(void *opaque, int version_id)
418 CPUState *cpu = opaque;
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
/* Migration description for architecture-independent CPUState fields. */
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .post_load = cpu_common_post_load,
434 .fields = (VMStateField []) {
435 VMSTATE_UINT32(halted, CPUState),
436 VMSTATE_UINT32(interrupt_request, CPUState),
437 VMSTATE_END_OF_LIST()
443 CPUState *qemu_get_cpu(int index)
448 if (cpu->cpu_index == index) {
/* Register a freshly created CPU: assign it the next cpu_index, hook it
 * into the global cpus list and wire up savevm/vmstate handlers. */
456 void cpu_exec_init(CPUArchState *env)
458 CPUState *cpu = ENV_GET_CPU(env);
459 CPUClass *cc = CPU_GET_CLASS(cpu);
463 #if defined(CONFIG_USER_ONLY)
/* Count existing CPUs to derive this one's index. */
467 CPU_FOREACH(some_cpu) {
470 cpu->cpu_index = cpu_index;
472 QTAILQ_INIT(&env->breakpoints);
473 QTAILQ_INIT(&env->watchpoints);
474 #ifndef CONFIG_USER_ONLY
475 cpu->thread_id = qemu_get_thread_id();
477 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
478 #if defined(CONFIG_USER_ONLY)
/* Register common state only if the device class has no vmsd of its own. */
481 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
482 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
/* Legacy cpu_save/cpu_load path; mutually exclusive with class vmsd. */
484 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
485 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
486 cpu_save, cpu_load, env);
487 assert(cc->vmsd == NULL);
488 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
490 if (cc->vmsd != NULL) {
491 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
495 #if defined(TARGET_HAS_ICE)
496 #if defined(CONFIG_USER_ONLY)
/* User-mode: guest virtual == host-visible, invalidate the TB directly. */
497 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
499 tb_invalidate_phys_page_range(pc, pc + 1, 0)t;
502 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
/* Softmmu: translate pc to a physical address first, then invalidate. */
504 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
506 tb_invalidate_phys_addr(&address_space_memory,
507 phys | (pc & ~TARGET_PAGE_MASK));
511 #endif /* TARGET_HAS_ICE */
513 #if defined(CONFIG_USER_ONLY)
/* User-mode stubs: watchpoints are not supported. */
514 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
519 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
520 int flags, CPUWatchpoint **watchpoint)
525 /* Add a watchpoint. */
526 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
527 int flags, CPUWatchpoint **watchpoint)
529 target_ulong len_mask = ~(len - 1);
532 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
533 if ((len & (len - 1)) || (addr & ~len_mask) ||
534 len == 0 || len > TARGET_PAGE_SIZE) {
535 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
536 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
539 wp = g_malloc(sizeof(*wp));
542 wp->len_mask = len_mask;
545 /* keep all GDB-injected watchpoints in front */
547 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
549 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
/* Force the page back through the slow path so accesses are checked. */
551 tlb_flush_page(env, addr);
558 /* Remove a specific watchpoint. */
/* Matches on address, length mask and flags (ignoring the transient
 * BP_WATCHPOINT_HIT bit). */
559 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
562 target_ulong len_mask = ~(len - 1);
565 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
566 if (addr == wp->vaddr && len_mask == wp->len_mask
567 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
568 cpu_watchpoint_remove_by_ref(env, wp);
575 /* Remove a specific watchpoint by reference. */
576 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
578 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
580 tlb_flush_page(env, watchpoint->vaddr);
585 /* Remove all matching watchpoints. */
586 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
588 CPUWatchpoint *wp, *next;
590 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
591 if (wp->flags & mask)
592 cpu_watchpoint_remove_by_ref(env, wp);
597 /* Add a breakpoint. */
598 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
599 CPUBreakpoint **breakpoint)
601 #if defined(TARGET_HAS_ICE)
604 bp = g_malloc(sizeof(*bp));
609 /* keep all GDB-injected breakpoints in front */
610 if (flags & BP_GDB) {
611 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
613 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
/* Drop any translated block containing pc so the bp takes effect. */
616 breakpoint_invalidate(ENV_GET_CPU(env), pc);
627 /* Remove a specific breakpoint. */
/* Matches on exact pc and flags. */
628 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
630 #if defined(TARGET_HAS_ICE)
633 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
634 if (bp->pc == pc && bp->flags == flags) {
635 cpu_breakpoint_remove_by_ref(env, bp);
645 /* Remove a specific breakpoint by reference. */
646 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
648 #if defined(TARGET_HAS_ICE)
649 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
651 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
657 /* Remove all matching breakpoints. */
658 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
660 #if defined(TARGET_HAS_ICE)
661 CPUBreakpoint *bp, *next;
663 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
664 if (bp->flags & mask)
665 cpu_breakpoint_remove_by_ref(env, bp);
670 /* enable or disable single step mode. EXCP_DEBUG is returned by the
671 CPU loop after each instruction */
672 void cpu_single_step(CPUState *cpu, int enabled)
674 #if defined(TARGET_HAS_ICE)
675 if (cpu->singlestep_enabled != enabled) {
676 cpu->singlestep_enabled = enabled;
/* Under KVM the in-kernel debug state must be refreshed instead. */
678 kvm_update_guest_debug(cpu, 0);
680 /* must flush all the translated code to avoid inconsistencies */
681 /* XXX: only flush what is necessary */
682 CPUArchState *env = cpu->env_ptr;
/* Print a fatal error (to stderr and, if enabled, the qemu log) with a
 * full CPU state dump, then abort().  Never returns. */
689 void cpu_abort(CPUArchState *env, const char *fmt, ...)
691 CPUState *cpu = ENV_GET_CPU(env);
697 fprintf(stderr, "qemu: fatal: ");
698 vfprintf(stderr, fmt, ap);
699 fprintf(stderr, "\n");
700 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
701 if (qemu_log_enabled()) {
/* ap2 is a va_copy of ap; the copy is consumed by the log path. */
702 qemu_log("qemu: fatal: ");
703 qemu_log_vprintf(fmt, ap2);
705 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
711 #if defined(CONFIG_USER_ONLY)
/* Restore default SIGABRT handling so abort() produces a core dump. */
713 struct sigaction act;
714 sigfillset(&act.sa_mask);
715 act.sa_handler = SIG_DFL;
716 sigaction(SIGABRT, &act, NULL);
722 #if !defined(CONFIG_USER_ONLY)
/* Find the RAMBlock containing ram offset @addr, consulting the MRU
 * cache first; aborts (in lines missing here) if no block matches. */
723 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
727 /* The list is protected by the iothread lock here. */
728 block = ram_list.mru_block;
729 if (block && addr - block->offset < block->length) {
732 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
733 if (addr - block->offset < block->length) {
738 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
/* Remember the hit for the next lookup. */
742 ram_list.mru_block = block;
746 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
752 end = TARGET_PAGE_ALIGN(start + length);
753 start &= TARGET_PAGE_MASK;
755 block = qemu_get_ram_block(start);
756 assert(block == qemu_get_ram_block(end - 1));
757 start1 = (uintptr_t)block->host + (start - block->offset);
758 cpu_tlb_reset_dirty_all(start1, length);
761 /* Note: start and end must be within the same ram block. */
/* Clear the dirty bitmap for @client over the range, then push the
 * change into the TLBs so writes are trapped again. */
762 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
767 cpu_physical_memory_clear_dirty_range(start, length, client);
770 tlb_reset_dirty_range_all(start, length);
774 static void cpu_physical_memory_set_dirty_tracking(bool enable)
776 in_migration = enable;
/* Build the IOTLB value for a TLB entry covering @section: RAM gets its
 * ram_addr (tagged NOTDIRTY/ROM as needed), MMIO gets the section index,
 * and watched pages are redirected to the watch section with TLB_MMIO. */
779 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
780 MemoryRegionSection *section,
782 hwaddr paddr, hwaddr xlat,
784 target_ulong *address)
789 if (memory_region_is_ram(section->mr)) {
791 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
793 if (!section->readonly) {
794 iotlb |= PHYS_SECTION_NOTDIRTY;
796 iotlb |= PHYS_SECTION_ROM;
/* MMIO: encode the section's index in the dispatch table. */
799 iotlb = section - address_space_memory.dispatch->map.sections;
803 /* Make accesses to pages with watchpoints go via the
804 watchpoint trap routines. */
805 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
806 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
807 /* Avoid trapping reads of pages with a write breakpoint. */
808 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
809 iotlb = PHYS_SECTION_WATCH + paddr;
810 *address |= TLB_MMIO;
818 #endif /* defined(CONFIG_USER_ONLY) */
820 #if !defined(CONFIG_USER_ONLY)
/* Forward declarations for the sub-page machinery defined below. */
822 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
824 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
/* Hook used to allocate anonymous guest RAM; overridable by accelerators. */
826 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
829  * Set a custom physical guest memory allocator.
830  * Accelerators with unusual needs may need this. Hopefully, we can
831  * get rid of it eventually.
833 void phys_mem_set_alloc(void *(*alloc)(size_t))
835 phys_mem_alloc = alloc;
838 static uint16_t phys_section_add(PhysPageMap *map,
839 MemoryRegionSection *section)
841 /* The physical section number is ORed with a page-aligned
842 * pointer to produce the iotlb entries. Thus it should
843 * never overflow into the page-aligned value.
845 assert(map->sections_nb < TARGET_PAGE_SIZE);
847 if (map->sections_nb == map->sections_nb_alloc) {
848 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
849 map->sections = g_renew(MemoryRegionSection, map->sections,
850 map->sections_nb_alloc);
852 map->sections[map->sections_nb] = *section;
853 memory_region_ref(section->mr);
854 return map->sections_nb++;
857 static void phys_section_destroy(MemoryRegion *mr)
859 memory_region_unref(mr);
862 subpage_t *subpage = container_of(mr, subpage_t, iomem);
863 memory_region_destroy(&subpage->iomem);
868 static void phys_sections_free(PhysPageMap *map)
870 while (map->sections_nb > 0) {
871 MemoryRegionSection *section = &map->sections[--map->sections_nb];
872 phys_section_destroy(section->mr);
874 g_free(map->sections);
/* Register a section smaller than a page: create (or reuse) the
 * sub-page container for the page and map the byte range inside it. */
878 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
881 hwaddr base = section->offset_within_address_space
883 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
884 d->map.nodes, d->map.sections);
885 MemoryRegionSection subsection = {
886 .offset_within_address_space = base,
887 .size = int128_make64(TARGET_PAGE_SIZE),
/* The page must currently be unassigned or already a subpage container. */
891 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
893 if (!(existing->mr->subpage)) {
894 subpage = subpage_init(d->as, base);
895 subsection.mr = &subpage->iomem;
896 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
897 phys_section_add(&d->map, &subsection));
899 subpage = container_of(existing->mr, subpage_t, iomem);
/* Map the in-page byte range [start, end] to the new section. */
901 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
902 end = start + int128_get64(section->size) - 1;
903 subpage_register(subpage, start, end,
904 phys_section_add(&d->map, section));
908 static void register_multipage(AddressSpaceDispatch *d,
909 MemoryRegionSection *section)
911 hwaddr start_addr = section->offset_within_address_space;
912 uint16_t section_index = phys_section_add(&d->map, section);
913 uint64_t num_pages = int128_get64(int128_rshift(section->size,
917 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
/* MemoryListener region_add hook: split @section into an unaligned
 * head, whole pages in the middle and an unaligned tail, registering
 * each piece as a subpage or multipage mapping as appropriate. */
920 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
922 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
923 AddressSpaceDispatch *d = as->next_dispatch;
924 MemoryRegionSection now = *section, remain = *section;
925 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
/* Unaligned head: cover up to the next page boundary with a subpage. */
927 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
928 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
929 - now.offset_within_address_space;
931 now.size = int128_min(int128_make64(left), now.size);
932 register_subpage(d, &now);
934 now.size = int128_zero();
/* Consume the remainder piece by piece. */
936 while (int128_ne(remain.size, now.size)) {
937 remain.size = int128_sub(remain.size, now.size);
938 remain.offset_within_address_space += int128_get64(now.size);
939 remain.offset_within_region += int128_get64(now.size);
941 if (int128_lt(remain.size, page_size)) {
942 register_subpage(d, &now);
943 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
944 now.size = page_size;
945 register_subpage(d, &now);
/* Aligned middle: round down to whole pages and map them at once. */
947 now.size = int128_and(now.size, int128_neg(page_size));
948 register_multipage(d, &now);
/* Drain KVM's coalesced-MMIO ring, if KVM is in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}
959 void qemu_mutex_lock_ramlist(void)
961 qemu_mutex_lock(&ram_list.mutex);
964 void qemu_mutex_unlock_ramlist(void)
966 qemu_mutex_unlock(&ram_list.mutex);
973 #define HUGETLBFS_MAGIC 0x958458f6
/* Return the block size of the filesystem at @path (the huge page size
 * when it is hugetlbfs), warning if it is not hugetlbfs at all. */
975 static long gethugepagesize(const char *path)
/* Retry statfs on EINTR. */
981 ret = statfs(path, &fs);
982 } while (ret != 0 && errno == EINTR);
989 if (fs.f_type != HUGETLBFS_MAGIC)
990 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
995 static sigjmp_buf sigjump;
997 static void sigbus_handler(int signal)
999 siglongjmp(sigjump, 1);
/* Allocate @memory bytes of guest RAM backed by a (hugetlbfs) file under
 * @path: create a temp file, ftruncate, mmap, then touch every huge page
 * under a SIGBUS guard to force real allocation. */
1002 static void *file_ram_alloc(RAMBlock *block,
1007 char *sanitized_name;
1011 unsigned long hpagesize;
1013 hpagesize = gethugepagesize(path);
/* A request smaller than one huge page cannot be satisfied here. */
1018 if (memory < hpagesize) {
1022 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1023 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1027 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1028 sanitized_name = g_strdup(block->mr->name);
1029 for (c = sanitized_name; *c != '\0'; c++) {
1034 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1036 g_free(sanitized_name);
1038 fd = mkstemp(filename);
1040 perror("unable to create backing store for hugepages");
/* Round the size up to a whole number of huge pages. */
1047 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1050  * ftruncate is not supported by hugetlbfs in older
1051  * hosts, so don't bother bailing out on errors.
1052  * If anything goes wrong with it under other filesystems,
1055 if (ftruncate(fd, memory))
1056 perror("ftruncate");
1058 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1059 if (area == MAP_FAILED) {
1060 perror("file_ram_alloc: can't mmap RAM pages");
/* Install a temporary SIGBUS handler: touching a page can fault if the
 * hugepage pool is exhausted, and we want to fail gracefully. */
1067 struct sigaction act, oldact;
1068 sigset_t set, oldset;
1070 memset(&act, 0, sizeof(act));
1071 act.sa_handler = &sigbus_handler;
1074 ret = sigaction(SIGBUS, &act, &oldact);
1076 perror("file_ram_alloc: failed to install signal handler");
1080 /* unblock SIGBUS */
1082 sigaddset(&set, SIGBUS);
1083 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1085 if (sigsetjmp(sigjump, 1)) {
1086 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1090 /* MAP_POPULATE silently ignores failures */
/* Write one byte per huge page to force allocation now. */
1091 for (i = 0; i < (memory/hpagesize); i++) {
1092 memset(area + (hpagesize*i), 0, 1);
1095 ret = sigaction(SIGBUS, &oldact, NULL);
1097 perror("file_ram_alloc: failed to reinstall signal handler");
1101 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
/* Fallback for hosts without hugetlbfs support: -mem-path is fatal. */
1108 static void *file_ram_alloc(RAMBlock *block,
1112 fprintf(stderr, "-mem-path not supported on this host\n");
/* Find a free gap of @size bytes in the guest ram_addr_t space using a
 * best-fit (smallest adequate gap) scan over the block list. */
1117 static ram_addr_t find_ram_offset(ram_addr_t size)
1119 RAMBlock *block, *next_block;
1120 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1122 assert(size != 0); /* it would hand out same offset multiple times */
1124 if (QTAILQ_EMPTY(&ram_list.blocks))
1127 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1128 ram_addr_t end, next = RAM_ADDR_MAX;
1130 end = block->offset + block->length;
/* Find the nearest block starting at or after this block's end. */
1132 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1133 if (next_block->offset >= end) {
1134 next = MIN(next, next_block->offset);
/* Keep the smallest gap that still fits the request. */
1137 if (next - end >= size && next - end < mingap) {
1139 mingap = next - end;
1143 if (offset == RAM_ADDR_MAX) {
1144 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1152 ram_addr_t last_ram_offset(void)
1155 ram_addr_t last = 0;
1157 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1158 last = MAX(last, block->offset + block->length);
/* Honour the "dump-guest-core" machine option: when off, mark guest RAM
 * MADV_DONTDUMP so core dumps stay small. */
1163 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1167 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1168 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1169 "dump-guest-core", true)) {
1170 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1172 perror("qemu_madvise");
1173 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1174 "but dump_guest_core=off specified\n");
/* Assign the migration id string ("<devpath>/<name>") to the RAM block
 * at offset @addr and abort on duplicate ids. */
1179 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1181 RAMBlock *new_block, *block;
1184 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1185 if (block->offset == addr) {
/* The block must not have been named yet. */
1191 assert(!new_block->idstr[0]);
1194 char *id = qdev_get_dev_path(dev);
1196 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1200 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1202 /* This assumes the iothread lock is taken here too. */
1203 qemu_mutex_lock_ramlist();
1204 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1205 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1206 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1211 qemu_mutex_unlock_ramlist();
1214 static int memory_try_enable_merging(void *addr, size_t len)
1216 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1217 /* disabled by the user */
1221 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
/* Allocate (or adopt @host as) a new guest RAM block of @size bytes for
 * region @mr, insert it into the sorted block list, grow the dirty
 * bitmaps, and return the block's ram_addr_t offset. */
1224 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1227 RAMBlock *block, *new_block;
1228 ram_addr_t old_ram_size, new_ram_size;
1230 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1232 size = TARGET_PAGE_ALIGN(size);
1233 new_block = g_malloc0(sizeof(*new_block));
1236 /* This assumes the iothread lock is taken here too. */
1237 qemu_mutex_lock_ramlist();
1239 new_block->offset = find_ram_offset(size);
/* Caller-provided memory: adopt it and mark it preallocated. */
1241 new_block->host = host;
1242 new_block->flags |= RAM_PREALLOC_MASK;
1243 } else if (xen_enabled()) {
1245 fprintf(stderr, "-mem-path not supported with Xen\n");
1248 xen_ram_alloc(new_block->offset, size, mr);
/* -mem-path only works with the default anonymous allocator. */
1251 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1253  * file_ram_alloc() needs to allocate just like
1254  * phys_mem_alloc, but we haven't bothered to provide
1258 "-mem-path not supported with this accelerator\n");
1261 new_block->host = file_ram_alloc(new_block, size, mem_path);
1263 if (!new_block->host) {
1264 new_block->host = phys_mem_alloc(size);
1265 if (!new_block->host) {
1266 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1267 new_block->mr->name, strerror(errno));
1270 memory_try_enable_merging(new_block->host, size);
1273 new_block->length = size;
1275 /* Keep the list sorted from biggest to smallest block. */
1276 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1277 if (block->length < new_block->length) {
1282 QTAILQ_INSERT_BEFORE(block, new_block, next);
1284 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
/* Invalidate the MRU cache — the list changed. */
1286 ram_list.mru_block = NULL;
1289 qemu_mutex_unlock_ramlist();
1291 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
/* Extend the per-client dirty bitmaps to cover the new pages. */
1293 if (new_ram_size > old_ram_size) {
1295 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1296 ram_list.dirty_memory[i] =
1297 bitmap_zero_extend(ram_list.dirty_memory[i],
1298 old_ram_size, new_ram_size);
1301 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1303 qemu_ram_setup_dump(new_block->host, size);
1304 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1305 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1308 kvm_setup_guest_memory(new_block->host, size);
1310 return new_block->offset;
1313 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1315 return qemu_ram_alloc_from_ptr(size, NULL, mr);
/* Unregister the block at @addr without freeing its host memory — used
 * for blocks whose backing storage is owned by the caller. */
1318 void qemu_ram_free_from_ptr(ram_addr_t addr)
1322 /* This assumes the iothread lock is taken here too. */
1323 qemu_mutex_lock_ramlist();
1324 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1325 if (addr == block->offset) {
1326 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1327 ram_list.mru_block = NULL;
1333 qemu_mutex_unlock_ramlist();
/* Unregister the block at @addr and release its backing storage using
 * whichever mechanism allocated it (prealloc/Xen/file/anonymous). */
1336 void qemu_ram_free(ram_addr_t addr)
1340 /* This assumes the iothread lock is taken here too. */
1341 qemu_mutex_lock_ramlist();
1342 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1343 if (addr == block->offset) {
1344 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1345 ram_list.mru_block = NULL;
/* Preallocated: caller owns the memory, nothing to free here. */
1347 if (block->flags & RAM_PREALLOC_MASK) {
1349 } else if (xen_enabled()) {
1350 xen_invalidate_map_cache_entry(block->host);
/* File-backed (hugetlbfs): unmap the file mapping. */
1352 } else if (block->fd >= 0) {
1353 munmap(block->host, block->length);
1357 qemu_anon_ram_free(block->host, block->length);
1363 qemu_mutex_unlock_ramlist();
/* Re-establish the host mapping for guest RAM range [addr, addr+length)
 * in place (e.g. after an MCE poisoned the old pages), then restore the
 * merge/dump madvise state. */
1368 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1375 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1376 offset = addr - block->offset;
1377 if (offset < block->length) {
1378 vaddr = block->host + offset;
1379 if (block->flags & RAM_PREALLOC_MASK) {
1381 } else if (xen_enabled()) {
1385 munmap(vaddr, length);
/* File-backed mapping: remap the same file range at the same address. */
1386 if (block->fd >= 0) {
1388 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1391 flags |= MAP_PRIVATE;
1393 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1394 flags, block->fd, offset);
1397  * Remap needs to match alloc. Accelerators that
1398  * set phys_mem_alloc never remap. If they did,
1399  * we'd need a remap hook here.
1401 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1403 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1404 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
/* MAP_FIXED-style expectation: the new mapping must land at vaddr. */
1407 if (area != vaddr) {
1408 fprintf(stderr, "Could not remap addr: "
1409 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1413 memory_try_enable_merging(vaddr, length);
1414 qemu_ram_setup_dump(vaddr, length);
1420 #endif /* !_WIN32 */
1422 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1423 With the exception of the softmmu code in this file, this should
1424 only be used for local memory (e.g. video ram) that the device owns,
1425 and knows it isn't going to access beyond the end of the block.
1427 It should not be used for general purpose DMA.
1428 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1430 void *qemu_get_ram_ptr(ram_addr_t addr)
1432 RAMBlock *block = qemu_get_ram_block(addr);
/* Under Xen RAM is mapped on demand through the map cache. */
1434 if (xen_enabled()) {
1435 /* We need to check if the requested address is in the RAM
1436  * because we don't want to map the entire memory in QEMU.
1437 * In that case just map until the end of the page.
1439 if (block->offset == 0) {
1440 return xen_map_cache(addr, 0, 0);
1441 } else if (block->host == NULL) {
1443 xen_map_cache(block->offset, block->length, 1);
1446 return block->host + (addr - block->offset);
1449 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1450  * but takes a size argument */
/* On return *size may be shrunk to what remains of the containing block. */
1451 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1456 if (xen_enabled()) {
1457 return xen_map_cache(addr, *size, 1);
1461 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1462 if (addr - block->offset < block->length) {
/* Clamp the request so it does not run past the block's end. */
1463 if (addr - block->offset + *size > block->length)
1464 *size = block->length - addr + block->offset;
1465 return block->host + (addr - block->offset);
1469 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1474 /* Some of the softmmu routines need to translate from a host pointer
1475    (typically a TLB entry) back to a ram offset. */
/* Returns the owning MemoryRegion and stores the ram offset in
 * *@ram_addr; consults the MRU cache before scanning the list. */
1476 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1479 uint8_t *host = ptr;
1481 if (xen_enabled()) {
1482 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1483 return qemu_get_ram_block(*ram_addr)->mr;
1486 block = ram_list.mru_block;
1487 if (block && block->host && host - block->host < block->length) {
1491 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1492 /* This case happens when the block is not mapped. */
1493 if (block->host == NULL) {
1496 if (host - block->host < block->length) {
1504 *ram_addr = block->offset + (host - block->host);
/* Write handler for RAM pages that are write-protected because they contain
 * translated code (or are dirty-tracked): invalidate affected TBs, perform
 * the store into RAM, then update the dirty bitmaps. */
1508 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1509                                uint64_t val, unsigned size)
/* Only invalidate if the CODE dirty flag is clear, i.e. TBs may exist here. */
1511     if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1512         tb_invalidate_phys_page_fast(ram_addr, size);
/* Size-dispatched store into guest RAM (switch scaffolding elided). */
1516         stb_p(qemu_get_ram_ptr(ram_addr), val);
1519         stw_p(qemu_get_ram_ptr(ram_addr), val);
1522         stl_p(qemu_get_ram_ptr(ram_addr), val);
1527     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1528     cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1529     /* we remove the notdirty callback only if the code has been
1531     if (!cpu_physical_memory_is_clean(ram_addr)) {
1532         CPUArchState *env = current_cpu->env_ptr;
/* Flip the TLB entry back to a direct RAM mapping for this vaddr. */
1533         tlb_set_dirty(env, env->mem_io_vaddr);
/* Accept every access: this region exists only to trap writes, body elided. */
1537 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1538                                  unsigned size, bool is_write)
/* MemoryRegionOps for the "notdirty" trap region; reads never reach it. */
1543 static const MemoryRegionOps notdirty_mem_ops = {
1544     .write = notdirty_mem_write,
1545     .valid.accepts = notdirty_mem_accepts,
1546     .endianness = DEVICE_NATIVE_ENDIAN,
1549 /* Generate a debug exception if a watchpoint has been hit. */
/* offset: position within the current I/O page; len_mask: alignment mask of
 * the access; flags: BP_MEM_READ / BP_MEM_WRITE to match against wp->flags. */
1550 static void check_watchpoint(int offset, int len_mask, int flags)
1552     CPUArchState *env = current_cpu->env_ptr;
1553     target_ulong pc, cs_base;
1558     if (env->watchpoint_hit) {
1559         /* We re-entered the check after replacing the TB. Now raise
1560          * the debug interrupt so that is will trigger after the
1561          * current instruction. */
1562         cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
/* Reconstruct the faulting guest virtual address from the page + offset. */
1565     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1566     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* Match if either side's masked address equals the other, and the access
 * direction (read/write) is one the watchpoint cares about. */
1567         if ((vaddr == (wp->vaddr & len_mask) ||
1568              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1569             wp->flags |= BP_WATCHPOINT_HIT;
1570             if (!env->watchpoint_hit) {
1571                 env->watchpoint_hit = wp;
1572                 tb_check_watchpoint(env);
1573                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
/* Stop before the access completes: raise EXCP_DEBUG immediately. */
1574                     env->exception_index = EXCP_DEBUG;
/* Stop after the access: regenerate a single-insn TB and restart so the
 * debug exception triggers after the current instruction. */
1577                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1578                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1579                     cpu_resume_from_signal(env, NULL);
/* Non-matching watchpoints get their HIT flag cleared. */
1583             wp->flags &= ~BP_WATCHPOINT_HIT;
1588 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1589    so these check for a hit then pass through to the normal out-of-line
/* Read side: run the watchpoint check, then forward to the normal
 * physical-memory load of the requested size. */
1591 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1594     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1596     case 1: return ldub_phys(addr);
1597     case 2: return lduw_phys(addr);
1598     case 4: return ldl_phys(addr);
/* Write side: same pattern, forwarding to the st*_phys helpers. */
1603 static void watch_mem_write(void *opaque, hwaddr addr,
1604                             uint64_t val, unsigned size)
1606     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1609         stb_phys(addr, val);
1612         stw_phys(addr, val);
1615         stl_phys(addr, val);
/* MemoryRegionOps installed for pages that carry watchpoints. */
1621 static const MemoryRegionOps watch_mem_ops = {
1622     .read = watch_mem_read,
1623     .write = watch_mem_write,
1624     .endianness = DEVICE_NATIVE_ENDIAN,
/* Subpage regions: a page split among multiple MemoryRegionSections is
 * dispatched through these ops, which add the subpage base and re-enter
 * the owning address space. */
1627 static uint64_t subpage_read(void *opaque, hwaddr addr,
1630     subpage_t *subpage = opaque;
1633 #if defined(DEBUG_SUBPAGE)
1634     printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1635            subpage, len, addr);
/* Re-dispatch the access at (base + addr) via a bounce buffer (buf),
 * then assemble the return value (elided switch on len). */
1637     address_space_read(subpage->as, addr + subpage->base, buf, len);
/* Write mirror of subpage_read: serialize value into buf, then forward. */
1650 static void subpage_write(void *opaque, hwaddr addr,
1651                           uint64_t value, unsigned len)
1653     subpage_t *subpage = opaque;
1656 #if defined(DEBUG_SUBPAGE)
1657     printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1658            " value %"PRIx64"\n",
1659            __func__, subpage, len, addr, value);
1674     address_space_write(subpage->as, addr + subpage->base, buf, len);
/* Validity check delegates to the target address space as well. */
1677 static bool subpage_accepts(void *opaque, hwaddr addr,
1678                             unsigned len, bool is_write)
1680     subpage_t *subpage = opaque;
1681 #if defined(DEBUG_SUBPAGE)
1682     printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1683            __func__, subpage, is_write ? 'w' : 'r', len, addr);
1686     return address_space_access_valid(subpage->as, addr + subpage->base,
1690 static const MemoryRegionOps subpage_ops = {
1691     .read = subpage_read,
1692     .write = subpage_write,
1693     .valid.accepts = subpage_accepts,
1694     .endianness = DEVICE_NATIVE_ENDIAN,
/* Map [start, end] (byte offsets within one page) of a subpage to the given
 * section index.  Returns -1 on out-of-range input (elided), else 0. */
1697 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
/* Reject ranges that fall outside a single target page. */
1702     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1704     idx = SUBPAGE_IDX(start);
1705     eidx = SUBPAGE_IDX(end);
1706 #if defined(DEBUG_SUBPAGE)
1707     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1708            __func__, mmio, start, end, idx, eidx, section);
/* Fill every covered sub-section slot with the section index. */
1710     for (; idx <= eidx; idx++) {
1711         mmio->sub_section[idx] = section;
/* Allocate and initialize a fresh subpage covering one target page at
 * `base`, with every slot initially pointing at the unassigned section. */
1717 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1721     mmio = g_malloc0(sizeof(subpage_t));
1725     memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1726                           "subpage", TARGET_PAGE_SIZE);
1727     mmio->iomem.subpage = true;
1728 #if defined(DEBUG_SUBPAGE)
1729     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1730            mmio, base, TARGET_PAGE_SIZE);
1732     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
/* Register a full-address-space section for `mr` in `map` and return its
 * section index (used for the fixed PHYS_SECTION_* slots). */
1737 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1739     MemoryRegionSection section = {
1741         .offset_within_address_space = 0,
1742         .offset_within_region = 0,
/* Cover the entire 2^64 address space. */
1743         .size = int128_2_64(),
1746     return phys_section_add(map, &section);
/* Translate an IOTLB index (low bits below the page mask) back to the
 * MemoryRegion it dispatches to. */
1749 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1751     return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
/* One-time setup of the four built-in I/O regions used as dispatch targets. */
1754 static void io_mem_init(void)
1756     memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1757     memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1758                           "unassigned", UINT64_MAX);
1759     memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1760                           "notdirty", UINT64_MAX);
1761     memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1762                           "watch", UINT64_MAX);
/* MemoryListener begin hook: build a fresh AddressSpaceDispatch with the
 * four fixed sections pre-registered in a known order (the asserts pin the
 * PHYS_SECTION_* indices the rest of the file relies on). */
1765 static void mem_begin(MemoryListener *listener)
1767     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1768     AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1771     n = dummy_section(&d->map, &io_mem_unassigned);
1772     assert(n == PHYS_SECTION_UNASSIGNED);
1773     n = dummy_section(&d->map, &io_mem_notdirty);
1774     assert(n == PHYS_SECTION_NOTDIRTY);
1775     n = dummy_section(&d->map, &io_mem_rom);
1776     assert(n == PHYS_SECTION_ROM);
1777     n = dummy_section(&d->map, &io_mem_watch);
1778     assert(n == PHYS_SECTION_WATCH);
/* Start from an empty phys map; filled in by region_add callbacks. */
1780     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1782     as->next_dispatch = d;
/* MemoryListener commit hook: swap the newly-built dispatch in and free the
 * old one's section map (elided lines free `cur` itself). */
1785 static void mem_commit(MemoryListener *listener)
1787     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1788     AddressSpaceDispatch *cur = as->dispatch;
1789     AddressSpaceDispatch *next = as->next_dispatch;
/* Compact the radix tree before publishing it. */
1791     phys_page_compact_all(next, next->map.nodes_nb);
1793     as->dispatch = next;
1796         phys_sections_free(&cur->map);
/* TCG listener commit hook: flush every CPU's TLB, since cached ram
 * addresses may be stale after a topology change. */
1801 static void tcg_commit(MemoryListener *listener)
1805     /* since each CPU stores ram addresses in its TLB cache, we must
1806        reset the modified entries */
1809         CPUArchState *env = cpu->env_ptr;
/* Core listener: toggle global dirty tracking with migration log start/stop. */
1815 static void core_log_global_start(MemoryListener *listener)
1817     cpu_physical_memory_set_dirty_tracking(true);
1820 static void core_log_global_stop(MemoryListener *listener)
1822     cpu_physical_memory_set_dirty_tracking(false);
1825 static MemoryListener core_memory_listener = {
1826     .log_global_start = core_log_global_start,
1827     .log_global_stop = core_log_global_stop,
1831 static MemoryListener tcg_memory_listener = {
1832     .commit = tcg_commit,
/* Attach the dispatch machinery to an address space: install the
 * begin/commit/region callbacks and register the listener. */
1835 void address_space_init_dispatch(AddressSpace *as)
1837     as->dispatch = NULL;
1838     as->dispatch_listener = (MemoryListener) {
1840         .commit = mem_commit,
1841         .region_add = mem_add,
/* region_nop shares mem_add: unchanged regions are re-added into the
 * fresh dispatch built by mem_begin. */
1842         .region_nop = mem_add,
1845     memory_listener_register(&as->dispatch_listener, as);
/* Tear-down counterpart: unregister the listener and drop the dispatch
 * (elided lines free `d`). */
1848 void address_space_destroy_dispatch(AddressSpace *as)
1850     AddressSpaceDispatch *d = as->dispatch;
1852     memory_listener_unregister(&as->dispatch_listener);
1854     as->dispatch = NULL;
/* Create the two root containers ("system" memory and "io") and their
 * address spaces, then hook up the core and (if TCG) TCG listeners. */
1857 static void memory_map_init(void)
1859     system_memory = g_malloc(sizeof(*system_memory));
1861     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1862     address_space_init(&address_space_memory, system_memory, "memory");
1864     system_io = g_malloc(sizeof(*system_io));
1865     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1867     address_space_init(&address_space_io, system_io, "I/O");
1869     memory_listener_register(&core_memory_listener, &address_space_memory);
/* The TCG listener is only needed when TBs can cache ram addresses. */
1870     if (tcg_enabled()) {
1871         memory_listener_register(&tcg_memory_listener, &address_space_memory);
/* Accessors for the root regions created above. */
1875 MemoryRegion *get_system_memory(void)
1877     return system_memory;
1880 MemoryRegion *get_system_io(void)
1885 #endif /* !defined(CONFIG_USER_ONLY) */
1887 /* physical memory access (slow version, mainly for debug) */
1888 #if defined(CONFIG_USER_ONLY)
/* User-mode variant: walk the buffer page by page, validating PAGE_*
 * permission flags before copying through lock_user/unlock_user. */
1889 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1890                         uint8_t *buf, int len, int is_write)
1897         page = addr & TARGET_PAGE_MASK;
/* l = bytes remaining in the current page (clamped to len, elided). */
1898         l = (page + TARGET_PAGE_SIZE) - addr;
1901         flags = page_get_flags(page);
1902         if (!(flags & PAGE_VALID))
1905             if (!(flags & PAGE_WRITE))
1907             /* XXX: this code should not depend on lock_user */
1908             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1911             unlock_user(p, addr, l);
1913             if (!(flags & PAGE_READ))
1915             /* XXX: this code should not depend on lock_user */
1916             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1919             unlock_user(p, addr, 0);
/* Mark [addr, addr+length) dirty for VGA and migration, invalidating any
 * translated code in the range first; also tells Xen about the change. */
1930 static void invalidate_and_set_dirty(hwaddr addr,
1933     if (cpu_physical_memory_is_clean(addr)) {
1934         /* invalidate code */
1935         tb_invalidate_phys_page_range(addr, addr + length, 0);
1937         cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1938         cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1940     xen_modified_memory(addr, length);
/* Compute the largest legal single access size for `l` bytes at `addr`
 * within region `mr`, honoring the region's max access size, the address
 * alignment, and rounding down to a power of two. */
1943 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1945     unsigned access_size_max = mr->ops->valid.max_access_size;
1947     /* Regions are assumed to support 1-4 byte accesses unless
1948        otherwise specified.  */
1949     if (access_size_max == 0) {
1950         access_size_max = 4;
1953     /* Bound the maximum access by the alignment of the address.  */
1954     if (!mr->ops->impl.unaligned) {
/* addr & -addr isolates the lowest set bit = largest natural alignment. */
1955         unsigned align_size_max = addr & -addr;
1956         if (align_size_max != 0 && align_size_max < access_size_max) {
1957             access_size_max = align_size_max;
1961     /* Don't attempt accesses larger than the maximum.  */
1962     if (l > access_size_max) {
1963         l = access_size_max;
/* Round down to a power of two via the highest set bit. */
1966         l = 1 << (qemu_fls(l) - 1);
/* Core read/write loop over an address space: translate each chunk, then
 * either dispatch through io_mem_read/io_mem_write (MMIO) or memcpy
 * directly to/from guest RAM.  Accumulates and returns an error flag
 * (return plumbing elided in this excerpt). */
1972 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1973                       int len, bool is_write)
/* Translate the next span; addr1/l are the region-relative offset/length. */
1984         mr = address_space_translate(as, addr, &addr1, &l, is_write);
1987             if (!memory_access_is_direct(mr, is_write)) {
/* MMIO write path: clamp to a legal access size, then dispatch. */
1988                 l = memory_access_size(mr, l, addr1);
1989                 /* XXX: could force current_cpu to NULL to avoid
1993                     /* 64 bit write access */
1995                     error |= io_mem_write(mr, addr1, val, 8);
1998                     /* 32 bit write access */
2000                     error |= io_mem_write(mr, addr1, val, 4);
2003                     /* 16 bit write access */
2005                     error |= io_mem_write(mr, addr1, val, 2);
2008                     /* 8 bit write access */
2010                     error |= io_mem_write(mr, addr1, val, 1);
/* Direct RAM write path: copy and update dirty/TB state. */
2016                 addr1 += memory_region_get_ram_addr(mr);
2018                 ptr = qemu_get_ram_ptr(addr1);
2019                 memcpy(ptr, buf, l);
2020                 invalidate_and_set_dirty(addr1, l);
2023             if (!memory_access_is_direct(mr, is_write)) {
/* MMIO read path, mirror of the write path above. */
2025                 l = memory_access_size(mr, l, addr1);
2028                     /* 64 bit read access */
2029                     error |= io_mem_read(mr, addr1, &val, 8);
2033                     /* 32 bit read access */
2034                     error |= io_mem_read(mr, addr1, &val, 4);
2038                     /* 16 bit read access */
2039                     error |= io_mem_read(mr, addr1, &val, 2);
2043                     /* 8 bit read access */
2044                     error |= io_mem_read(mr, addr1, &val, 1);
/* Direct RAM read path. */
2052                 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2053                 memcpy(buf, ptr, l);
/* Convenience wrapper: write `len` bytes from `buf` into `as` at `addr`. */
2064 bool address_space_write(AddressSpace *as, hwaddr addr,
2065                          const uint8_t *buf, int len)
2067     return address_space_rw(as, addr, (uint8_t *)buf, len, true);
/* Convenience wrapper: read `len` bytes from `as` at `addr` into `buf`. */
2070 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2072     return address_space_rw(as, addr, buf, len, false);
/* Legacy entry point operating on the global system memory address space. */
2076 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2077                             int len, int is_write)
2079     address_space_rw(&address_space_memory, addr, buf, len, is_write);
/* Selector for the internal rom-write helper: copy data vs. only flush the
 * host icache over the range (enumerators elided in this excerpt). */
2082 enum write_rom_type {
/* Walk [addr, addr+len) in the system memory space; for each RAM/ROMD span
 * either memcpy the data in (marking it dirty) or flush the host icache. */
2087 static inline void cpu_physical_memory_write_rom_internal(
2088     hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2097         mr = address_space_translate(&address_space_memory,
2098                                      addr, &addr1, &l, true);
/* Skip spans that are neither RAM nor ROM-device — nothing to write. */
2100         if (!(memory_region_is_ram(mr) ||
2101               memory_region_is_romd(mr))) {
2104             addr1 += memory_region_get_ram_addr(mr);
2106             ptr = qemu_get_ram_ptr(addr1);
2109                 memcpy(ptr, buf, l);
2110                 invalidate_and_set_dirty(addr1, l);
2113                 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2123 /* used for ROM loading : can write in RAM and ROM */
2124 void cpu_physical_memory_write_rom(hwaddr addr,
2125                                    const uint8_t *buf, int len)
2127     cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2130 void cpu_flush_icache_range(hwaddr start, int len)
2133      * This function should do the same thing as an icache flush that was
2134      * triggered from within the guest. For TCG we are always cache coherent,
2135      * so there is no need to flush anything. For KVM / Xen we need to flush
2136      * the host's instruction cache at least.
/* TCG needs no flush (elided early return); otherwise reuse the rom-write
 * walker in FLUSH_CACHE mode with a NULL buffer. */
2138     if (tcg_enabled()) {
2142     cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
/* Single global bounce buffer used by address_space_map() for MMIO spans;
 * only one mapping through it can be live at a time. */
2152 static BounceBuffer bounce;
/* Registry of callbacks to invoke when the bounce buffer frees up. */
2154 typedef struct MapClient {
2156     void (*callback)(void *opaque);
2157     QLIST_ENTRY(MapClient) link;
2160 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2161     = QLIST_HEAD_INITIALIZER(map_client_list);
/* Register a callback to be notified when a retry of address_space_map()
 * is likely to succeed; returns an opaque handle (the client). */
2163 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2165     MapClient *client = g_malloc(sizeof(*client));
2167     client->opaque = opaque;
2168     client->callback = callback;
2169     QLIST_INSERT_HEAD(&map_client_list, client, link);
/* Remove and free a registered client (free elided in this excerpt). */
2173 static void cpu_unregister_map_client(void *_client)
2175     MapClient *client = (MapClient *)_client;
2177     QLIST_REMOVE(client, link);
/* Fire every pending callback exactly once, unregistering as we go. */
2181 static void cpu_notify_map_clients(void)
2185     while (!QLIST_EMPTY(&map_client_list)) {
2186         client = QLIST_FIRST(&map_client_list);
2187         client->callback(client->opaque);
2188         cpu_unregister_map_client(client);
/* Check whether every chunk of [addr, addr+len) can be accessed in the
 * given direction; direct RAM is always valid, MMIO defers to the region's
 * own validity callback. */
2192 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2199         mr = address_space_translate(as, addr, &xlat, &l, is_write);
2200         if (!memory_access_is_direct(mr, is_write)) {
2201             l = memory_access_size(mr, l, addr);
/* A single invalid chunk invalidates the whole request (return elided). */
2202             if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2213 /* Map a physical memory region into a host virtual address.
2214  * May map a subset of the requested range, given by and returned in *plen.
2215  * May return NULL if resources needed to perform the mapping are exhausted.
2216  * Use only for reads OR writes - not for read-modify-write operations.
2217  * Use cpu_register_map_client() to know when retrying the map operation is
2218  * likely to succeed.
2220 void *address_space_map(AddressSpace *as,
2227     hwaddr l, xlat, base;
2228     MemoryRegion *mr, *this_mr;
2236     mr = address_space_translate(as, addr, &xlat, &l, is_write);
2237     if (!memory_access_is_direct(mr, is_write)) {
/* MMIO: fall back to the single global bounce buffer.  If it is already
 * in use, fail (caller can register a map client and retry). */
2238         if (bounce.buffer) {
2241         /* Avoid unbounded allocations */
2242         l = MIN(l, TARGET_PAGE_SIZE);
2243         bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2247         memory_region_ref(mr);
/* For reads, pre-fill the bounce buffer with the MMIO contents. */
2250             address_space_read(as, addr, bounce.buffer, l);
2254         return bounce.buffer;
/* Direct RAM: extend the mapping while consecutive translations stay in
 * the same region and are physically contiguous. */
2258     raddr = memory_region_get_ram_addr(mr);
2269         this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2270         if (this_mr != mr || xlat != base + done) {
2275     memory_region_ref(mr);
2277     return qemu_ram_ptr_length(raddr + base, plen);
2280 /* Unmaps a memory region previously mapped by address_space_map().
2281  * Will also mark the memory as dirty if is_write == 1. access_len gives
2282  * the amount of memory that was actually read or written by the caller.
2284 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2285                          int is_write, hwaddr access_len)
/* Direct-RAM mapping: mark written pages dirty and drop the region ref. */
2287     if (buffer != bounce.buffer) {
2291         mr = qemu_ram_addr_from_host(buffer, &addr1);
2294         while (access_len) {
2296             l = TARGET_PAGE_SIZE;
2299             invalidate_and_set_dirty(addr1, l);
2304         if (xen_enabled()) {
2305             xen_invalidate_map_cache_entry(buffer);
2307         memory_region_unref(mr);
/* Bounce-buffer path: flush writes back, release the buffer, and wake any
 * waiters registered via cpu_register_map_client(). */
2311         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2313     qemu_vfree(bounce.buffer);
2314     bounce.buffer = NULL;
2315     memory_region_unref(bounce.mr);
2316     cpu_notify_map_clients();
/* Wrappers over the global system-memory address space. */
2319 void *cpu_physical_memory_map(hwaddr addr,
2323     return address_space_map(&address_space_memory, addr, plen, is_write);
2326 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2327                                int is_write, hwaddr access_len)
2329     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2332 /* warning: addr must be aligned */
/* 32-bit physical load with explicit endianness: MMIO goes through
 * io_mem_read with a host/target byte-swap fixup; RAM uses the ldl_*_p
 * host accessors directly. */
2333 static inline uint32_t ldl_phys_internal(hwaddr addr,
2334                                          enum device_endian endian)
2342     mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
/* MMIO (or span shorter than 4 bytes): dispatch and byte-swap as needed. */
2344     if (l < 4 || !memory_access_is_direct(mr, false)) {
2346         io_mem_read(mr, addr1, &val, 4);
2347 #if defined(TARGET_WORDS_BIGENDIAN)
2348         if (endian == DEVICE_LITTLE_ENDIAN) {
2352         if (endian == DEVICE_BIG_ENDIAN) {
/* Direct RAM: pick the host accessor matching the requested endianness. */
2358         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2362         case DEVICE_LITTLE_ENDIAN:
2363             val = ldl_le_p(ptr);
2365         case DEVICE_BIG_ENDIAN:
2366             val = ldl_be_p(ptr);
/* Public variants differing only in requested endianness. */
2376 uint32_t ldl_phys(hwaddr addr)
2378     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2381 uint32_t ldl_le_phys(hwaddr addr)
2383     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2386 uint32_t ldl_be_phys(hwaddr addr)
2388     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2391 /* warning: addr must be aligned */
/* 64-bit physical load; same structure as ldl_phys_internal but with
 * 8-byte accesses and the ldq_*_p host accessors. */
2392 static inline uint64_t ldq_phys_internal(hwaddr addr,
2393                                          enum device_endian endian)
2401     mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2403     if (l < 8 || !memory_access_is_direct(mr, false)) {
2405         io_mem_read(mr, addr1, &val, 8);
2406 #if defined(TARGET_WORDS_BIGENDIAN)
2407         if (endian == DEVICE_LITTLE_ENDIAN) {
2411         if (endian == DEVICE_BIG_ENDIAN) {
2417         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2421         case DEVICE_LITTLE_ENDIAN:
2422             val = ldq_le_p(ptr);
2424         case DEVICE_BIG_ENDIAN:
2425             val = ldq_be_p(ptr);
2435 uint64_t ldq_phys(hwaddr addr)
2437     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2440 uint64_t ldq_le_phys(hwaddr addr)
2442     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2445 uint64_t ldq_be_phys(hwaddr addr)
2447     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
/* Single-byte load: endianness irrelevant, go through the generic rw path. */
2451 uint32_t ldub_phys(hwaddr addr)
2454     cpu_physical_memory_read(addr, &val, 1);
2458 /* warning: addr must be aligned */
/* 16-bit physical load; same structure as ldl_phys_internal with 2-byte
 * accesses and the lduw_*_p host accessors. */
2459 static inline uint32_t lduw_phys_internal(hwaddr addr,
2460                                           enum device_endian endian)
2468     mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2470     if (l < 2 || !memory_access_is_direct(mr, false)) {
2472         io_mem_read(mr, addr1, &val, 2);
2473 #if defined(TARGET_WORDS_BIGENDIAN)
2474         if (endian == DEVICE_LITTLE_ENDIAN) {
2478         if (endian == DEVICE_BIG_ENDIAN) {
2484         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2488         case DEVICE_LITTLE_ENDIAN:
2489             val = lduw_le_p(ptr);
2491         case DEVICE_BIG_ENDIAN:
2492             val = lduw_be_p(ptr);
2502 uint32_t lduw_phys(hwaddr addr)
2504     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2507 uint32_t lduw_le_phys(hwaddr addr)
2509     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2512 uint32_t lduw_be_phys(hwaddr addr)
2514     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2517 /* warning: addr must be aligned. The ram page is not masked as dirty
2518    and the code inside is not invalidated. It is useful if the dirty
2519    bits are used to track modified PTEs */
2520 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2527     mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2529     if (l < 4 || !memory_access_is_direct(mr, true)) {
/* MMIO: plain dispatch, no dirty bookkeeping needed. */
2530         io_mem_write(mr, addr1, val, 4);
2532         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2533         ptr = qemu_get_ram_ptr(addr1);
/* During migration we must still log the write so the page is re-sent,
 * even though normal dirty marking is deliberately skipped. */
2536         if (unlikely(in_migration)) {
2537             if (cpu_physical_memory_is_clean(addr1)) {
2538                 /* invalidate code */
2539                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2541                 cpu_physical_memory_set_dirty_flag(addr1,
2542                                                    DIRTY_MEMORY_MIGRATION);
2543                 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2549 /* warning: addr must be aligned */
/* 32-bit physical store with explicit endianness: for MMIO, byte-swap the
 * value as needed before dispatch; for RAM, store via the matching host
 * accessor and mark the range dirty. */
2550 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2551                                      enum device_endian endian)
2558     mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2560     if (l < 4 || !memory_access_is_direct(mr, true)) {
2561 #if defined(TARGET_WORDS_BIGENDIAN)
2562         if (endian == DEVICE_LITTLE_ENDIAN) {
2566         if (endian == DEVICE_BIG_ENDIAN) {
2570         io_mem_write(mr, addr1, val, 4);
2573         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2574         ptr = qemu_get_ram_ptr(addr1);
2576         case DEVICE_LITTLE_ENDIAN:
2579         case DEVICE_BIG_ENDIAN:
/* RAM stores must invalidate TBs and set dirty flags for the 4 bytes. */
2586         invalidate_and_set_dirty(addr1, 4);
2590 void stl_phys(hwaddr addr, uint32_t val)
2592     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2595 void stl_le_phys(hwaddr addr, uint32_t val)
2597     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2600 void stl_be_phys(hwaddr addr, uint32_t val)
2602     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
/* Single-byte store: endianness irrelevant, go through the generic path. */
2606 void stb_phys(hwaddr addr, uint32_t val)
2609     cpu_physical_memory_write(addr, &v, 1);
2612 /* warning: addr must be aligned */
/* 16-bit physical store; same structure as stl_phys_internal with 2-byte
 * accesses. */
2613 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2614                                      enum device_endian endian)
2621     mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2623     if (l < 2 || !memory_access_is_direct(mr, true)) {
2624 #if defined(TARGET_WORDS_BIGENDIAN)
2625         if (endian == DEVICE_LITTLE_ENDIAN) {
2629         if (endian == DEVICE_BIG_ENDIAN) {
2633         io_mem_write(mr, addr1, val, 2);
2636         addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2637         ptr = qemu_get_ram_ptr(addr1);
2639         case DEVICE_LITTLE_ENDIAN:
2642         case DEVICE_BIG_ENDIAN:
2649         invalidate_and_set_dirty(addr1, 2);
2653 void stw_phys(hwaddr addr, uint32_t val)
2655     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2658 void stw_le_phys(hwaddr addr, uint32_t val)
2660     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2663 void stw_be_phys(hwaddr addr, uint32_t val)
2665     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
/* 64-bit stores: convert to the wanted byte order in a local, then write
 * the 8 bytes through the generic path (no dedicated fast path). */
2669 void stq_phys(hwaddr addr, uint64_t val)
2672     cpu_physical_memory_write(addr, &val, 8);
2675 void stq_le_phys(hwaddr addr, uint64_t val)
2677     val = cpu_to_le64(val);
2678     cpu_physical_memory_write(addr, &val, 8);
2681 void stq_be_phys(hwaddr addr, uint64_t val)
2683     val = cpu_to_be64(val);
2684     cpu_physical_memory_write(addr, &val, 8);
2687 /* virtual memory access for debug (includes writing to ROM) */
/* Softmmu variant: translate each page through the CPU's MMU debug hook,
 * then use the rom-capable write path (so breakpoints can patch ROM). */
2688 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2689                         uint8_t *buf, int len, int is_write)
2696         page = addr & TARGET_PAGE_MASK;
2697         phys_addr = cpu_get_phys_page_debug(cpu, page);
2698         /* if no physical page mapped, return an error */
2699         if (phys_addr == -1)
/* l = bytes remaining in the current page (clamped to len, elided). */
2701         l = (page + TARGET_PAGE_SIZE) - addr;
2704         phys_addr += (addr & ~TARGET_PAGE_MASK);
/* Writes go through the ROM-capable path; reads use the normal rw path. */
2706             cpu_physical_memory_write_rom(phys_addr, buf, l);
2708             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2717 #if !defined(CONFIG_USER_ONLY)
2720  * A helper function for the _utterly broken_ virtio device model to find out if
2721  * it's running on a big endian machine. Don't do this at home kids!
/* Returns whether TARGET_WORDS_BIGENDIAN is defined (branches elided). */
2723 bool virtio_is_big_endian(void);
2724 bool virtio_is_big_endian(void)
2726 #if defined(TARGET_WORDS_BIGENDIAN)
2735 #ifndef CONFIG_USER_ONLY
/* True if the physical address resolves to an MMIO region rather than
 * plain RAM or a ROM device. */
2736 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2741     mr = address_space_translate(&address_space_memory,
2742                                  phys_addr, &phys_addr, &l, false);
2744     return !(memory_region_is_ram(mr) ||
2745              memory_region_is_romd(mr));
/* Invoke `func` once per RAM block with its host pointer, offset, and
 * length (definition continues past this excerpt). */
2748 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2752     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2753         func(block->host, block->offset, block->length, opaque);