4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
55 #include "qemu/range.h"
57 //#define DEBUG_SUBPAGE
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
84 #if !defined(CONFIG_USER_ONLY)
86 typedef struct PhysPageEntry PhysPageEntry;
88 struct PhysPageEntry {
89 /* How many bits to skip to the next level (in units of P_L2_BITS). 0 for a leaf. */
91 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
101 #define P_L2_SIZE (1 << P_L2_BITS)
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
105 typedef PhysPageEntry Node[P_L2_SIZE];
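/* Illustrative sketch, not part of the original file: how a page index is
 * split into per-level indices for the multi-level map described above.
 * It mirrors the indexing done by phys_page_set_level() and phys_page_find()
 * below and assumes the P_L2_BITS/P_L2_SIZE definitions from this file. */
static inline unsigned example_phys_map_level_index(hwaddr index, int level)
{
    /* Each level consumes P_L2_BITS bits of the page index; level
     * P_L2_LEVELS - 1 is consumed first, level 0 last. */
    return (index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
}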
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
111 unsigned nodes_nb_alloc;
113 MemoryRegionSection *sections;
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
120 PhysPageEntry phys_map;
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
130 uint16_t sub_section[TARGET_PAGE_SIZE];
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
142 static MemoryRegion io_mem_watch;
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
183 for (i = 0; i < P_L2_SIZE; i++) {
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221 unsigned valid_ptr = P_L2_SIZE;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
265 lp->skip += p[valid_ptr].skip;
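/* Illustrative note (not in the original file): if an intermediate node has
 * exactly one valid child, the parent entry inherits the child's ptr and the
 * skip counts are added, so e.g. a skip-1 node whose only child is itself a
 * skip-2 node collapses into a single skip-3 entry that phys_page_find()
 * can follow in one step. */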
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
282 hwaddr index = addr >> TARGET_PAGE_BITS;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
298 return &sections[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
347 if (memory_region_is_romd(mr)) {
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
359 MemoryRegionSection *section;
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
367 if (!mr->iommu_ops) {
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
380 as = iotlb.target_as;
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
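/* Usage sketch (illustrative, not in the original file): how a caller such as
 * address_space_rw() below resolves a guest physical address before deciding
 * between direct RAM access and MMIO dispatch.  "as", "addr" and "is_write"
 * are assumed to be supplied by the caller. */
static void example_translate(AddressSpace *as, hwaddr addr, bool is_write)
{
    hwaddr xlat, len = 4;   /* intend to access 4 bytes */
    MemoryRegion *mr = address_space_translate(as, addr, &xlat, &len, is_write);

    if (memory_access_is_direct(mr, is_write)) {
        /* RAM or ROMD: a host pointer can be obtained from the RAM block. */
    } else {
        /* MMIO: the access has to go through io_mem_read()/io_mem_write(). */
    }
}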
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400 assert(!section->mr->iommu_ops);
405 void cpu_exec_init_all(void)
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
414 #if !defined(CONFIG_USER_ONLY)
416 static int cpu_common_post_load(void *opaque, int version_id)
418 CPUState *cpu = opaque;
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .post_load = cpu_common_post_load,
434 .fields = (VMStateField []) {
435 VMSTATE_UINT32(halted, CPUState),
436 VMSTATE_UINT32(interrupt_request, CPUState),
437 VMSTATE_END_OF_LIST()
443 CPUState *qemu_get_cpu(int index)
448 if (cpu->cpu_index == index) {
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
472 void cpu_exec_init(CPUArchState *env)
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
479 #if defined(CONFIG_USER_ONLY)
483 CPU_FOREACH(some_cpu) {
486 cpu->cpu_index = cpu_index;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
528 #endif /* TARGET_HAS_ICE */
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
546 CPUArchState *env = cpu->env_ptr;
547 vaddr len_mask = ~(len - 1);
550 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
551 if ((len & (len - 1)) || (addr & ~len_mask) ||
552 len == 0 || len > TARGET_PAGE_SIZE) {
553 error_report("tried to set invalid watchpoint at %"
554 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
557 wp = g_malloc(sizeof(*wp));
560 wp->len_mask = len_mask;
563 /* keep all GDB-injected watchpoints in front */
564 if (flags & BP_GDB) {
565 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
567 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
570 tlb_flush_page(env, addr);
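/* Usage sketch (illustrative, not in the original file): install a 4-byte
 * write watchpoint on behalf of the debugger, satisfying the power-of-2 and
 * alignment checks above.  "cpu" and "addr" are assumed to come from the
 * caller (e.g. the gdbstub). */
static int example_set_write_watchpoint(CPUState *cpu, vaddr addr)
{
    CPUWatchpoint *wp;

    return cpu_watchpoint_insert(cpu, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
}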
577 /* Remove a specific watchpoint. */
578 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
581 vaddr len_mask = ~(len - 1);
584 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
585 if (addr == wp->vaddr && len_mask == wp->len_mask
586 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
587 cpu_watchpoint_remove_by_ref(cpu, wp);
594 /* Remove a specific watchpoint by reference. */
595 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
597 CPUArchState *env = cpu->env_ptr;
599 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
601 tlb_flush_page(env, watchpoint->vaddr);
606 /* Remove all matching watchpoints. */
607 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
609 CPUWatchpoint *wp, *next;
611 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
612 if (wp->flags & mask) {
613 cpu_watchpoint_remove_by_ref(cpu, wp);
619 /* Add a breakpoint. */
620 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
621 CPUBreakpoint **breakpoint)
623 #if defined(TARGET_HAS_ICE)
626 bp = g_malloc(sizeof(*bp));
631 /* keep all GDB-injected breakpoints in front */
632 if (flags & BP_GDB) {
633 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
635 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
638 breakpoint_invalidate(cpu, pc);
649 /* Remove a specific breakpoint. */
650 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
652 #if defined(TARGET_HAS_ICE)
655 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
656 if (bp->pc == pc && bp->flags == flags) {
657 cpu_breakpoint_remove_by_ref(cpu, bp);
667 /* Remove a specific breakpoint by reference. */
668 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
670 #if defined(TARGET_HAS_ICE)
671 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
673 breakpoint_invalidate(cpu, breakpoint->pc);
679 /* Remove all matching breakpoints. */
680 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
682 #if defined(TARGET_HAS_ICE)
683 CPUBreakpoint *bp, *next;
685 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
686 if (bp->flags & mask) {
687 cpu_breakpoint_remove_by_ref(cpu, bp);
693 /* enable or disable single step mode. EXCP_DEBUG is returned by the
694 CPU loop after each instruction */
695 void cpu_single_step(CPUState *cpu, int enabled)
697 #if defined(TARGET_HAS_ICE)
698 if (cpu->singlestep_enabled != enabled) {
699 cpu->singlestep_enabled = enabled;
701 kvm_update_guest_debug(cpu, 0);
703 /* must flush all the translated code to avoid inconsistencies */
704 /* XXX: only flush what is necessary */
705 CPUArchState *env = cpu->env_ptr;
712 void cpu_abort(CPUState *cpu, const char *fmt, ...)
719 fprintf(stderr, "qemu: fatal: ");
720 vfprintf(stderr, fmt, ap);
721 fprintf(stderr, "\n");
722 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
723 if (qemu_log_enabled()) {
724 qemu_log("qemu: fatal: ");
725 qemu_log_vprintf(fmt, ap2);
727 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
733 #if defined(CONFIG_USER_ONLY)
735 struct sigaction act;
736 sigfillset(&act.sa_mask);
737 act.sa_handler = SIG_DFL;
738 sigaction(SIGABRT, &act, NULL);
744 #if !defined(CONFIG_USER_ONLY)
745 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
749 /* The list is protected by the iothread lock here. */
750 block = ram_list.mru_block;
751 if (block && addr - block->offset < block->length) {
754 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
755 if (addr - block->offset < block->length) {
760 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
764 ram_list.mru_block = block;
768 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
774 end = TARGET_PAGE_ALIGN(start + length);
775 start &= TARGET_PAGE_MASK;
777 block = qemu_get_ram_block(start);
778 assert(block == qemu_get_ram_block(end - 1));
779 start1 = (uintptr_t)block->host + (start - block->offset);
780 cpu_tlb_reset_dirty_all(start1, length);
783 /* Note: start and end must be within the same ram block. */
784 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
789 cpu_physical_memory_clear_dirty_range(start, length, client);
792 tlb_reset_dirty_range_all(start, length);
796 static void cpu_physical_memory_set_dirty_tracking(bool enable)
798 in_migration = enable;
801 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
802 MemoryRegionSection *section,
804 hwaddr paddr, hwaddr xlat,
806 target_ulong *address)
811 if (memory_region_is_ram(section->mr)) {
813 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
815 if (!section->readonly) {
816 iotlb |= PHYS_SECTION_NOTDIRTY;
818 iotlb |= PHYS_SECTION_ROM;
821 iotlb = section - section->address_space->dispatch->map.sections;
825 /* Make accesses to pages with watchpoints go via the
826 watchpoint trap routines. */
827 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
828 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
829 /* Avoid trapping reads of pages with a write breakpoint. */
830 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
831 iotlb = PHYS_SECTION_WATCH + paddr;
832 *address |= TLB_MMIO;
840 #endif /* defined(CONFIG_USER_ONLY) */
842 #if !defined(CONFIG_USER_ONLY)
844 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
846 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
848 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
851 * Set a custom physical guest memory allocator.
852 * Accelerators with unusual needs may need this. Hopefully, we can
853 * get rid of it eventually.
855 void phys_mem_set_alloc(void *(*alloc)(size_t))
857 phys_mem_alloc = alloc;
860 static uint16_t phys_section_add(PhysPageMap *map,
861 MemoryRegionSection *section)
863 /* The physical section number is ORed with a page-aligned
864 * pointer to produce the iotlb entries. Thus it should
865 * never overflow into the page-aligned value.
867 assert(map->sections_nb < TARGET_PAGE_SIZE);
869 if (map->sections_nb == map->sections_nb_alloc) {
870 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
871 map->sections = g_renew(MemoryRegionSection, map->sections,
872 map->sections_nb_alloc);
874 map->sections[map->sections_nb] = *section;
875 memory_region_ref(section->mr);
876 return map->sections_nb++;
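/* Illustrative note (not in the original file): with a 4 KiB TARGET_PAGE_SIZE
 * the section index returned above fits in the low 12 bits of an iotlb entry,
 * while the page-aligned RAM address occupies the upper bits; see
 * memory_region_section_get_iotlb() above, which ORs the two together. */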
879 static void phys_section_destroy(MemoryRegion *mr)
881 memory_region_unref(mr);
884 subpage_t *subpage = container_of(mr, subpage_t, iomem);
885 memory_region_destroy(&subpage->iomem);
890 static void phys_sections_free(PhysPageMap *map)
892 while (map->sections_nb > 0) {
893 MemoryRegionSection *section = &map->sections[--map->sections_nb];
894 phys_section_destroy(section->mr);
896 g_free(map->sections);
900 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
903 hwaddr base = section->offset_within_address_space
905 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
906 d->map.nodes, d->map.sections);
907 MemoryRegionSection subsection = {
908 .offset_within_address_space = base,
909 .size = int128_make64(TARGET_PAGE_SIZE),
913 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
915 if (!(existing->mr->subpage)) {
916 subpage = subpage_init(d->as, base);
917 subsection.address_space = d->as;
918 subsection.mr = &subpage->iomem;
919 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
920 phys_section_add(&d->map, &subsection));
922 subpage = container_of(existing->mr, subpage_t, iomem);
924 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
925 end = start + int128_get64(section->size) - 1;
926 subpage_register(subpage, start, end,
927 phys_section_add(&d->map, section));
931 static void register_multipage(AddressSpaceDispatch *d,
932 MemoryRegionSection *section)
934 hwaddr start_addr = section->offset_within_address_space;
935 uint16_t section_index = phys_section_add(&d->map, section);
936 uint64_t num_pages = int128_get64(int128_rshift(section->size,
940 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
943 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
945 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
946 AddressSpaceDispatch *d = as->next_dispatch;
947 MemoryRegionSection now = *section, remain = *section;
948 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
950 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
951 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
952 - now.offset_within_address_space;
954 now.size = int128_min(int128_make64(left), now.size);
955 register_subpage(d, &now);
957 now.size = int128_zero();
959 while (int128_ne(remain.size, now.size)) {
960 remain.size = int128_sub(remain.size, now.size);
961 remain.offset_within_address_space += int128_get64(now.size);
962 remain.offset_within_region += int128_get64(now.size);
964 if (int128_lt(remain.size, page_size)) {
965 register_subpage(d, &now);
966 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
967 now.size = page_size;
968 register_subpage(d, &now);
970 now.size = int128_and(now.size, int128_neg(page_size));
971 register_multipage(d, &now);
976 void qemu_flush_coalesced_mmio_buffer(void)
979 kvm_flush_coalesced_mmio_buffer();
982 void qemu_mutex_lock_ramlist(void)
984 qemu_mutex_lock(&ram_list.mutex);
987 void qemu_mutex_unlock_ramlist(void)
989 qemu_mutex_unlock(&ram_list.mutex);
996 #define HUGETLBFS_MAGIC 0x958458f6
998 static long gethugepagesize(const char *path)
1004 ret = statfs(path, &fs);
1005 } while (ret != 0 && errno == EINTR);
1012 if (fs.f_type != HUGETLBFS_MAGIC)
1013 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1018 static sigjmp_buf sigjump;
1020 static void sigbus_handler(int signal)
1022 siglongjmp(sigjump, 1);
1025 static void *file_ram_alloc(RAMBlock *block,
1030 char *sanitized_name;
1034 unsigned long hpagesize;
1036 hpagesize = gethugepagesize(path);
1041 if (memory < hpagesize) {
1045 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1046 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1050 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1051 sanitized_name = g_strdup(block->mr->name);
1052 for (c = sanitized_name; *c != '\0'; c++) {
1057 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1059 g_free(sanitized_name);
1061 fd = mkstemp(filename);
1063 perror("unable to create backing store for hugepages");
1070 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1073 * ftruncate is not supported by hugetlbfs in older
1074 * hosts, so don't bother bailing out on errors.
1075 * If anything goes wrong with it under other filesystems,
1078 if (ftruncate(fd, memory))
1079 perror("ftruncate");
1081 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1082 if (area == MAP_FAILED) {
1083 perror("file_ram_alloc: can't mmap RAM pages");
1090 struct sigaction act, oldact;
1091 sigset_t set, oldset;
1093 memset(&act, 0, sizeof(act));
1094 act.sa_handler = &sigbus_handler;
1097 ret = sigaction(SIGBUS, &act, &oldact);
1099 perror("file_ram_alloc: failed to install signal handler");
1103 /* unblock SIGBUS */
1105 sigaddset(&set, SIGBUS);
1106 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1108 if (sigsetjmp(sigjump, 1)) {
1109 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1113 /* MAP_POPULATE silently ignores failures */
1114 for (i = 0; i < (memory/hpagesize); i++) {
1115 memset(area + (hpagesize*i), 0, 1);
1118 ret = sigaction(SIGBUS, &oldact, NULL);
1120 perror("file_ram_alloc: failed to reinstall signal handler");
1124 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1137 static void *file_ram_alloc(RAMBlock *block,
1141 fprintf(stderr, "-mem-path not supported on this host\n");
1146 static ram_addr_t find_ram_offset(ram_addr_t size)
1148 RAMBlock *block, *next_block;
1149 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1151 assert(size != 0); /* it would hand out the same offset multiple times */
1153 if (QTAILQ_EMPTY(&ram_list.blocks))
1156 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1157 ram_addr_t end, next = RAM_ADDR_MAX;
1159 end = block->offset + block->length;
1161 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1162 if (next_block->offset >= end) {
1163 next = MIN(next, next_block->offset);
1166 if (next - end >= size && next - end < mingap) {
1168 mingap = next - end;
1172 if (offset == RAM_ADDR_MAX) {
1173 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1181 ram_addr_t last_ram_offset(void)
1184 ram_addr_t last = 0;
1186 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1187 last = MAX(last, block->offset + block->length);
1192 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1196 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1197 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1198 "dump-guest-core", true)) {
1199 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1201 perror("qemu_madvise");
1202 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1203 "but dump_guest_core=off specified\n");
1208 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1210 RAMBlock *new_block, *block;
1213 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1214 if (block->offset == addr) {
1220 assert(!new_block->idstr[0]);
1223 char *id = qdev_get_dev_path(dev);
1225 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1229 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1231 /* This assumes the iothread lock is taken here too. */
1232 qemu_mutex_lock_ramlist();
1233 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1234 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1235 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1240 qemu_mutex_unlock_ramlist();
1243 static int memory_try_enable_merging(void *addr, size_t len)
1245 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1246 /* disabled by the user */
1250 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1253 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1256 RAMBlock *block, *new_block;
1257 ram_addr_t old_ram_size, new_ram_size;
1259 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1261 size = TARGET_PAGE_ALIGN(size);
1262 new_block = g_malloc0(sizeof(*new_block));
1265 /* This assumes the iothread lock is taken here too. */
1266 qemu_mutex_lock_ramlist();
1268 new_block->offset = find_ram_offset(size);
1270 new_block->host = host;
1271 new_block->flags |= RAM_PREALLOC_MASK;
1272 } else if (xen_enabled()) {
1274 fprintf(stderr, "-mem-path not supported with Xen\n");
1277 xen_ram_alloc(new_block->offset, size, mr);
1280 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1282 * file_ram_alloc() needs to allocate just like
1283 * phys_mem_alloc, but we haven't bothered to provide
1287 "-mem-path not supported with this accelerator\n");
1290 new_block->host = file_ram_alloc(new_block, size, mem_path);
1292 if (!new_block->host) {
1293 new_block->host = phys_mem_alloc(size);
1294 if (!new_block->host) {
1295 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1296 new_block->mr->name, strerror(errno));
1299 memory_try_enable_merging(new_block->host, size);
1302 new_block->length = size;
1304 /* Keep the list sorted from biggest to smallest block. */
1305 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1306 if (block->length < new_block->length) {
1311 QTAILQ_INSERT_BEFORE(block, new_block, next);
1313 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1315 ram_list.mru_block = NULL;
1318 qemu_mutex_unlock_ramlist();
1320 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1322 if (new_ram_size > old_ram_size) {
1324 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1325 ram_list.dirty_memory[i] =
1326 bitmap_zero_extend(ram_list.dirty_memory[i],
1327 old_ram_size, new_ram_size);
1330 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1332 qemu_ram_setup_dump(new_block->host, size);
1333 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1334 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1337 kvm_setup_guest_memory(new_block->host, size);
1339 return new_block->offset;
1342 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1344 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1347 void qemu_ram_free_from_ptr(ram_addr_t addr)
1351 /* This assumes the iothread lock is taken here too. */
1352 qemu_mutex_lock_ramlist();
1353 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1354 if (addr == block->offset) {
1355 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1356 ram_list.mru_block = NULL;
1362 qemu_mutex_unlock_ramlist();
1365 void qemu_ram_free(ram_addr_t addr)
1369 /* This assumes the iothread lock is taken here too. */
1370 qemu_mutex_lock_ramlist();
1371 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1372 if (addr == block->offset) {
1373 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1374 ram_list.mru_block = NULL;
1376 if (block->flags & RAM_PREALLOC_MASK) {
1378 } else if (xen_enabled()) {
1379 xen_invalidate_map_cache_entry(block->host);
1381 } else if (block->fd >= 0) {
1382 munmap(block->host, block->length);
1386 qemu_anon_ram_free(block->host, block->length);
1392 qemu_mutex_unlock_ramlist();
1397 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1404 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1405 offset = addr - block->offset;
1406 if (offset < block->length) {
1407 vaddr = block->host + offset;
1408 if (block->flags & RAM_PREALLOC_MASK) {
1410 } else if (xen_enabled()) {
1414 munmap(vaddr, length);
1415 if (block->fd >= 0) {
1417 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1420 flags |= MAP_PRIVATE;
1422 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1423 flags, block->fd, offset);
1426 * Remap needs to match alloc. Accelerators that
1427 * set phys_mem_alloc never remap. If they did,
1428 * we'd need a remap hook here.
1430 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1432 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1433 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1436 if (area != vaddr) {
1437 fprintf(stderr, "Could not remap addr: "
1438 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1442 memory_try_enable_merging(vaddr, length);
1443 qemu_ram_setup_dump(vaddr, length);
1449 #endif /* !_WIN32 */
1451 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1452 With the exception of the softmmu code in this file, this should
1453 only be used for local memory (e.g. video ram) that the device owns,
1454 and knows it isn't going to access beyond the end of the block.
1456 It should not be used for general purpose DMA.
1457 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1459 void *qemu_get_ram_ptr(ram_addr_t addr)
1461 RAMBlock *block = qemu_get_ram_block(addr);
1463 if (xen_enabled()) {
1464 /* We need to check if the requested address is in RAM
1465 * because we don't want to map the entire guest memory in QEMU.
1466 * In that case, just map until the end of the page.
1468 if (block->offset == 0) {
1469 return xen_map_cache(addr, 0, 0);
1470 } else if (block->host == NULL) {
1472 xen_map_cache(block->offset, block->length, 1);
1475 return block->host + (addr - block->offset);
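/* Illustrative contrast (not in the original file): per the comment above,
 * device code that owns a RAM block may use qemu_get_ram_ptr() on offsets
 * inside that block, while arbitrary guest addresses (which may be MMIO)
 * should go through cpu_physical_memory_rw() or cpu_physical_memory_map()
 * instead.  "ram_offset" is assumed to have been returned by qemu_ram_alloc(). */
static void example_ram_access(ram_addr_t ram_offset, hwaddr guest_paddr)
{
    uint8_t buf[16] = { 0 };

    /* OK: the device owns this block and stays within its length. */
    memset(qemu_get_ram_ptr(ram_offset), 0, sizeof(buf));

    /* Preferred for general-purpose accesses such as DMA. */
    cpu_physical_memory_rw(guest_paddr, buf, sizeof(buf), 1);
}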
1478 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1479 * but takes a size argument */
1480 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1485 if (xen_enabled()) {
1486 return xen_map_cache(addr, *size, 1);
1490 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1491 if (addr - block->offset < block->length) {
1492 if (addr - block->offset + *size > block->length)
1493 *size = block->length - addr + block->offset;
1494 return block->host + (addr - block->offset);
1498 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1503 /* Some of the softmmu routines need to translate from a host pointer
1504 (typically a TLB entry) back to a ram offset. */
1505 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1508 uint8_t *host = ptr;
1510 if (xen_enabled()) {
1511 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1512 return qemu_get_ram_block(*ram_addr)->mr;
1515 block = ram_list.mru_block;
1516 if (block && block->host && host - block->host < block->length) {
1520 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1521 /* This case appears when the block is not mapped. */
1522 if (block->host == NULL) {
1525 if (host - block->host < block->length) {
1533 *ram_addr = block->offset + (host - block->host);
1537 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1538 uint64_t val, unsigned size)
1540 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1541 tb_invalidate_phys_page_fast(ram_addr, size);
1545 stb_p(qemu_get_ram_ptr(ram_addr), val);
1548 stw_p(qemu_get_ram_ptr(ram_addr), val);
1551 stl_p(qemu_get_ram_ptr(ram_addr), val);
1556 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1557 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1558 /* we remove the notdirty callback only if the code has been
1560 if (!cpu_physical_memory_is_clean(ram_addr)) {
1561 CPUArchState *env = current_cpu->env_ptr;
1562 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1566 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1567 unsigned size, bool is_write)
1572 static const MemoryRegionOps notdirty_mem_ops = {
1573 .write = notdirty_mem_write,
1574 .valid.accepts = notdirty_mem_accepts,
1575 .endianness = DEVICE_NATIVE_ENDIAN,
1578 /* Generate a debug exception if a watchpoint has been hit. */
1579 static void check_watchpoint(int offset, int len_mask, int flags)
1581 CPUState *cpu = current_cpu;
1582 CPUArchState *env = cpu->env_ptr;
1583 target_ulong pc, cs_base;
1588 if (cpu->watchpoint_hit) {
1589 /* We re-entered the check after replacing the TB. Now raise
1590 * the debug interrupt so that it will trigger after the
1591 * current instruction. */
1592 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1595 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1596 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1597 if ((vaddr == (wp->vaddr & len_mask) ||
1598 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1599 wp->flags |= BP_WATCHPOINT_HIT;
1600 if (!cpu->watchpoint_hit) {
1601 cpu->watchpoint_hit = wp;
1602 tb_check_watchpoint(cpu);
1603 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1604 cpu->exception_index = EXCP_DEBUG;
1607 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1608 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1609 cpu_resume_from_signal(cpu, NULL);
1613 wp->flags &= ~BP_WATCHPOINT_HIT;
1618 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1619 so these check for a hit then pass through to the normal out-of-line
1621 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1624 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1626 case 1: return ldub_phys(&address_space_memory, addr);
1627 case 2: return lduw_phys(&address_space_memory, addr);
1628 case 4: return ldl_phys(&address_space_memory, addr);
1633 static void watch_mem_write(void *opaque, hwaddr addr,
1634 uint64_t val, unsigned size)
1636 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1639 stb_phys(&address_space_memory, addr, val);
1642 stw_phys(&address_space_memory, addr, val);
1645 stl_phys(&address_space_memory, addr, val);
1651 static const MemoryRegionOps watch_mem_ops = {
1652 .read = watch_mem_read,
1653 .write = watch_mem_write,
1654 .endianness = DEVICE_NATIVE_ENDIAN,
1657 static uint64_t subpage_read(void *opaque, hwaddr addr,
1660 subpage_t *subpage = opaque;
1663 #if defined(DEBUG_SUBPAGE)
1664 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1665 subpage, len, addr);
1667 address_space_read(subpage->as, addr + subpage->base, buf, len);
1680 static void subpage_write(void *opaque, hwaddr addr,
1681 uint64_t value, unsigned len)
1683 subpage_t *subpage = opaque;
1686 #if defined(DEBUG_SUBPAGE)
1687 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1688 " value %"PRIx64"\n",
1689 __func__, subpage, len, addr, value);
1704 address_space_write(subpage->as, addr + subpage->base, buf, len);
1707 static bool subpage_accepts(void *opaque, hwaddr addr,
1708 unsigned len, bool is_write)
1710 subpage_t *subpage = opaque;
1711 #if defined(DEBUG_SUBPAGE)
1712 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1713 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1716 return address_space_access_valid(subpage->as, addr + subpage->base,
1720 static const MemoryRegionOps subpage_ops = {
1721 .read = subpage_read,
1722 .write = subpage_write,
1723 .valid.accepts = subpage_accepts,
1724 .endianness = DEVICE_NATIVE_ENDIAN,
1727 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1732 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1734 idx = SUBPAGE_IDX(start);
1735 eidx = SUBPAGE_IDX(end);
1736 #if defined(DEBUG_SUBPAGE)
1737 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1738 __func__, mmio, start, end, idx, eidx, section);
1740 for (; idx <= eidx; idx++) {
1741 mmio->sub_section[idx] = section;
1747 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1751 mmio = g_malloc0(sizeof(subpage_t));
1755 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1756 "subpage", TARGET_PAGE_SIZE);
1757 mmio->iomem.subpage = true;
1758 #if defined(DEBUG_SUBPAGE)
1759 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1760 mmio, base, TARGET_PAGE_SIZE);
1762 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1767 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1769 MemoryRegionSection section = {
1770 .address_space = &address_space_memory,
1772 .offset_within_address_space = 0,
1773 .offset_within_region = 0,
1774 .size = int128_2_64(),
1777 return phys_section_add(map, &section);
1780 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1782 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1785 static void io_mem_init(void)
1787 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1788 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1789 "unassigned", UINT64_MAX);
1790 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1791 "notdirty", UINT64_MAX);
1792 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1793 "watch", UINT64_MAX);
1796 static void mem_begin(MemoryListener *listener)
1798 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1799 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1802 n = dummy_section(&d->map, &io_mem_unassigned);
1803 assert(n == PHYS_SECTION_UNASSIGNED);
1804 n = dummy_section(&d->map, &io_mem_notdirty);
1805 assert(n == PHYS_SECTION_NOTDIRTY);
1806 n = dummy_section(&d->map, &io_mem_rom);
1807 assert(n == PHYS_SECTION_ROM);
1808 n = dummy_section(&d->map, &io_mem_watch);
1809 assert(n == PHYS_SECTION_WATCH);
1811 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1813 as->next_dispatch = d;
1816 static void mem_commit(MemoryListener *listener)
1818 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1819 AddressSpaceDispatch *cur = as->dispatch;
1820 AddressSpaceDispatch *next = as->next_dispatch;
1822 phys_page_compact_all(next, next->map.nodes_nb);
1824 as->dispatch = next;
1827 phys_sections_free(&cur->map);
1832 static void tcg_commit(MemoryListener *listener)
1836 /* since each CPU stores ram addresses in its TLB cache, we must
1837 reset the modified entries */
1840 CPUArchState *env = cpu->env_ptr;
1842 /* FIXME: Disentangle the cpu.h circular header dependencies so we can
1843 directly get the right CPU from the listener. */
1844 if (cpu->tcg_as_listener != listener) {
1851 static void core_log_global_start(MemoryListener *listener)
1853 cpu_physical_memory_set_dirty_tracking(true);
1856 static void core_log_global_stop(MemoryListener *listener)
1858 cpu_physical_memory_set_dirty_tracking(false);
1861 static MemoryListener core_memory_listener = {
1862 .log_global_start = core_log_global_start,
1863 .log_global_stop = core_log_global_stop,
1867 void address_space_init_dispatch(AddressSpace *as)
1869 as->dispatch = NULL;
1870 as->dispatch_listener = (MemoryListener) {
1872 .commit = mem_commit,
1873 .region_add = mem_add,
1874 .region_nop = mem_add,
1877 memory_listener_register(&as->dispatch_listener, as);
1880 void address_space_destroy_dispatch(AddressSpace *as)
1882 AddressSpaceDispatch *d = as->dispatch;
1884 memory_listener_unregister(&as->dispatch_listener);
1886 as->dispatch = NULL;
1889 static void memory_map_init(void)
1891 system_memory = g_malloc(sizeof(*system_memory));
1893 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1894 address_space_init(&address_space_memory, system_memory, "memory");
1896 system_io = g_malloc(sizeof(*system_io));
1897 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1899 address_space_init(&address_space_io, system_io, "I/O");
1901 memory_listener_register(&core_memory_listener, &address_space_memory);
1904 MemoryRegion *get_system_memory(void)
1906 return system_memory;
1909 MemoryRegion *get_system_io(void)
1914 #endif /* !defined(CONFIG_USER_ONLY) */
1916 /* physical memory access (slow version, mainly for debug) */
1917 #if defined(CONFIG_USER_ONLY)
1918 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1919 uint8_t *buf, int len, int is_write)
1926 page = addr & TARGET_PAGE_MASK;
1927 l = (page + TARGET_PAGE_SIZE) - addr;
1930 flags = page_get_flags(page);
1931 if (!(flags & PAGE_VALID))
1934 if (!(flags & PAGE_WRITE))
1936 /* XXX: this code should not depend on lock_user */
1937 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1940 unlock_user(p, addr, l);
1942 if (!(flags & PAGE_READ))
1944 /* XXX: this code should not depend on lock_user */
1945 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1948 unlock_user(p, addr, 0);
1959 static void invalidate_and_set_dirty(hwaddr addr,
1962 if (cpu_physical_memory_is_clean(addr)) {
1963 /* invalidate code */
1964 tb_invalidate_phys_page_range(addr, addr + length, 0);
1966 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1967 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1969 xen_modified_memory(addr, length);
1972 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1974 unsigned access_size_max = mr->ops->valid.max_access_size;
1976 /* Regions are assumed to support 1-4 byte accesses unless
1977 otherwise specified. */
1978 if (access_size_max == 0) {
1979 access_size_max = 4;
1982 /* Bound the maximum access by the alignment of the address. */
1983 if (!mr->ops->impl.unaligned) {
1984 unsigned align_size_max = addr & -addr;
1985 if (align_size_max != 0 && align_size_max < access_size_max) {
1986 access_size_max = align_size_max;
1990 /* Don't attempt accesses larger than the maximum. */
1991 if (l > access_size_max) {
1992 l = access_size_max;
1995 l = 1 << (qemu_fls(l) - 1);
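/* Worked example (illustrative, not in the original file): a 7-byte request
 * at an address whose low bits are ...10 against a region with the default
 * 1-4 byte limit is clamped to 2 by the alignment check, then rounded down
 * to the power of two 2; address_space_rw() below issues the remaining
 * bytes as further accesses. */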
2001 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2002 int len, bool is_write)
2013 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2016 if (!memory_access_is_direct(mr, is_write)) {
2017 l = memory_access_size(mr, l, addr1);
2018 /* XXX: could force current_cpu to NULL to avoid
2022 /* 64 bit write access */
2024 error |= io_mem_write(mr, addr1, val, 8);
2027 /* 32 bit write access */
2029 error |= io_mem_write(mr, addr1, val, 4);
2032 /* 16 bit write access */
2034 error |= io_mem_write(mr, addr1, val, 2);
2037 /* 8 bit write access */
2039 error |= io_mem_write(mr, addr1, val, 1);
2045 addr1 += memory_region_get_ram_addr(mr);
2047 ptr = qemu_get_ram_ptr(addr1);
2048 memcpy(ptr, buf, l);
2049 invalidate_and_set_dirty(addr1, l);
2052 if (!memory_access_is_direct(mr, is_write)) {
2054 l = memory_access_size(mr, l, addr1);
2057 /* 64 bit read access */
2058 error |= io_mem_read(mr, addr1, &val, 8);
2062 /* 32 bit read access */
2063 error |= io_mem_read(mr, addr1, &val, 4);
2067 /* 16 bit read access */
2068 error |= io_mem_read(mr, addr1, &val, 2);
2072 /* 8 bit read access */
2073 error |= io_mem_read(mr, addr1, &val, 1);
2081 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2082 memcpy(buf, ptr, l);
2093 bool address_space_write(AddressSpace *as, hwaddr addr,
2094 const uint8_t *buf, int len)
2096 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2099 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2101 return address_space_rw(as, addr, buf, len, false);
2105 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2106 int len, int is_write)
2108 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2111 enum write_rom_type {
2116 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2117 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2126 mr = address_space_translate(as, addr, &addr1, &l, true);
2128 if (!(memory_region_is_ram(mr) ||
2129 memory_region_is_romd(mr))) {
2132 addr1 += memory_region_get_ram_addr(mr);
2134 ptr = qemu_get_ram_ptr(addr1);
2137 memcpy(ptr, buf, l);
2138 invalidate_and_set_dirty(addr1, l);
2141 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2151 /* used for ROM loading: can write in RAM and ROM */
2152 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2153 const uint8_t *buf, int len)
2155 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
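/* Usage sketch (illustrative, not in the original file): a board's firmware
 * loader copying a blob into a ROM region, which a plain address_space_write()
 * would not land in; "blob" and "blob_size" are assumed inputs. */
static void example_load_firmware(const uint8_t *blob, int blob_size, hwaddr rom_base)
{
    cpu_physical_memory_write_rom(&address_space_memory, rom_base, blob, blob_size);
}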
2158 void cpu_flush_icache_range(hwaddr start, int len)
2161 * This function should do the same thing as an icache flush that was
2162 * triggered from within the guest. For TCG we are always cache coherent,
2163 * so there is no need to flush anything. For KVM / Xen we need to flush
2164 * the host's instruction cache at least.
2166 if (tcg_enabled()) {
2170 cpu_physical_memory_write_rom_internal(&address_space_memory,
2171 start, NULL, len, FLUSH_CACHE);
2181 static BounceBuffer bounce;
2183 typedef struct MapClient {
2185 void (*callback)(void *opaque);
2186 QLIST_ENTRY(MapClient) link;
2189 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2190 = QLIST_HEAD_INITIALIZER(map_client_list);
2192 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2194 MapClient *client = g_malloc(sizeof(*client));
2196 client->opaque = opaque;
2197 client->callback = callback;
2198 QLIST_INSERT_HEAD(&map_client_list, client, link);
2202 static void cpu_unregister_map_client(void *_client)
2204 MapClient *client = (MapClient *)_client;
2206 QLIST_REMOVE(client, link);
2210 static void cpu_notify_map_clients(void)
2214 while (!QLIST_EMPTY(&map_client_list)) {
2215 client = QLIST_FIRST(&map_client_list);
2216 client->callback(client->opaque);
2217 cpu_unregister_map_client(client);
2221 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2228 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2229 if (!memory_access_is_direct(mr, is_write)) {
2230 l = memory_access_size(mr, l, addr);
2231 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2242 /* Map a physical memory region into a host virtual address.
2243 * May map a subset of the requested range, given by and returned in *plen.
2244 * May return NULL if resources needed to perform the mapping are exhausted.
2245 * Use only for reads OR writes - not for read-modify-write operations.
2246 * Use cpu_register_map_client() to know when retrying the map operation is
2247 * likely to succeed.
2249 void *address_space_map(AddressSpace *as,
2256 hwaddr l, xlat, base;
2257 MemoryRegion *mr, *this_mr;
2265 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2266 if (!memory_access_is_direct(mr, is_write)) {
2267 if (bounce.buffer) {
2270 /* Avoid unbounded allocations */
2271 l = MIN(l, TARGET_PAGE_SIZE);
2272 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2276 memory_region_ref(mr);
2279 address_space_read(as, addr, bounce.buffer, l);
2283 return bounce.buffer;
2287 raddr = memory_region_get_ram_addr(mr);
2298 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2299 if (this_mr != mr || xlat != base + done) {
2304 memory_region_ref(mr);
2306 return qemu_ram_ptr_length(raddr + base, plen);
2309 /* Unmaps a memory region previously mapped by address_space_map().
2310 * Will also mark the memory as dirty if is_write == 1. access_len gives
2311 * the amount of memory that was actually read or written by the caller.
2313 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2314 int is_write, hwaddr access_len)
2316 if (buffer != bounce.buffer) {
2320 mr = qemu_ram_addr_from_host(buffer, &addr1);
2323 while (access_len) {
2325 l = TARGET_PAGE_SIZE;
2328 invalidate_and_set_dirty(addr1, l);
2333 if (xen_enabled()) {
2334 xen_invalidate_map_cache_entry(buffer);
2336 memory_region_unref(mr);
2340 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2342 qemu_vfree(bounce.buffer);
2343 bounce.buffer = NULL;
2344 memory_region_unref(bounce.mr);
2345 cpu_notify_map_clients();
2348 void *cpu_physical_memory_map(hwaddr addr,
2352 return address_space_map(&address_space_memory, addr, plen, is_write);
2355 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2356 int is_write, hwaddr access_len)
2358 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
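/* Usage sketch (illustrative, not in the original file): the map/unmap pair
 * used for zero-copy DMA, with cpu_register_map_client() used to be notified
 * when the single bounce buffer frees up.  "example_retry_cb" is a
 * hypothetical callback that would re-issue the transfer. */
static void example_retry_cb(void *opaque)
{
    /* A real device would re-schedule its DMA operation here. */
}

static void example_dma(AddressSpace *as, hwaddr addr, hwaddr want, bool is_write)
{
    hwaddr len = want;
    void *p = address_space_map(as, addr, &len, is_write);

    if (!p) {
        /* Mapping needed the bounce buffer and it is busy: retry later. */
        cpu_register_map_client(NULL, example_retry_cb);
        return;
    }
    /* ... access up to "len" bytes at p (may be less than "want") ... */
    address_space_unmap(as, p, len, is_write, len);
}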
2361 /* warning: addr must be aligned */
2362 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2363 enum device_endian endian)
2371 mr = address_space_translate(as, addr, &addr1, &l, false);
2372 if (l < 4 || !memory_access_is_direct(mr, false)) {
2374 io_mem_read(mr, addr1, &val, 4);
2375 #if defined(TARGET_WORDS_BIGENDIAN)
2376 if (endian == DEVICE_LITTLE_ENDIAN) {
2380 if (endian == DEVICE_BIG_ENDIAN) {
2386 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2390 case DEVICE_LITTLE_ENDIAN:
2391 val = ldl_le_p(ptr);
2393 case DEVICE_BIG_ENDIAN:
2394 val = ldl_be_p(ptr);
2404 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2406 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2409 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2411 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2414 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2416 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2419 /* warning: addr must be aligned */
2420 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2421 enum device_endian endian)
2429 mr = address_space_translate(as, addr, &addr1, &l,
2431 if (l < 8 || !memory_access_is_direct(mr, false)) {
2433 io_mem_read(mr, addr1, &val, 8);
2434 #if defined(TARGET_WORDS_BIGENDIAN)
2435 if (endian == DEVICE_LITTLE_ENDIAN) {
2439 if (endian == DEVICE_BIG_ENDIAN) {
2445 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2449 case DEVICE_LITTLE_ENDIAN:
2450 val = ldq_le_p(ptr);
2452 case DEVICE_BIG_ENDIAN:
2453 val = ldq_be_p(ptr);
2463 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2465 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2468 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2470 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2473 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2475 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2479 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2482 address_space_rw(as, addr, &val, 1, 0);
2486 /* warning: addr must be aligned */
2487 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2488 enum device_endian endian)
2496 mr = address_space_translate(as, addr, &addr1, &l,
2498 if (l < 2 || !memory_access_is_direct(mr, false)) {
2500 io_mem_read(mr, addr1, &val, 2);
2501 #if defined(TARGET_WORDS_BIGENDIAN)
2502 if (endian == DEVICE_LITTLE_ENDIAN) {
2506 if (endian == DEVICE_BIG_ENDIAN) {
2512 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2516 case DEVICE_LITTLE_ENDIAN:
2517 val = lduw_le_p(ptr);
2519 case DEVICE_BIG_ENDIAN:
2520 val = lduw_be_p(ptr);
2530 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2532 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2535 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2537 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2540 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2542 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2545 /* warning: addr must be aligned. The ram page is not masked as dirty
2546 and the code inside is not invalidated. It is useful if the dirty
2547 bits are used to track modified PTEs */
2548 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2555 mr = address_space_translate(as, addr, &addr1, &l,
2557 if (l < 4 || !memory_access_is_direct(mr, true)) {
2558 io_mem_write(mr, addr1, val, 4);
2560 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2561 ptr = qemu_get_ram_ptr(addr1);
2564 if (unlikely(in_migration)) {
2565 if (cpu_physical_memory_is_clean(addr1)) {
2566 /* invalidate code */
2567 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2569 cpu_physical_memory_set_dirty_flag(addr1,
2570 DIRTY_MEMORY_MIGRATION);
2571 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
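/* Usage sketch (illustrative, not in the original file): a target MMU helper
 * setting an accessed bit in a guest page-table entry.  As described above,
 * stl_phys_notdirty() deliberately skips the usual dirty marking and code
 * invalidation; the 0x20 accessed bit is only an example value. */
static void example_mark_pte_accessed(AddressSpace *as, hwaddr pte_pa, uint32_t pte)
{
    stl_phys_notdirty(as, pte_pa, pte | 0x20);
}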
2577 /* warning: addr must be aligned */
2578 static inline void stl_phys_internal(AddressSpace *as,
2579 hwaddr addr, uint32_t val,
2580 enum device_endian endian)
2587 mr = address_space_translate(as, addr, &addr1, &l,
2589 if (l < 4 || !memory_access_is_direct(mr, true)) {
2590 #if defined(TARGET_WORDS_BIGENDIAN)
2591 if (endian == DEVICE_LITTLE_ENDIAN) {
2595 if (endian == DEVICE_BIG_ENDIAN) {
2599 io_mem_write(mr, addr1, val, 4);
2602 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2603 ptr = qemu_get_ram_ptr(addr1);
2605 case DEVICE_LITTLE_ENDIAN:
2608 case DEVICE_BIG_ENDIAN:
2615 invalidate_and_set_dirty(addr1, 4);
2619 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2621 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2624 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2626 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2629 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2631 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2635 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2638 address_space_rw(as, addr, &v, 1, 1);
2641 /* warning: addr must be aligned */
2642 static inline void stw_phys_internal(AddressSpace *as,
2643 hwaddr addr, uint32_t val,
2644 enum device_endian endian)
2651 mr = address_space_translate(as, addr, &addr1, &l, true);
2652 if (l < 2 || !memory_access_is_direct(mr, true)) {
2653 #if defined(TARGET_WORDS_BIGENDIAN)
2654 if (endian == DEVICE_LITTLE_ENDIAN) {
2658 if (endian == DEVICE_BIG_ENDIAN) {
2662 io_mem_write(mr, addr1, val, 2);
2665 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2666 ptr = qemu_get_ram_ptr(addr1);
2668 case DEVICE_LITTLE_ENDIAN:
2671 case DEVICE_BIG_ENDIAN:
2678 invalidate_and_set_dirty(addr1, 2);
2682 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2684 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2687 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2689 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2692 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2694 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2698 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2701 address_space_rw(as, addr, (void *) &val, 8, 1);
2704 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2706 val = cpu_to_le64(val);
2707 address_space_rw(as, addr, (void *) &val, 8, 1);
2710 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2712 val = cpu_to_be64(val);
2713 address_space_rw(as, addr, (void *) &val, 8, 1);
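/* Usage sketch (illustrative, not in the original file): reading and updating
 * a 32-bit little-endian field at an assumed guest-physical address "desc_pa",
 * using the endian-explicit helpers defined above. */
static void example_update_le_field(AddressSpace *as, hwaddr desc_pa)
{
    uint32_t flags = ldl_le_phys(as, desc_pa);

    stl_le_phys(as, desc_pa, flags | 1);
}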
2716 /* virtual memory access for debug (includes writing to ROM) */
2717 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2718 uint8_t *buf, int len, int is_write)
2725 page = addr & TARGET_PAGE_MASK;
2726 phys_addr = cpu_get_phys_page_debug(cpu, page);
2727 /* if no physical page mapped, return an error */
2728 if (phys_addr == -1)
2730 l = (page + TARGET_PAGE_SIZE) - addr;
2733 phys_addr += (addr & ~TARGET_PAGE_MASK);
2735 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2737 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2747 #if !defined(CONFIG_USER_ONLY)
2750 * A helper function for the _utterly broken_ virtio device model to find out if
2751 * it's running on a big endian machine. Don't do this at home kids!
2753 bool virtio_is_big_endian(void);
2754 bool virtio_is_big_endian(void)
2756 #if defined(TARGET_WORDS_BIGENDIAN)
2765 #ifndef CONFIG_USER_ONLY
2766 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2771 mr = address_space_translate(&address_space_memory,
2772 phys_addr, &phys_addr, &l, false);
2774 return !(memory_region_is_ram(mr) ||
2775 memory_region_is_romd(mr));
2778 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2782 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2783 func(block->host, block->offset, block->length, opaque);
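/* Usage sketch (illustrative, not in the original file): walking every RAM
 * block with a callback whose parameters match the
 * func(block->host, block->offset, block->length, opaque) call above,
 * here simply summing the total RAM size. */
static void example_add_block_length(void *host, ram_addr_t offset,
                                     ram_addr_t length, void *opaque)
{
    *(ram_addr_t *)opaque += length;
}

static ram_addr_t example_total_ram_size(void)
{
    ram_addr_t total = 0;

    qemu_ram_foreach_block(example_add_block_length, &total);
    return total;
}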