4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
55 #include "qemu/range.h"
57 //#define DEBUG_SUBPAGE
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
75 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
76 /* current CPU in the current thread. It is only valid inside
78 DEFINE_TLS(CPUState *, current_cpu);
79 /* 0 = Do not count executed instructions.
80 1 = Precise instruction counting.
81 2 = Adaptive rate instruction counting. */
84 #if !defined(CONFIG_USER_ONLY)
86 typedef struct PhysPageEntry PhysPageEntry;
88 struct PhysPageEntry {
89 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
91 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
95 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
97 /* Size of the L2 (and L3, etc) page tables. */
98 #define ADDR_SPACE_BITS 64
101 #define P_L2_SIZE (1 << P_L2_BITS)
103 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
105 typedef PhysPageEntry Node[P_L2_SIZE];
107 typedef struct PhysPageMap {
108 unsigned sections_nb;
109 unsigned sections_nb_alloc;
111 unsigned nodes_nb_alloc;
113 MemoryRegionSection *sections;
116 struct AddressSpaceDispatch {
117 /* This is a multi-level map on the physical address space.
118 * The bottom level has pointers to MemoryRegionSections.
120 PhysPageEntry phys_map;
125 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
126 typedef struct subpage_t {
130 uint16_t sub_section[TARGET_PAGE_SIZE];
133 #define PHYS_SECTION_UNASSIGNED 0
134 #define PHYS_SECTION_NOTDIRTY 1
135 #define PHYS_SECTION_ROM 2
136 #define PHYS_SECTION_WATCH 3
138 static void io_mem_init(void);
139 static void memory_map_init(void);
140 static void tcg_commit(MemoryListener *listener);
142 static MemoryRegion io_mem_watch;
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
183 for (i = 0; i < P_L2_SIZE; i++) {
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221 unsigned valid_ptr = P_L2_SIZE;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
265 lp->skip += p[valid_ptr].skip;
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
282 hwaddr index = addr >> TARGET_PAGE_BITS;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return §ions[PHYS_SECTION_UNASSIGNED];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return §ions[lp.ptr];
298 return §ions[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff = int128_sub(section->mr->size, int128_make64(addr));
338 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
342 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
344 if (memory_region_is_ram(mr)) {
345 return !(is_write && mr->readonly);
347 if (memory_region_is_romd(mr)) {
354 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
355 hwaddr *xlat, hwaddr *plen,
359 MemoryRegionSection *section;
364 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
367 if (!mr->iommu_ops) {
371 iotlb = mr->iommu_ops->translate(mr, addr);
372 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
373 | (addr & iotlb.addr_mask));
374 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
375 if (!(iotlb.perm & (1 << is_write))) {
376 mr = &io_mem_unassigned;
380 as = iotlb.target_as;
383 if (memory_access_is_direct(mr, is_write)) {
384 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
385 len = MIN(page, len);
393 MemoryRegionSection *
394 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
397 MemoryRegionSection *section;
398 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
400 assert(!section->mr->iommu_ops);
405 void cpu_exec_init_all(void)
407 #if !defined(CONFIG_USER_ONLY)
408 qemu_mutex_init(&ram_list.mutex);
414 #if !defined(CONFIG_USER_ONLY)
416 static int cpu_common_post_load(void *opaque, int version_id)
418 CPUState *cpu = opaque;
420 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
421 version_id is increased. */
422 cpu->interrupt_request &= ~0x01;
423 tlb_flush(cpu->env_ptr, 1);
428 const VMStateDescription vmstate_cpu_common = {
429 .name = "cpu_common",
431 .minimum_version_id = 1,
432 .minimum_version_id_old = 1,
433 .post_load = cpu_common_post_load,
434 .fields = (VMStateField []) {
435 VMSTATE_UINT32(halted, CPUState),
436 VMSTATE_UINT32(interrupt_request, CPUState),
437 VMSTATE_END_OF_LIST()
443 CPUState *qemu_get_cpu(int index)
448 if (cpu->cpu_index == index) {
456 #if !defined(CONFIG_USER_ONLY)
457 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
459 /* We only support one address space per cpu at the moment. */
460 assert(cpu->as == as);
462 if (cpu->tcg_as_listener) {
463 memory_listener_unregister(cpu->tcg_as_listener);
465 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
467 cpu->tcg_as_listener->commit = tcg_commit;
468 memory_listener_register(cpu->tcg_as_listener, as);
472 void cpu_exec_init(CPUArchState *env)
474 CPUState *cpu = ENV_GET_CPU(env);
475 CPUClass *cc = CPU_GET_CLASS(cpu);
479 #if defined(CONFIG_USER_ONLY)
483 CPU_FOREACH(some_cpu) {
486 cpu->cpu_index = cpu_index;
488 QTAILQ_INIT(&cpu->breakpoints);
489 QTAILQ_INIT(&cpu->watchpoints);
490 #ifndef CONFIG_USER_ONLY
491 cpu->as = &address_space_memory;
492 cpu->thread_id = qemu_get_thread_id();
494 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
495 #if defined(CONFIG_USER_ONLY)
498 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
499 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
501 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
502 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
503 cpu_save, cpu_load, env);
504 assert(cc->vmsd == NULL);
505 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
507 if (cc->vmsd != NULL) {
508 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
512 #if defined(TARGET_HAS_ICE)
513 #if defined(CONFIG_USER_ONLY)
514 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
516 tb_invalidate_phys_page_range(pc, pc + 1, 0);
519 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
521 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
523 tb_invalidate_phys_addr(cpu->as,
524 phys | (pc & ~TARGET_PAGE_MASK));
528 #endif /* TARGET_HAS_ICE */
530 #if defined(CONFIG_USER_ONLY)
531 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
536 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
537 int flags, CPUWatchpoint **watchpoint)
542 /* Add a watchpoint. */
543 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
544 int flags, CPUWatchpoint **watchpoint)
546 vaddr len_mask = ~(len - 1);
549 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
550 if ((len & (len - 1)) || (addr & ~len_mask) ||
551 len == 0 || len > TARGET_PAGE_SIZE) {
552 error_report("tried to set invalid watchpoint at %"
553 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
556 wp = g_malloc(sizeof(*wp));
559 wp->len_mask = len_mask;
562 /* keep all GDB-injected watchpoints in front */
563 if (flags & BP_GDB) {
564 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
566 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
569 tlb_flush_page(cpu, addr);
576 /* Remove a specific watchpoint. */
577 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
580 vaddr len_mask = ~(len - 1);
583 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
584 if (addr == wp->vaddr && len_mask == wp->len_mask
585 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
586 cpu_watchpoint_remove_by_ref(cpu, wp);
593 /* Remove a specific watchpoint by reference. */
594 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
596 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
598 tlb_flush_page(cpu, watchpoint->vaddr);
603 /* Remove all matching watchpoints. */
604 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
606 CPUWatchpoint *wp, *next;
608 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
609 if (wp->flags & mask) {
610 cpu_watchpoint_remove_by_ref(cpu, wp);
616 /* Add a breakpoint. */
617 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
618 CPUBreakpoint **breakpoint)
620 #if defined(TARGET_HAS_ICE)
623 bp = g_malloc(sizeof(*bp));
628 /* keep all GDB-injected breakpoints in front */
629 if (flags & BP_GDB) {
630 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
632 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
635 breakpoint_invalidate(cpu, pc);
646 /* Remove a specific breakpoint. */
647 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
649 #if defined(TARGET_HAS_ICE)
652 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
653 if (bp->pc == pc && bp->flags == flags) {
654 cpu_breakpoint_remove_by_ref(cpu, bp);
664 /* Remove a specific breakpoint by reference. */
665 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
667 #if defined(TARGET_HAS_ICE)
668 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
670 breakpoint_invalidate(cpu, breakpoint->pc);
676 /* Remove all matching breakpoints. */
677 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
679 #if defined(TARGET_HAS_ICE)
680 CPUBreakpoint *bp, *next;
682 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
683 if (bp->flags & mask) {
684 cpu_breakpoint_remove_by_ref(cpu, bp);
690 /* enable or disable single step mode. EXCP_DEBUG is returned by the
691 CPU loop after each instruction */
692 void cpu_single_step(CPUState *cpu, int enabled)
694 #if defined(TARGET_HAS_ICE)
695 if (cpu->singlestep_enabled != enabled) {
696 cpu->singlestep_enabled = enabled;
698 kvm_update_guest_debug(cpu, 0);
700 /* must flush all the translated code to avoid inconsistencies */
701 /* XXX: only flush what is necessary */
702 CPUArchState *env = cpu->env_ptr;
709 void cpu_abort(CPUState *cpu, const char *fmt, ...)
716 fprintf(stderr, "qemu: fatal: ");
717 vfprintf(stderr, fmt, ap);
718 fprintf(stderr, "\n");
719 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
720 if (qemu_log_enabled()) {
721 qemu_log("qemu: fatal: ");
722 qemu_log_vprintf(fmt, ap2);
724 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
730 #if defined(CONFIG_USER_ONLY)
732 struct sigaction act;
733 sigfillset(&act.sa_mask);
734 act.sa_handler = SIG_DFL;
735 sigaction(SIGABRT, &act, NULL);
741 #if !defined(CONFIG_USER_ONLY)
742 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
746 /* The list is protected by the iothread lock here. */
747 block = ram_list.mru_block;
748 if (block && addr - block->offset < block->length) {
751 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
752 if (addr - block->offset < block->length) {
757 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
761 ram_list.mru_block = block;
765 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
771 end = TARGET_PAGE_ALIGN(start + length);
772 start &= TARGET_PAGE_MASK;
774 block = qemu_get_ram_block(start);
775 assert(block == qemu_get_ram_block(end - 1));
776 start1 = (uintptr_t)block->host + (start - block->offset);
777 cpu_tlb_reset_dirty_all(start1, length);
780 /* Note: start and end must be within the same ram block. */
781 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
786 cpu_physical_memory_clear_dirty_range(start, length, client);
789 tlb_reset_dirty_range_all(start, length);
793 static void cpu_physical_memory_set_dirty_tracking(bool enable)
795 in_migration = enable;
798 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
799 MemoryRegionSection *section,
801 hwaddr paddr, hwaddr xlat,
803 target_ulong *address)
808 if (memory_region_is_ram(section->mr)) {
810 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
812 if (!section->readonly) {
813 iotlb |= PHYS_SECTION_NOTDIRTY;
815 iotlb |= PHYS_SECTION_ROM;
818 iotlb = section - section->address_space->dispatch->map.sections;
822 /* Make accesses to pages with watchpoints go via the
823 watchpoint trap routines. */
824 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
825 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
826 /* Avoid trapping reads of pages with a write breakpoint. */
827 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
828 iotlb = PHYS_SECTION_WATCH + paddr;
829 *address |= TLB_MMIO;
837 #endif /* defined(CONFIG_USER_ONLY) */
839 #if !defined(CONFIG_USER_ONLY)
841 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
843 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
845 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
848 * Set a custom physical guest memory allocator.
849 * Accelerators with unusual needs may need this. Hopefully, we can
850 * get rid of it eventually.
852 void phys_mem_set_alloc(void *(*alloc)(size_t))
854 phys_mem_alloc = alloc;
857 static uint16_t phys_section_add(PhysPageMap *map,
858 MemoryRegionSection *section)
860 /* The physical section number is ORed with a page-aligned
861 * pointer to produce the iotlb entries. Thus it should
862 * never overflow into the page-aligned value.
864 assert(map->sections_nb < TARGET_PAGE_SIZE);
866 if (map->sections_nb == map->sections_nb_alloc) {
867 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
868 map->sections = g_renew(MemoryRegionSection, map->sections,
869 map->sections_nb_alloc);
871 map->sections[map->sections_nb] = *section;
872 memory_region_ref(section->mr);
873 return map->sections_nb++;
876 static void phys_section_destroy(MemoryRegion *mr)
878 memory_region_unref(mr);
881 subpage_t *subpage = container_of(mr, subpage_t, iomem);
882 memory_region_destroy(&subpage->iomem);
887 static void phys_sections_free(PhysPageMap *map)
889 while (map->sections_nb > 0) {
890 MemoryRegionSection *section = &map->sections[--map->sections_nb];
891 phys_section_destroy(section->mr);
893 g_free(map->sections);
897 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
900 hwaddr base = section->offset_within_address_space
902 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
903 d->map.nodes, d->map.sections);
904 MemoryRegionSection subsection = {
905 .offset_within_address_space = base,
906 .size = int128_make64(TARGET_PAGE_SIZE),
910 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
912 if (!(existing->mr->subpage)) {
913 subpage = subpage_init(d->as, base);
914 subsection.address_space = d->as;
915 subsection.mr = &subpage->iomem;
916 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
917 phys_section_add(&d->map, &subsection));
919 subpage = container_of(existing->mr, subpage_t, iomem);
921 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
922 end = start + int128_get64(section->size) - 1;
923 subpage_register(subpage, start, end,
924 phys_section_add(&d->map, section));
928 static void register_multipage(AddressSpaceDispatch *d,
929 MemoryRegionSection *section)
931 hwaddr start_addr = section->offset_within_address_space;
932 uint16_t section_index = phys_section_add(&d->map, section);
933 uint64_t num_pages = int128_get64(int128_rshift(section->size,
937 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
940 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
942 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
943 AddressSpaceDispatch *d = as->next_dispatch;
944 MemoryRegionSection now = *section, remain = *section;
945 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
947 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
948 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
949 - now.offset_within_address_space;
951 now.size = int128_min(int128_make64(left), now.size);
952 register_subpage(d, &now);
954 now.size = int128_zero();
956 while (int128_ne(remain.size, now.size)) {
957 remain.size = int128_sub(remain.size, now.size);
958 remain.offset_within_address_space += int128_get64(now.size);
959 remain.offset_within_region += int128_get64(now.size);
961 if (int128_lt(remain.size, page_size)) {
962 register_subpage(d, &now);
963 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
964 now.size = page_size;
965 register_subpage(d, &now);
967 now.size = int128_and(now.size, int128_neg(page_size));
968 register_multipage(d, &now);
973 void qemu_flush_coalesced_mmio_buffer(void)
976 kvm_flush_coalesced_mmio_buffer();
979 void qemu_mutex_lock_ramlist(void)
981 qemu_mutex_lock(&ram_list.mutex);
984 void qemu_mutex_unlock_ramlist(void)
986 qemu_mutex_unlock(&ram_list.mutex);
993 #define HUGETLBFS_MAGIC 0x958458f6
995 static long gethugepagesize(const char *path)
1001 ret = statfs(path, &fs);
1002 } while (ret != 0 && errno == EINTR);
1009 if (fs.f_type != HUGETLBFS_MAGIC)
1010 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1015 static sigjmp_buf sigjump;
1017 static void sigbus_handler(int signal)
1019 siglongjmp(sigjump, 1);
1022 static void *file_ram_alloc(RAMBlock *block,
1027 char *sanitized_name;
1031 unsigned long hpagesize;
1033 hpagesize = gethugepagesize(path);
1038 if (memory < hpagesize) {
1042 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1043 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1047 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1048 sanitized_name = g_strdup(block->mr->name);
1049 for (c = sanitized_name; *c != '\0'; c++) {
1054 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1056 g_free(sanitized_name);
1058 fd = mkstemp(filename);
1060 perror("unable to create backing store for hugepages");
1067 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1070 * ftruncate is not supported by hugetlbfs in older
1071 * hosts, so don't bother bailing out on errors.
1072 * If anything goes wrong with it under other filesystems,
1075 if (ftruncate(fd, memory))
1076 perror("ftruncate");
1078 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1079 if (area == MAP_FAILED) {
1080 perror("file_ram_alloc: can't mmap RAM pages");
1087 struct sigaction act, oldact;
1088 sigset_t set, oldset;
1090 memset(&act, 0, sizeof(act));
1091 act.sa_handler = &sigbus_handler;
1094 ret = sigaction(SIGBUS, &act, &oldact);
1096 perror("file_ram_alloc: failed to install signal handler");
1100 /* unblock SIGBUS */
1102 sigaddset(&set, SIGBUS);
1103 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1105 if (sigsetjmp(sigjump, 1)) {
1106 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1110 /* MAP_POPULATE silently ignores failures */
1111 for (i = 0; i < (memory/hpagesize); i++) {
1112 memset(area + (hpagesize*i), 0, 1);
1115 ret = sigaction(SIGBUS, &oldact, NULL);
1117 perror("file_ram_alloc: failed to reinstall signal handler");
1121 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1134 static void *file_ram_alloc(RAMBlock *block,
1138 fprintf(stderr, "-mem-path not supported on this host\n");
1143 static ram_addr_t find_ram_offset(ram_addr_t size)
1145 RAMBlock *block, *next_block;
1146 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1148 assert(size != 0); /* it would hand out same offset multiple times */
1150 if (QTAILQ_EMPTY(&ram_list.blocks))
1153 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1154 ram_addr_t end, next = RAM_ADDR_MAX;
1156 end = block->offset + block->length;
1158 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1159 if (next_block->offset >= end) {
1160 next = MIN(next, next_block->offset);
1163 if (next - end >= size && next - end < mingap) {
1165 mingap = next - end;
1169 if (offset == RAM_ADDR_MAX) {
1170 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1178 ram_addr_t last_ram_offset(void)
1181 ram_addr_t last = 0;
1183 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1184 last = MAX(last, block->offset + block->length);
1189 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1193 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1194 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1195 "dump-guest-core", true)) {
1196 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1198 perror("qemu_madvise");
1199 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1200 "but dump_guest_core=off specified\n");
1205 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1207 RAMBlock *new_block, *block;
1210 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1211 if (block->offset == addr) {
1217 assert(!new_block->idstr[0]);
1220 char *id = qdev_get_dev_path(dev);
1222 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1226 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1228 /* This assumes the iothread lock is taken here too. */
1229 qemu_mutex_lock_ramlist();
1230 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1231 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1232 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1237 qemu_mutex_unlock_ramlist();
1240 static int memory_try_enable_merging(void *addr, size_t len)
1242 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1243 /* disabled by the user */
1247 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1250 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1253 RAMBlock *block, *new_block;
1254 ram_addr_t old_ram_size, new_ram_size;
1256 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1258 size = TARGET_PAGE_ALIGN(size);
1259 new_block = g_malloc0(sizeof(*new_block));
1262 /* This assumes the iothread lock is taken here too. */
1263 qemu_mutex_lock_ramlist();
1265 new_block->offset = find_ram_offset(size);
1267 new_block->host = host;
1268 new_block->flags |= RAM_PREALLOC_MASK;
1269 } else if (xen_enabled()) {
1271 fprintf(stderr, "-mem-path not supported with Xen\n");
1274 xen_ram_alloc(new_block->offset, size, mr);
1277 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1279 * file_ram_alloc() needs to allocate just like
1280 * phys_mem_alloc, but we haven't bothered to provide
1284 "-mem-path not supported with this accelerator\n");
1287 new_block->host = file_ram_alloc(new_block, size, mem_path);
1289 if (!new_block->host) {
1290 new_block->host = phys_mem_alloc(size);
1291 if (!new_block->host) {
1292 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1293 new_block->mr->name, strerror(errno));
1296 memory_try_enable_merging(new_block->host, size);
1299 new_block->length = size;
1301 /* Keep the list sorted from biggest to smallest block. */
1302 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1303 if (block->length < new_block->length) {
1308 QTAILQ_INSERT_BEFORE(block, new_block, next);
1310 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1312 ram_list.mru_block = NULL;
1315 qemu_mutex_unlock_ramlist();
1317 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1319 if (new_ram_size > old_ram_size) {
1321 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1322 ram_list.dirty_memory[i] =
1323 bitmap_zero_extend(ram_list.dirty_memory[i],
1324 old_ram_size, new_ram_size);
1327 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1329 qemu_ram_setup_dump(new_block->host, size);
1330 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1331 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1334 kvm_setup_guest_memory(new_block->host, size);
1336 return new_block->offset;
1339 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1341 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1344 void qemu_ram_free_from_ptr(ram_addr_t addr)
1348 /* This assumes the iothread lock is taken here too. */
1349 qemu_mutex_lock_ramlist();
1350 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351 if (addr == block->offset) {
1352 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1353 ram_list.mru_block = NULL;
1359 qemu_mutex_unlock_ramlist();
1362 void qemu_ram_free(ram_addr_t addr)
1366 /* This assumes the iothread lock is taken here too. */
1367 qemu_mutex_lock_ramlist();
1368 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1369 if (addr == block->offset) {
1370 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1371 ram_list.mru_block = NULL;
1373 if (block->flags & RAM_PREALLOC_MASK) {
1375 } else if (xen_enabled()) {
1376 xen_invalidate_map_cache_entry(block->host);
1378 } else if (block->fd >= 0) {
1379 munmap(block->host, block->length);
1383 qemu_anon_ram_free(block->host, block->length);
1389 qemu_mutex_unlock_ramlist();
1394 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1401 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1402 offset = addr - block->offset;
1403 if (offset < block->length) {
1404 vaddr = block->host + offset;
1405 if (block->flags & RAM_PREALLOC_MASK) {
1407 } else if (xen_enabled()) {
1411 munmap(vaddr, length);
1412 if (block->fd >= 0) {
1414 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1417 flags |= MAP_PRIVATE;
1419 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1420 flags, block->fd, offset);
1423 * Remap needs to match alloc. Accelerators that
1424 * set phys_mem_alloc never remap. If they did,
1425 * we'd need a remap hook here.
1427 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1429 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1430 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1433 if (area != vaddr) {
1434 fprintf(stderr, "Could not remap addr: "
1435 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1439 memory_try_enable_merging(vaddr, length);
1440 qemu_ram_setup_dump(vaddr, length);
1446 #endif /* !_WIN32 */
1448 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1449 With the exception of the softmmu code in this file, this should
1450 only be used for local memory (e.g. video ram) that the device owns,
1451 and knows it isn't going to access beyond the end of the block.
1453 It should not be used for general purpose DMA.
1454 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1456 void *qemu_get_ram_ptr(ram_addr_t addr)
1458 RAMBlock *block = qemu_get_ram_block(addr);
1460 if (xen_enabled()) {
1461 /* We need to check if the requested address is in the RAM
1462 * because we don't want to map the entire memory in QEMU.
1463 * In that case just map until the end of the page.
1465 if (block->offset == 0) {
1466 return xen_map_cache(addr, 0, 0);
1467 } else if (block->host == NULL) {
1469 xen_map_cache(block->offset, block->length, 1);
1472 return block->host + (addr - block->offset);
1475 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1476 * but takes a size argument */
/*
 * @addr: ram address to translate.
 * @size: in/out length; it is reduced when addr + *size would run past the
 *        end of the block that contains @addr.
 * When Xen is enabled the lookup is delegated to
 * xen_map_cache(addr, *size, 1).  Otherwise ram_list.blocks is scanned; if
 * no block contains @addr a "Bad ram offset" message is printed to stderr.
 */
1477 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1482 if (xen_enabled()) {
1483 return xen_map_cache(addr, *size, 1);
1487 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1488 if (addr - block->offset < block->length) {
1489 if (addr - block->offset + *size > block->length)
1490 *size = block->length - addr + block->offset;
1491 return block->host + (addr - block->offset);
1495 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1500 /* Some of the softmmu routines need to translate from a host pointer
1501 (typically a TLB entry) back to a ram offset. */
/*
 * @ptr: host pointer to translate.
 * @ram_addr: receives the ram address that corresponds to @ptr.
 * When Xen is enabled the address comes from xen_ram_addr_from_mapcache()
 * and the ->mr of the block found by qemu_get_ram_block() is returned.
 * Otherwise ram_list.mru_block is tried first, then each entry of
 * ram_list.blocks; a block matches when @ptr lies within block->length
 * bytes of block->host.
 */
1502 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1505 uint8_t *host = ptr;
1507 if (xen_enabled()) {
1508 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1509 return qemu_get_ram_block(*ram_addr)->mr;
1512 block = ram_list.mru_block;
1513 if (block && block->host && host - block->host < block->length) {
1517 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1518 /* This case happens when the block is not mapped. */
1519 if (block->host == NULL) {
1522 if (host - block->host < block->length) {
1530 *ram_addr = block->offset + (host - block->host);
/*
 * Write handler referenced by notdirty_mem_ops.  If the DIRTY_MEMORY_CODE
 * flag is not set for @ram_addr, tb_invalidate_phys_page_fast() is called
 * first.  @val is stored through qemu_get_ram_ptr() with stb_p, stw_p or
 * stl_p, then the MIGRATION and VGA dirty flags are set.  When the page is
 * no longer clean, tlb_set_dirty() is called for
 * current_cpu->mem_io_vaddr.
 */
1534 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1535 uint64_t val, unsigned size)
1537 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1538 tb_invalidate_phys_page_fast(ram_addr, size);
1542 stb_p(qemu_get_ram_ptr(ram_addr), val);
1545 stw_p(qemu_get_ram_ptr(ram_addr), val);
1548 stl_p(qemu_get_ram_ptr(ram_addr), val);
1553 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1554 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1555 /* we remove the notdirty callback only if the code has been
1557 if (!cpu_physical_memory_is_clean(ram_addr)) {
1558 CPUArchState *env = current_cpu->env_ptr;
1559 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
/*
 * Access-validity callback installed as notdirty_mem_ops.valid.accepts.
 * NOTE(review): the return expression is not visible here - confirm which
 * accesses (presumably writes only) are accepted.
 */
1563 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1564 unsigned size, bool is_write)
/*
 * MemoryRegionOps table: writes go to notdirty_mem_write(), validity checks
 * to notdirty_mem_accepts(); the visible initializer sets no .read handler.
 * Accesses use the target's native endianness.
 */
1569 static const MemoryRegionOps notdirty_mem_ops = {
1570 .write = notdirty_mem_write,
1571 .valid.accepts = notdirty_mem_accepts,
1572 .endianness = DEVICE_NATIVE_ENDIAN,
1575 /* Generate a debug exception if a watchpoint has been hit. */
/*
 * @offset: offset of the access within its target page; it is added to the
 *          page part of cpu->mem_io_vaddr to form the virtual address.
 * @len_mask: mask derived from the access size (the callers in this file
 *            pass ~(size - 1)).
 * @flags: BP_MEM_* bits; a watchpoint only matches when it shares a flag.
 * Operates on current_cpu.
 */
1576 static void check_watchpoint(int offset, int len_mask, int flags)
1578 CPUState *cpu = current_cpu;
1579 CPUArchState *env = cpu->env_ptr;
1580 target_ulong pc, cs_base;
1585 if (cpu->watchpoint_hit) {
1586 /* We re-entered the check after replacing the TB. Now raise
1587 * the debug interrupt so that it will trigger after the
1588 * current instruction. */
1589 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1592 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1593 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1594 if ((vaddr == (wp->vaddr & len_mask) ||
1595 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1596 wp->flags |= BP_WATCHPOINT_HIT;
1597 if (!cpu->watchpoint_hit) {
1598 cpu->watchpoint_hit = wp;
1599 tb_check_watchpoint(cpu);
1600 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1601 cpu->exception_index = EXCP_DEBUG;
1604 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1605 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1606 cpu_resume_from_signal(cpu, NULL);
1610 wp->flags &= ~BP_WATCHPOINT_HIT;
/*
 * watch_mem_read: .read handler of watch_mem_ops.  Runs check_watchpoint()
 * for a BP_MEM_READ access at @addr, then performs the load from
 * address_space_memory with ldub_phys, lduw_phys or ldl_phys (the visible
 * cases are sizes 1, 2 and 4).
 */
1615 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1616 so these check for a hit then pass through to the normal out-of-line
1618 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1621 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1623 case 1: return ldub_phys(&address_space_memory, addr);
1624 case 2: return lduw_phys(&address_space_memory, addr);
1625 case 4: return ldl_phys(&address_space_memory, addr);
/*
 * .write handler of watch_mem_ops.  Runs check_watchpoint() for a
 * BP_MEM_WRITE access at @addr, then stores @val into address_space_memory
 * with stb_phys, stw_phys or stl_phys.
 */
1630 static void watch_mem_write(void *opaque, hwaddr addr,
1631 uint64_t val, unsigned size)
1633 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1636 stb_phys(&address_space_memory, addr, val);
1639 stw_phys(&address_space_memory, addr, val);
1642 stl_phys(&address_space_memory, addr, val);
/*
 * MemoryRegionOps table pairing watch_mem_read() and watch_mem_write(),
 * native endianness.  io_mem_init() passes it when initialising
 * io_mem_watch.
 */
1648 static const MemoryRegionOps watch_mem_ops = {
1649 .read = watch_mem_read,
1650 .write = watch_mem_write,
1651 .endianness = DEVICE_NATIVE_ENDIAN,
/*
 * .read handler of subpage_ops.  @opaque is the subpage_t; the access is
 * forwarded to address_space_read() on subpage->as at address
 * @addr + subpage->base.  With DEBUG_SUBPAGE defined the access is traced
 * with printf.
 */
1654 static uint64_t subpage_read(void *opaque, hwaddr addr,
1657 subpage_t *subpage = opaque;
1660 #if defined(DEBUG_SUBPAGE)
1661 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1662 subpage, len, addr);
1664 address_space_read(subpage->as, addr + subpage->base, buf, len);
/*
 * .write handler of subpage_ops.  @opaque is the subpage_t; the data is
 * forwarded to address_space_write() on subpage->as at address
 * @addr + subpage->base.  With DEBUG_SUBPAGE defined the access and @value
 * are traced with printf.
 */
1677 static void subpage_write(void *opaque, hwaddr addr,
1678 uint64_t value, unsigned len)
1680 subpage_t *subpage = opaque;
1683 #if defined(DEBUG_SUBPAGE)
1684 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1685 " value %"PRIx64"\n",
1686 __func__, subpage, len, addr, value);
1701 address_space_write(subpage->as, addr + subpage->base, buf, len);
/*
 * .valid.accepts callback of subpage_ops.  The decision is delegated to
 * address_space_access_valid() on subpage->as at @addr + subpage->base.
 * With DEBUG_SUBPAGE defined the query is traced with printf ('w' for
 * writes, 'r' for reads).
 */
1704 static bool subpage_accepts(void *opaque, hwaddr addr,
1705 unsigned len, bool is_write)
1707 subpage_t *subpage = opaque;
1708 #if defined(DEBUG_SUBPAGE)
1709 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1710 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1713 return address_space_access_valid(subpage->as, addr + subpage->base,
/*
 * MemoryRegionOps table for subpage regions: subpage_read(),
 * subpage_write() and subpage_accepts(), native endianness.
 * subpage_init() passes it to memory_region_init_io().
 */
1717 static const MemoryRegionOps subpage_ops = {
1718 .read = subpage_read,
1719 .write = subpage_write,
1720 .valid.accepts = subpage_accepts,
1721 .endianness = DEVICE_NATIVE_ENDIAN,
/*
 * Assign @section to every mmio->sub_section[] slot from
 * SUBPAGE_IDX(@start) through SUBPAGE_IDX(@end), inclusive.
 * @start and @end are offsets within one target page; values of
 * TARGET_PAGE_SIZE or more are caught by the range check at the top
 * (NOTE(review): the body of that check is not visible here - presumably
 * an error return).
 */
1724 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1729 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1731 idx = SUBPAGE_IDX(start);
1732 eidx = SUBPAGE_IDX(end);
1733 #if defined(DEBUG_SUBPAGE)
1734 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1735 __func__, mmio, start, end, idx, eidx, section);
1737 for (; idx <= eidx; idx++) {
1738 mmio->sub_section[idx] = section;
/*
 * Allocate a zero-filled subpage_t, initialise its iomem region with
 * subpage_ops (named "subpage", TARGET_PAGE_SIZE bytes long), flag the
 * region as a subpage and point every sub_section slot at
 * PHYS_SECTION_UNASSIGNED via subpage_register().
 */
1744 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1748 mmio = g_malloc0(sizeof(subpage_t));
1752 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1753 "subpage", TARGET_PAGE_SIZE);
1754 mmio->iomem.subpage = true;
1755 #if defined(DEBUG_SUBPAGE)
1756 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1757 mmio, base, TARGET_PAGE_SIZE);
1759 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
/*
 * Build a MemoryRegionSection that lives in address_space_memory, has zero
 * offsets and a size of int128_2_64(), and add it to @map.
 * Returns whatever phys_section_add() returns for the new section.
 */
1764 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1766 MemoryRegionSection section = {
1767 .address_space = &address_space_memory,
1769 .offset_within_address_space = 0,
1770 .offset_within_region = 0,
1771 .size = int128_2_64(),
1774 return phys_section_add(map, &section);
/*
 * Return the MemoryRegion of the section selected by the sub-page bits of
 * @index (index & ~TARGET_PAGE_MASK) in the current dispatch map of @as.
 */
1777 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1779 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
/*
 * Initialise the four special I/O regions, each UINT64_MAX bytes long:
 * io_mem_rom and io_mem_unassigned (both with unassigned_mem_ops),
 * io_mem_notdirty (notdirty_mem_ops) and io_mem_watch (watch_mem_ops).
 */
1782 static void io_mem_init(void)
1784 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1785 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1786 "unassigned", UINT64_MAX);
1787 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1788 "notdirty", UINT64_MAX);
1789 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1790 "watch", UINT64_MAX);
/*
 * Start building a new dispatch table for the AddressSpace that embeds
 * @listener as its dispatch_listener.  A zeroed AddressSpaceDispatch is
 * allocated and the four dummy sections are added in a fixed order; the
 * asserts pin their indices to the PHYS_SECTION_* constants.  phys_map is
 * initialised with ptr = PHYS_MAP_NODE_NIL and skip = 1, and the result is
 * stored in as->next_dispatch.
 */
1793 static void mem_begin(MemoryListener *listener)
1795 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1796 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1799 n = dummy_section(&d->map, &io_mem_unassigned);
1800 assert(n == PHYS_SECTION_UNASSIGNED);
1801 n = dummy_section(&d->map, &io_mem_notdirty);
1802 assert(n == PHYS_SECTION_NOTDIRTY);
1803 n = dummy_section(&d->map, &io_mem_rom);
1804 assert(n == PHYS_SECTION_ROM);
1805 n = dummy_section(&d->map, &io_mem_watch);
1806 assert(n == PHYS_SECTION_WATCH);
1808 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1810 as->next_dispatch = d;
/*
 * .commit hook of the dispatch listener.  Runs phys_page_compact_all() on
 * the pending dispatch (as->next_dispatch), installs it as as->dispatch and
 * calls phys_sections_free() on the map of the dispatch it replaced.
 */
1813 static void mem_commit(MemoryListener *listener)
1815 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1816 AddressSpaceDispatch *cur = as->dispatch;
1817 AddressSpaceDispatch *next = as->next_dispatch;
1819 phys_page_compact_all(next, next->map.nodes_nb);
1821 as->dispatch = next;
1824 phys_sections_free(&cur->map);
/*
 * Listener callback taking the MemoryListener that fired.  The visible
 * lines compare each cpu->tcg_as_listener with @listener.
 * NOTE(review): the CPU loop and the TLB reset call are not visible here;
 * presumably CPUs registered with a different listener are skipped.
 */
1829 static void tcg_commit(MemoryListener *listener)
1833 /* since each CPU stores ram addresses in its TLB cache, we must
1834 reset the modified entries */
1837 CPUArchState *env = cpu->env_ptr;
1839 /* FIXME: Disentangle the cpu.h circular files deps so we can
1840 directly get the right CPU from listener. */
1841 if (cpu->tcg_as_listener != listener) {
/* .log_global_start hook of core_memory_listener: turns dirty tracking on. */
1848 static void core_log_global_start(MemoryListener *listener)
1850 cpu_physical_memory_set_dirty_tracking(true);
/* .log_global_stop hook of core_memory_listener: turns dirty tracking off. */
1853 static void core_log_global_stop(MemoryListener *listener)
1855 cpu_physical_memory_set_dirty_tracking(false);
/*
 * Listener that starts and stops dirty tracking on global log start/stop.
 * memory_map_init() registers it on address_space_memory.
 */
1858 static MemoryListener core_memory_listener = {
1859 .log_global_start = core_log_global_start,
1860 .log_global_stop = core_log_global_stop,
/*
 * Prepare the dispatch machinery of @as: as->dispatch starts out NULL, and
 * as->dispatch_listener is filled in (the visible fields are .commit =
 * mem_commit and .region_add / .region_nop = mem_add) and then registered
 * for @as with memory_listener_register().
 */
1864 void address_space_init_dispatch(AddressSpace *as)
1866 as->dispatch = NULL;
1867 as->dispatch_listener = (MemoryListener) {
1869 .commit = mem_commit,
1870 .region_add = mem_add,
1871 .region_nop = mem_add,
1874 memory_listener_register(&as->dispatch_listener, as);
/*
 * Tear down the dispatch machinery of @as: the listener is unregistered
 * and as->dispatch is cleared.  The previous dispatch is captured in d.
 * NOTE(review): what happens to d is not visible here - presumably it is
 * freed.
 */
1877 void address_space_destroy_dispatch(AddressSpace *as)
1879 AddressSpaceDispatch *d = as->dispatch;
1881 memory_listener_unregister(&as->dispatch_listener);
1883 as->dispatch = NULL;
/*
 * Create the two root regions and their address spaces: system_memory
 * (region "system", UINT64_MAX bytes) backs address_space_memory
 * ("memory"), and system_io (region "io", using unassigned_io_ops) backs
 * address_space_io ("I/O").  core_memory_listener is then registered on
 * address_space_memory.
 */
1886 static void memory_map_init(void)
1888 system_memory = g_malloc(sizeof(*system_memory));
1890 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1891 address_space_init(&address_space_memory, system_memory, "memory");
1893 system_io = g_malloc(sizeof(*system_io));
1894 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1896 address_space_init(&address_space_io, system_io, "I/O");
1898 memory_listener_register(&core_memory_listener, &address_space_memory);
/* Accessor for the file-static system_memory root region. */
1901 MemoryRegion *get_system_memory(void)
1903 return system_memory;
/*
 * Accessor for the system I/O root region.
 * NOTE(review): the body is not visible here - presumably returns system_io.
 */
1906 MemoryRegion *get_system_io(void)
1911 #endif /* !defined(CONFIG_USER_ONLY) */
1913 /* physical memory access (slow version, mainly for debug) */
1914 #if defined(CONFIG_USER_ONLY)
/*
 * CONFIG_USER_ONLY variant.  Works on one target page at a time: l is the
 * distance from @addr to the end of its page.  The page flags from
 * page_get_flags() must include PAGE_VALID, plus PAGE_WRITE for writes or
 * PAGE_READ for reads.  The guest range is obtained with lock_user()
 * (VERIFY_WRITE or VERIFY_READ) and released with unlock_user().
 */
1915 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1916 uint8_t *buf, int len, int is_write)
1923 page = addr & TARGET_PAGE_MASK;
1924 l = (page + TARGET_PAGE_SIZE) - addr;
1927 flags = page_get_flags(page);
1928 if (!(flags & PAGE_VALID))
1931 if (!(flags & PAGE_WRITE))
1933 /* XXX: this code should not depend on lock_user */
1934 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1937 unlock_user(p, addr, l);
1939 if (!(flags & PAGE_READ))
1941 /* XXX: this code should not depend on lock_user */
1942 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1945 unlock_user(p, addr, 0);
/*
 * Bookkeeping after a direct write to guest RAM at ram address @addr.
 * When the page is still clean, translated code overlapping
 * [addr, addr + length) is invalidated and the VGA and MIGRATION dirty
 * flags are set.  xen_modified_memory() is called for the range.
 * NOTE(review): the dirty flags are set for @addr only, not for each page
 * of the range - confirm callers never pass a range that crosses a page.
 */
1956 static void invalidate_and_set_dirty(hwaddr addr,
1959 if (cpu_physical_memory_is_clean(addr)) {
1960 /* invalidate code */
1961 tb_invalidate_phys_page_range(addr, addr + length, 0);
1963 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1964 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1966 xen_modified_memory(addr, length);
/*
 * Choose the size of the next single access to @mr.
 * @l: number of bytes the caller still wants to transfer.
 * @addr: address of the access, used for the alignment bound.
 * The limit is mr->ops->valid.max_access_size (4 when that field is 0),
 * lowered to the alignment of @addr unless mr->ops->impl.unaligned is set.
 * @l is capped at that limit.
 */
1969 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1971 unsigned access_size_max = mr->ops->valid.max_access_size;
1973 /* Regions are assumed to support 1-4 byte accesses unless
1974 otherwise specified. */
1975 if (access_size_max == 0) {
1976 access_size_max = 4;
1979 /* Bound the maximum access by the alignment of the address. */
1980 if (!mr->ops->impl.unaligned) {
/* addr & -addr isolates the lowest set bit of addr; it is 0 when addr is 0. */
1981 unsigned align_size_max = addr & -addr;
1982 if (align_size_max != 0 && align_size_max < access_size_max) {
1983 access_size_max = align_size_max;
1987 /* Don't attempt accesses larger than the maximum. */
1988 if (l > access_size_max) {
1989 l = access_size_max;
1992 l = 1 << (qemu_fls(l) - 1);
/*
 * Copy @len bytes between @buf and guest address @addr of @as.
 * @is_write: true to write @buf to the guest, false to read into @buf.
 * The current address is translated with address_space_translate().
 * Regions that are not directly accessible are accessed with
 * io_mem_write()/io_mem_read() in pieces of 8, 4, 2 or 1 bytes, sized by
 * memory_access_size(); their results are OR-ed into `error`.  Directly
 * accessible RAM is copied with memcpy(), and writes are followed by
 * invalidate_and_set_dirty().
 */
1998 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1999 int len, bool is_write)
2010 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2013 if (!memory_access_is_direct(mr, is_write)) {
2014 l = memory_access_size(mr, l, addr1);
2015 /* XXX: could force current_cpu to NULL to avoid
2019 /* 64 bit write access */
2021 error |= io_mem_write(mr, addr1, val, 8);
2024 /* 32 bit write access */
2026 error |= io_mem_write(mr, addr1, val, 4);
2029 /* 16 bit write access */
2031 error |= io_mem_write(mr, addr1, val, 2);
2034 /* 8 bit write access */
2036 error |= io_mem_write(mr, addr1, val, 1);
2042 addr1 += memory_region_get_ram_addr(mr);
2044 ptr = qemu_get_ram_ptr(addr1);
2045 memcpy(ptr, buf, l);
2046 invalidate_and_set_dirty(addr1, l);
2049 if (!memory_access_is_direct(mr, is_write)) {
2051 l = memory_access_size(mr, l, addr1);
2054 /* 64 bit read access */
2055 error |= io_mem_read(mr, addr1, &val, 8);
2059 /* 32 bit read access */
2060 error |= io_mem_read(mr, addr1, &val, 4);
2064 /* 16 bit read access */
2065 error |= io_mem_read(mr, addr1, &val, 2);
2069 /* 8 bit read access */
2070 error |= io_mem_read(mr, addr1, &val, 1);
2078 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2079 memcpy(buf, ptr, l);
/*
 * Write @len bytes from @buf to @addr in @as.  Wrapper around
 * address_space_rw() with is_write = true; the const qualifier of @buf is
 * cast away because address_space_rw() takes a non-const buffer.
 */
2090 bool address_space_write(AddressSpace *as, hwaddr addr,
2091 const uint8_t *buf, int len)
2093 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
/*
 * Read @len bytes at @addr in @as into @buf.  Wrapper around
 * address_space_rw() with is_write = false.
 */
2096 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2098 return address_space_rw(as, addr, buf, len, false);
/*
 * address_space_rw() on address_space_memory; the result of
 * address_space_rw() is discarded.
 */
2102 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2103 int len, int is_write)
2105 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/*
 * Operation selector for cpu_physical_memory_write_rom_internal(); the
 * callers in this file pass WRITE_DATA and FLUSH_CACHE.
 */
2108 enum write_rom_type {
/*
 * Process @len bytes at @addr in @as, translating as for a write.  Each
 * translated region is tested for being RAM or ROMD; the host pointer is
 * taken from qemu_get_ram_ptr().  The visible operations are copying @buf
 * in (followed by invalidate_and_set_dirty()) and flushing the host
 * instruction cache for the range.
 * NOTE(review): the selection between the two is not visible here -
 * presumably a switch on @type (WRITE_DATA vs FLUSH_CACHE).
 */
2113 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2114 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2123 mr = address_space_translate(as, addr, &addr1, &l, true);
2125 if (!(memory_region_is_ram(mr) ||
2126 memory_region_is_romd(mr))) {
2129 addr1 += memory_region_get_ram_addr(mr);
2131 ptr = qemu_get_ram_ptr(addr1);
2134 memcpy(ptr, buf, l);
2135 invalidate_and_set_dirty(addr1, l);
2138 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2148 /* used for ROM loading : can write in RAM and ROM */
/*
 * Wrapper: runs cpu_physical_memory_write_rom_internal() in WRITE_DATA
 * mode for @len bytes of @buf at @addr in @as.
 */
2149 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2150 const uint8_t *buf, int len)
2152 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
/*
 * Flush the host instruction cache for @len bytes at guest physical
 * address @start, by running cpu_physical_memory_write_rom_internal() on
 * address_space_memory in FLUSH_CACHE mode with a NULL buffer.  The
 * tcg_enabled() case is tested first.
 */
2155 void cpu_flush_icache_range(hwaddr start, int len)
2158 * This function should do the same thing as an icache flush that was
2159 * triggered from within the guest. For TCG we are always cache coherent,
2160 * so there is no need to flush anything. For KVM / Xen we need to flush
2161 * the host's instruction cache at least.
2163 if (tcg_enabled()) {
2167 cpu_physical_memory_write_rom_internal(&address_space_memory,
2168 start, NULL, len, FLUSH_CACHE);
/*
 * The single bounce buffer used by address_space_map() and
 * address_space_unmap() for regions that cannot be accessed directly.
 */
2178 static BounceBuffer bounce;
/*
 * One registration made through cpu_register_map_client(): the callback
 * to invoke and the linkage into map_client_list.
 */
2180 typedef struct MapClient {
2182 void (*callback)(void *opaque);
2183 QLIST_ENTRY(MapClient) link;
/* List of registered MapClient entries; starts out empty. */
2186 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2187 = QLIST_HEAD_INITIALIZER(map_client_list);
/*
 * Allocate a MapClient, record @opaque and @callback in it and insert it
 * at the head of map_client_list.
 * NOTE(review): the return statement is not visible here - presumably the
 * new client is returned as an opaque handle.
 */
2189 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2191 MapClient *client = g_malloc(sizeof(*client));
2193 client->opaque = opaque;
2194 client->callback = callback;
2195 QLIST_INSERT_HEAD(&map_client_list, client, link);
/*
 * Unlink the MapClient passed as @_client from map_client_list.
 * NOTE(review): the release of the client's memory is not visible here.
 */
2199 static void cpu_unregister_map_client(void *_client)
2201 MapClient *client = (MapClient *)_client;
2203 QLIST_REMOVE(client, link);
/*
 * Drain map_client_list: while the list is non-empty, take its first
 * client, invoke its callback with its opaque value, then unregister it.
 */
2207 static void cpu_notify_map_clients(void)
2211 while (!QLIST_EMPTY(&map_client_list)) {
2212 client = QLIST_FIRST(&map_client_list);
2213 client->callback(client->opaque);
2214 cpu_unregister_map_client(client);
/*
 * Check whether an access of @len bytes at @addr in @as is acceptable.
 * Regions that are not directly accessible are checked with
 * memory_region_access_valid() at the translated offset, using the access
 * size chosen by memory_access_size().
 * NOTE(review): memory_access_size() receives the untranslated @addr here,
 * whereas address_space_rw() passes the translated address - confirm that
 * this is intended.
 */
2218 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2225 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2226 if (!memory_access_is_direct(mr, is_write)) {
2227 l = memory_access_size(mr, l, addr);
2228 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
/*
 * address_space_map: when the first translated region is not directly
 * accessible, the single global bounce buffer is used: it is tested for
 * being in use, sized to at most TARGET_PAGE_SIZE bytes, a reference is
 * taken on the region, and address_space_read() fills the buffer.
 * Otherwise translation is repeated and compared against the first region
 * and the expected contiguous offset (this_mr != mr || xlat != base + done
 * ends the run); a reference is taken on the region and the host pointer
 * comes from qemu_ram_ptr_length().
 */
2239 /* Map a physical memory region into a host virtual address.
2240 * May map a subset of the requested range, given by and returned in *plen.
2241 * May return NULL if resources needed to perform the mapping are exhausted.
2242 * Use only for reads OR writes - not for read-modify-write operations.
2243 * Use cpu_register_map_client() to know when retrying the map operation is
2244 * likely to succeed.
2246 void *address_space_map(AddressSpace *as,
2253 hwaddr l, xlat, base;
2254 MemoryRegion *mr, *this_mr;
2262 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2263 if (!memory_access_is_direct(mr, is_write)) {
2264 if (bounce.buffer) {
2267 /* Avoid unbounded allocations */
2268 l = MIN(l, TARGET_PAGE_SIZE);
2269 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2273 memory_region_ref(mr);
2276 address_space_read(as, addr, bounce.buffer, l);
2280 return bounce.buffer;
2284 raddr = memory_region_get_ram_addr(mr);
2295 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2296 if (this_mr != mr || xlat != base + done) {
2301 memory_region_ref(mr);
2303 return qemu_ram_ptr_length(raddr + base, plen);
/*
 * address_space_unmap: for a buffer other than bounce.buffer, the ram
 * address is recovered with qemu_ram_addr_from_host(), the accessed range
 * is passed to invalidate_and_set_dirty() in TARGET_PAGE_SIZE steps, the
 * Xen map cache entry is invalidated when Xen is enabled, and the region
 * reference is dropped.  For the bounce buffer, @access_len bytes are
 * written to bounce.addr with address_space_write(), the buffer is freed
 * and cleared, the reference on bounce.mr is dropped, and the registered
 * map clients are notified.
 */
2306 /* Unmaps a memory region previously mapped by address_space_map().
2307 * Will also mark the memory as dirty if is_write == 1. access_len gives
2308 * the amount of memory that was actually read or written by the caller.
2310 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2311 int is_write, hwaddr access_len)
2313 if (buffer != bounce.buffer) {
2317 mr = qemu_ram_addr_from_host(buffer, &addr1);
2320 while (access_len) {
2322 l = TARGET_PAGE_SIZE;
2325 invalidate_and_set_dirty(addr1, l);
2330 if (xen_enabled()) {
2331 xen_invalidate_map_cache_entry(buffer);
2333 memory_region_unref(mr);
2337 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2339 qemu_vfree(bounce.buffer);
2340 bounce.buffer = NULL;
2341 memory_region_unref(bounce.mr);
2342 cpu_notify_map_clients();
/* address_space_map() on address_space_memory. */
2345 void *cpu_physical_memory_map(hwaddr addr,
2349 return address_space_map(&address_space_memory, addr, plen, is_write);
/* address_space_unmap() on address_space_memory. */
2352 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2353 int is_write, hwaddr access_len)
2355 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2358 /* warning: addr must be aligned */
/*
 * Load a 32-bit value from @addr in @as using the byte order @endian.
 * If fewer than 4 bytes translate contiguously, or the region is not
 * directly accessible, the value is read with io_mem_read() and the
 * TARGET_WORDS_BIGENDIAN-dependent branches deal with @endian.  Otherwise
 * it is loaded from the host pointer with ldl_le_p() or ldl_be_p().
 */
2359 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2360 enum device_endian endian)
2368 mr = address_space_translate(as, addr, &addr1, &l, false);
2369 if (l < 4 || !memory_access_is_direct(mr, false)) {
2371 io_mem_read(mr, addr1, &val, 4);
2372 #if defined(TARGET_WORDS_BIGENDIAN)
2373 if (endian == DEVICE_LITTLE_ENDIAN) {
2377 if (endian == DEVICE_BIG_ENDIAN) {
2383 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2387 case DEVICE_LITTLE_ENDIAN:
2388 val = ldl_le_p(ptr);
2390 case DEVICE_BIG_ENDIAN:
2391 val = ldl_be_p(ptr);
/* 32-bit load from @addr in @as, target-native byte order. */
2401 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2403 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
/* 32-bit load from @addr in @as, little-endian byte order. */
2406 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2408 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
/* 32-bit load from @addr in @as, big-endian byte order. */
2411 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2413 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2416 /* warning: addr must be aligned */
/*
 * Load a 64-bit value from @addr in @as using the byte order @endian.
 * If fewer than 8 bytes translate contiguously, or the region is not
 * directly accessible, the value is read with io_mem_read() and the
 * TARGET_WORDS_BIGENDIAN-dependent branches deal with @endian.  Otherwise
 * it is loaded from the host pointer with ldq_le_p() or ldq_be_p().
 */
2417 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2418 enum device_endian endian)
2426 mr = address_space_translate(as, addr, &addr1, &l,
2428 if (l < 8 || !memory_access_is_direct(mr, false)) {
2430 io_mem_read(mr, addr1, &val, 8);
2431 #if defined(TARGET_WORDS_BIGENDIAN)
2432 if (endian == DEVICE_LITTLE_ENDIAN) {
2436 if (endian == DEVICE_BIG_ENDIAN) {
2442 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2446 case DEVICE_LITTLE_ENDIAN:
2447 val = ldq_le_p(ptr);
2449 case DEVICE_BIG_ENDIAN:
2450 val = ldq_be_p(ptr);
/* 64-bit load from @addr in @as, target-native byte order. */
2460 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2462 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
/* 64-bit load from @addr in @as, little-endian byte order. */
2465 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2467 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
/* 64-bit load from @addr in @as, big-endian byte order. */
2470 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2472 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
/*
 * Load one byte from @addr in @as by calling address_space_rw() as a
 * 1-byte read into val.
 */
2476 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2479 address_space_rw(as, addr, &val, 1, 0);
2483 /* warning: addr must be aligned */
/*
 * Load a 16-bit value from @addr in @as using the byte order @endian.
 * If fewer than 2 bytes translate contiguously, or the region is not
 * directly accessible, the value is read with io_mem_read() and the
 * TARGET_WORDS_BIGENDIAN-dependent branches deal with @endian.  Otherwise
 * it is loaded from the host pointer with lduw_le_p() or lduw_be_p().
 */
2484 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2485 enum device_endian endian)
2493 mr = address_space_translate(as, addr, &addr1, &l,
2495 if (l < 2 || !memory_access_is_direct(mr, false)) {
2497 io_mem_read(mr, addr1, &val, 2);
2498 #if defined(TARGET_WORDS_BIGENDIAN)
2499 if (endian == DEVICE_LITTLE_ENDIAN) {
2503 if (endian == DEVICE_BIG_ENDIAN) {
2509 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2513 case DEVICE_LITTLE_ENDIAN:
2514 val = lduw_le_p(ptr);
2516 case DEVICE_BIG_ENDIAN:
2517 val = lduw_be_p(ptr);
/* 16-bit load from @addr in @as, target-native byte order. */
2527 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2529 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
/* 16-bit load from @addr in @as, little-endian byte order. */
2532 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2534 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
/* 16-bit load from @addr in @as, big-endian byte order. */
2537 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2539 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2542 /* warning: addr must be aligned. The ram page is not marked as dirty
2543 and the code inside is not invalidated. It is useful if the dirty
2544 bits are used to track modified PTEs */
/*
 * Store the 32-bit @val at @addr in @as.  If fewer than 4 bytes translate
 * contiguously, or the region is not directly accessible, the store goes
 * through io_mem_write().  Otherwise the host pointer is taken from
 * qemu_get_ram_ptr(); only while in_migration is set, a clean page has its
 * translated code invalidated and its MIGRATION and VGA dirty flags set.
 */
2545 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2552 mr = address_space_translate(as, addr, &addr1, &l,
2554 if (l < 4 || !memory_access_is_direct(mr, true)) {
2555 io_mem_write(mr, addr1, val, 4);
2557 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2558 ptr = qemu_get_ram_ptr(addr1);
2561 if (unlikely(in_migration)) {
2562 if (cpu_physical_memory_is_clean(addr1)) {
2563 /* invalidate code */
2564 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2566 cpu_physical_memory_set_dirty_flag(addr1,
2567 DIRTY_MEMORY_MIGRATION);
2568 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2574 /* warning: addr must be aligned */
/*
 * Store the 32-bit @val at @addr in @as using the byte order @endian.
 * If fewer than 4 bytes translate contiguously, or the region is not
 * directly accessible, the TARGET_WORDS_BIGENDIAN-dependent branches deal
 * with @endian and the store goes through io_mem_write().  Otherwise the
 * value is stored through the host pointer and invalidate_and_set_dirty()
 * is called for the 4 bytes.
 */
2575 static inline void stl_phys_internal(AddressSpace *as,
2576 hwaddr addr, uint32_t val,
2577 enum device_endian endian)
2584 mr = address_space_translate(as, addr, &addr1, &l,
2586 if (l < 4 || !memory_access_is_direct(mr, true)) {
2587 #if defined(TARGET_WORDS_BIGENDIAN)
2588 if (endian == DEVICE_LITTLE_ENDIAN) {
2592 if (endian == DEVICE_BIG_ENDIAN) {
2596 io_mem_write(mr, addr1, val, 4);
2599 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2600 ptr = qemu_get_ram_ptr(addr1);
2602 case DEVICE_LITTLE_ENDIAN:
2605 case DEVICE_BIG_ENDIAN:
2612 invalidate_and_set_dirty(addr1, 4);
/* 32-bit store of @val at @addr in @as, target-native byte order. */
2616 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2618 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
/* 32-bit store of @val at @addr in @as, little-endian byte order. */
2621 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2623 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
/* 32-bit store of @val at @addr in @as, big-endian byte order. */
2626 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2628 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
/*
 * Store one byte at @addr in @as by calling address_space_rw() as a
 * 1-byte write from the local v.
 */
2632 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2635 address_space_rw(as, addr, &v, 1, 1);
2638 /* warning: addr must be aligned */
/*
 * Store the 16-bit @val at @addr in @as using the byte order @endian.
 * If fewer than 2 bytes translate contiguously, or the region is not
 * directly accessible, the TARGET_WORDS_BIGENDIAN-dependent branches deal
 * with @endian and the store goes through io_mem_write().  Otherwise the
 * value is stored through the host pointer and invalidate_and_set_dirty()
 * is called for the 2 bytes.
 */
2639 static inline void stw_phys_internal(AddressSpace *as,
2640 hwaddr addr, uint32_t val,
2641 enum device_endian endian)
2648 mr = address_space_translate(as, addr, &addr1, &l, true);
2649 if (l < 2 || !memory_access_is_direct(mr, true)) {
2650 #if defined(TARGET_WORDS_BIGENDIAN)
2651 if (endian == DEVICE_LITTLE_ENDIAN) {
2655 if (endian == DEVICE_BIG_ENDIAN) {
2659 io_mem_write(mr, addr1, val, 2);
2662 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2663 ptr = qemu_get_ram_ptr(addr1);
2665 case DEVICE_LITTLE_ENDIAN:
2668 case DEVICE_BIG_ENDIAN:
2675 invalidate_and_set_dirty(addr1, 2);
/* 16-bit store of @val at @addr in @as, target-native byte order. */
2679 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2681 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
/* 16-bit store of @val at @addr in @as, little-endian byte order. */
2684 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2686 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
/* 16-bit store of @val at @addr in @as, big-endian byte order. */
2689 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2691 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
/*
 * 64-bit store at @addr in @as: the 8 bytes of val are written with
 * address_space_rw().
 * NOTE(review): any byte-order conversion of val before the write is not
 * visible here.
 */
2695 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2698 address_space_rw(as, addr, (void *) &val, 8, 1);
/*
 * 64-bit little-endian store at @addr in @as: val is converted with
 * cpu_to_le64() and its 8 bytes are written with address_space_rw().
 */
2701 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2703 val = cpu_to_le64(val);
2704 address_space_rw(as, addr, (void *) &val, 8, 1);
/*
 * 64-bit big-endian store at @addr in @as: val is converted with
 * cpu_to_be64() and its 8 bytes are written with address_space_rw().
 */
2707 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2709 val = cpu_to_be64(val);
2710 address_space_rw(as, addr, (void *) &val, 8, 1);
2713 /* virtual memory access for debug (includes writing to ROM) */
/*
 * Works on one target page at a time: the page containing @addr is
 * translated with cpu_get_phys_page_debug(), and a result of -1 means no
 * physical page is mapped.  l is the distance from @addr to the end of its
 * page.  Writes go through cpu_physical_memory_write_rom() and reads
 * through address_space_rw(), both on cpu->as.
 */
2714 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2715 uint8_t *buf, int len, int is_write)
2722 page = addr & TARGET_PAGE_MASK;
2723 phys_addr = cpu_get_phys_page_debug(cpu, page);
2724 /* if no physical page mapped, return an error */
2725 if (phys_addr == -1)
2727 l = (page + TARGET_PAGE_SIZE) - addr;
2730 phys_addr += (addr & ~TARGET_PAGE_MASK);
2732 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2734 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2744 #if !defined(CONFIG_USER_ONLY)
/*
 * virtio_is_big_endian: prototype followed by the definition.  The result
 * depends on whether TARGET_WORDS_BIGENDIAN is defined at compile time.
 * NOTE(review): the return statements are not visible here - presumably
 * true when the macro is defined and false otherwise.
 */
2747 * A helper function for the _utterly broken_ virtio device model to find out if
2748 * it's running on a big endian machine. Don't do this at home kids!
2750 bool virtio_is_big_endian(void);
2751 bool virtio_is_big_endian(void)
2753 #if defined(TARGET_WORDS_BIGENDIAN)
2762 #ifndef CONFIG_USER_ONLY
/*
 * Return true when @phys_addr, translated in address_space_memory for a
 * read, lands in a region that is neither RAM nor ROMD.
 */
2763 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2768 mr = address_space_translate(&address_space_memory,
2769 phys_addr, &phys_addr, &l, false);
2771 return !(memory_region_is_ram(mr) ||
2772 memory_region_is_romd(mr));
2775 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2779 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2780 func(block->host, block->offset, block->length, opaque);