mm: protect against concurrent vma expansion
index 5646677a96d591f5ab1179473b3f4938d42d0ded..2b7d9e78a5693982a70f2b7f61d42afcad20ef01 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -89,6 +89,20 @@ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
  */
 struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
 
+/*
+ * The global memory commitment made in the system can serve as a metric
+ * that drives ballooning decisions when Linux is hosted as a guest.
+ * On Hyper-V, the host implements a policy engine for dynamically
+ * balancing memory across the competing virtual machines it hosts.
+ * Several metrics drive this policy engine, including the guest-reported
+ * memory commitment.
+ */
+unsigned long vm_memory_committed(void)
+{
+       return percpu_counter_read_positive(&vm_committed_as);
+}
+EXPORT_SYMBOL_GPL(vm_memory_committed);
+
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
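
The helper exported above is the hook a memory balloon driver would poll when reporting guest memory pressure to the host. A minimal sketch of such a caller (the helper name report_committed_pages() is hypothetical; it assumes the accompanying declaration of vm_memory_committed() in linux/mm.h):

#include <linux/mm.h>

/* Hypothetical caller: sample the system-wide commit charge, in pages. */
static unsigned long report_committed_pages(void)
{
	/*
	 * vm_committed_as is a percpu_counter, so the value is approximate;
	 * percpu_counter_read_positive() clamps transient negative sums to 0.
	 */
	return vm_memory_committed();
}
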
@@ -1474,7 +1488,11 @@ munmap_back:
                 *
                 * Answer: Yes, several device drivers can do it in their
                 *         f_op->mmap method. -DaveM
+                * Bug: If addr is changed, prev, rb_link, rb_parent should
+                *      be updated for vma_link()
                 */
+               WARN_ON_ONCE(addr != vma->vm_start);
+
                addr = vma->vm_start;
                pgoff = vma->vm_pgoff;
                vm_flags = vma->vm_flags;
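
To see what the new WARN_ON_ONCE() catches, consider a hypothetical (deliberately buggy) driver ->mmap method, not part of this patch: by moving the vma it invalidates the prev/rb_link/rb_parent that were computed for the original address, so the subsequent vma_link() would splice the vma into the wrong place in the rbtree.

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical buggy driver mmap: relocating the vma trips the warning. */
static int buggy_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* The caller's cached insertion point still refers to the old range. */
	vma->vm_start += PAGE_SIZE;
	vma->vm_end += PAGE_SIZE;
	return 0;
}
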
@@ -2051,6 +2069,18 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
                        error = acct_stack_growth(vma, size, grow);
                        if (!error) {
+                               /*
+                                * vma_gap_update() doesn't support concurrent
+                                * updates, but we only hold a shared mmap_sem
+                                * lock here, so we need to protect against
+                                * concurrent vma expansions.
+                                * vma_lock_anon_vma() doesn't help here, as
+                                * we don't guarantee that all growable vmas
+                                * in a mm share the same root anon vma.
+                                * So, we reuse mm->page_table_lock to guard
+                                * against concurrent vma expansions.
+                                */
+                               spin_lock(&vma->vm_mm->page_table_lock);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_end = address;
                                anon_vma_interval_tree_post_update_vma(vma);
@@ -2058,6 +2088,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                                        vma_gap_update(vma->vm_next);
                                else
                                        vma->vm_mm->highest_vm_end = address;
+                               spin_unlock(&vma->vm_mm->page_table_lock);
+
                                perf_event_mmap(vma);
                        }
                }
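
The race described in the comment above is easiest to see as an interleaving. A hypothetical timeline, assuming two threads fault on different stack vmas of the same mm while holding mmap_sem for read:

/*
 * Hypothetical interleaving without the new spin_lock:
 *
 *   thread A (read mmap_sem)           thread B (read mmap_sem)
 *   expand_upwards(vma1, addr1)        expand_downwards(vma2, addr2)
 *     vma1->vm_end = addr1;              vma2->vm_start = addr2;
 *     vma_gap_update(vma1);              vma_gap_update(vma2);
 *       -> walks up the rbtree,            -> walks up the rbtree,
 *          rewriting rb_subtree_gap           rewriting rb_subtree_gap
 *
 * With both walks touching shared ancestor nodes, the augmented
 * rb_subtree_gap values can end up stale. Serializing the update
 * sections on mm->page_table_lock makes each propagation atomic with
 * respect to the other expander.
 */
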
@@ -2108,11 +2140,25 @@ int expand_downwards(struct vm_area_struct *vma,
                if (grow <= vma->vm_pgoff) {
                        error = acct_stack_growth(vma, size, grow);
                        if (!error) {
+                               /*
+                                * vma_gap_update() doesn't support concurrent
+                                * updates, but we only hold a shared mmap_sem
+                                * lock here, so we need to protect against
+                                * concurrent vma expansions.
+                                * vma_lock_anon_vma() doesn't help here, as
+                                * we don't guarantee that all growable vmas
+                                * in a mm share the same root anon vma.
+                                * So, we reuse mm->page_table_lock to guard
+                                * against concurrent vma expansions.
+                                */
+                               spin_lock(&vma->vm_mm->page_table_lock);
                                anon_vma_interval_tree_pre_update_vma(vma);
                                vma->vm_start = address;
                                vma->vm_pgoff -= grow;
                                anon_vma_interval_tree_post_update_vma(vma);
                                vma_gap_update(vma);
+                               spin_unlock(&vma->vm_mm->page_table_lock);
+
                                perf_event_mmap(vma);
                        }
                }
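
For the downward case above, a small worked example (hypothetical addresses, 4 KiB pages) shows why vm_pgoff must shrink in step with vm_start; for an anonymous stack vma, vm_pgoff mirrors vm_start >> PAGE_SHIFT, which is also why the grow <= vma->vm_pgoff check earlier in this hunk keeps the subtraction from underflowing:

/*
 * Hypothetical numbers, PAGE_SHIFT = 12:
 *
 *   before: vm_start = 0x7fffff8f6000, vm_pgoff = 0x7fffff8f6
 *   fault one page below              ->  grow = 1
 *   after : vm_start = 0x7fffff8f5000, vm_pgoff = 0x7fffff8f5
 *
 * Keeping vm_pgoff == vm_start >> PAGE_SHIFT preserves the linear
 * address/offset relationship that anon rmap relies on for this vma.
 */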