Heads up: the 375.26 build breaks after recent kernel commits

mm/gup.c has had several updates over the last week, one of which adds an argument to get_user_pages_remote(). This breaks the NV_GET_USER_PAGES_REMOTE wrapper and makes the nvidia module fail to compile. Builds against 4.9.0 still work.
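
For reference, the change appears to add a trailing "locked" parameter to get_user_pages_remote(). Roughly (from memory, check include/linux/mm.h in your own tree):

/* 4.9 */
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
                           unsigned long start, unsigned long nr_pages,
                           unsigned int gup_flags, struct page **pages,
                           struct vm_area_struct **vmas);

/* 4.10-rc adds a trailing "locked" pointer; passing NULL keeps the old
 * behaviour, which is what the patch below does. */
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
                           unsigned long start, unsigned long nr_pages,
                           unsigned int gup_flags, struct page **pages,
                           struct vm_area_struct **vmas, int *locked);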

I don’t know enough to fix it, but I thought I’d bring it up.

Cheers,
Aaron

Here is the patch:

diff -ur kernel/common/inc/nv-mm.h b/kernel/common/inc/nv-mm.h
--- kernel/common/inc/nv-mm.h	2016-11-16 00:53:45.000000000 +0100
+++ b/kernel/common/inc/nv-mm.h	2016-12-15 09:54:13.755753901 +0100
@@ -83,7 +83,7 @@
             if (force)
                 flags |= FOLL_FORCE;
 
-            return get_user_pages_remote(tsk, mm, start, nr_pages, flags, pages, vmas);
+            return get_user_pages_remote(tsk, mm, start, nr_pages, flags, pages, vmas, NULL);
         }
     #endif
 #else
Only in b/kernel: nv_compiler.h
diff -ur kernel/nvidia/nv-p2p.c b/kernel/nvidia/nv-p2p.c
--- kernel/nvidia/nv-p2p.c	2016-11-16 00:53:45.000000000 +0100
+++ b/kernel/nvidia/nv-p2p.c	2016-12-15 09:56:29.315763465 +0100
@@ -146,7 +146,7 @@
 int nvidia_p2p_get_pages(
     uint64_t p2p_token,
     uint32_t va_space,
-    uint64_t virtual_address,
+    uint64_t address,
     uint64_t length,
     struct nvidia_p2p_page_table **page_table,
     void (*free_callback)(void * data),
@@ -211,7 +211,7 @@
     }
 
     status = rm_p2p_get_pages(sp, p2p_token, va_space,
-            virtual_address, length, physical_addresses, wreqmb_h,
+            address, length, physical_addresses, wreqmb_h,
             rreqmb_h, &entries, &gpu_uuid, *page_table,
             free_callback, data);
     if (status != NV_OK)
@@ -286,7 +286,7 @@
 
     if (bGetPages)
     {
-        rm_p2p_put_pages(sp, p2p_token, va_space, virtual_address,
+        rm_p2p_put_pages(sp, p2p_token, va_space, address,
                 gpu_uuid, *page_table);
     }
 
@@ -329,7 +329,7 @@
 int nvidia_p2p_put_pages(
     uint64_t p2p_token,
     uint32_t va_space,
-    uint64_t virtual_address,
+    uint64_t address,
     struct nvidia_p2p_page_table *page_table
 )
 {
@@ -343,7 +343,7 @@
         return rc;
     }
 
-    status = rm_p2p_put_pages(sp, p2p_token, va_space, virtual_address,
+    status = rm_p2p_put_pages(sp, p2p_token, va_space, address,
             page_table->gpu_uuid, page_table);
     if (status == NV_OK)
         nvidia_p2p_free_page_table(page_table);
diff -ur kernel/nvidia-drm/nvidia-drm-fence.c b/kernel/nvidia-drm/nvidia-drm-fence.c
--- kernel/nvidia-drm/nvidia-drm-fence.c	2016-11-16 00:44:48.000000000 +0100
+++ b/kernel/nvidia-drm/nvidia-drm-fence.c	2016-12-15 10:07:42.621810964 +0100
@@ -31,7 +31,7 @@
 
 #if defined(NV_DRM_DRIVER_HAS_GEM_PRIME_RES_OBJ)
 struct nv_fence {
-    struct fence base;
+    struct dma_fence base;
     spinlock_t lock;
 
     struct nvidia_drm_device *nv_dev;
@@ -51,7 +51,7 @@
 
 static const char *nvidia_drm_gem_prime_fence_op_get_driver_name
 (
-    struct fence *fence
+    struct dma_fence *fence
 )
 {
     return "NVIDIA";
@@ -59,7 +59,7 @@
 
 static const char *nvidia_drm_gem_prime_fence_op_get_timeline_name
 (
-    struct fence *fence
+    struct dma_fence *fence
 )
 {
     return "nvidia.prime";
@@ -67,7 +67,7 @@
 
 static bool nvidia_drm_gem_prime_fence_op_signaled
 (
-    struct fence *fence
+    struct dma_fence *fence
 )
 {
     struct nv_fence *nv_fence = container_of(fence, struct nv_fence, base);
@@ -99,7 +99,7 @@
 
 static bool nvidia_drm_gem_prime_fence_op_enable_signaling
 (
-    struct fence *fence
+    struct dma_fence *fence
 )
 {
     bool ret = true;
@@ -107,7 +107,7 @@
     struct nvidia_drm_gem_object *nv_gem = nv_fence->nv_gem;
     struct nvidia_drm_device *nv_dev = nv_fence->nv_dev;
 
-    if (fence_is_signaled(fence))
+    if (dma_fence_is_signaled(fence))
     {
         return false;
     }
@@ -132,7 +132,7 @@
     }
 
     nv_gem->fenceContext.softFence = fence;
-    fence_get(fence);
+    dma_fence_get(fence);
 
 unlock_struct_mutex:
     mutex_unlock(&nv_dev->dev->struct_mutex);
@@ -142,7 +142,7 @@
 
 static void nvidia_drm_gem_prime_fence_op_release
 (
-    struct fence *fence
+    struct dma_fence *fence
 )
 {
     struct nv_fence *nv_fence = container_of(fence, struct nv_fence, base);
@@ -151,7 +151,7 @@
 
 static signed long nvidia_drm_gem_prime_fence_op_wait
 (
-    struct fence *fence,
+    struct dma_fence *fence,
     bool intr,
     signed long timeout
 )
@@ -166,12 +166,12 @@
      * that it should never get hit during normal operation, but not so long
      * that the system becomes unresponsive.
      */
-    return fence_default_wait(fence, intr,
+    return dma_fence_default_wait(fence, intr,
                               (timeout == MAX_SCHEDULE_TIMEOUT) ?
                                   msecs_to_jiffies(96) : timeout);
 }
 
-static const struct fence_ops nvidia_drm_gem_prime_fence_ops = {
+static const struct dma_fence_ops nvidia_drm_gem_prime_fence_ops = {
     .get_driver_name = nvidia_drm_gem_prime_fence_op_get_driver_name,
     .get_timeline_name = nvidia_drm_gem_prime_fence_op_get_timeline_name,
     .signaled = nvidia_drm_gem_prime_fence_op_signaled,
@@ -281,7 +281,7 @@
     bool force
 )
 {
-    struct fence *fence = nv_gem->fenceContext.softFence;
+    struct dma_fence *fence = nv_gem->fenceContext.softFence;
 
     WARN_ON(!mutex_is_locked(&nv_dev->dev->struct_mutex));
 
@@ -297,10 +297,10 @@
 
         if (force || nv_fence_ready_to_signal(nv_fence))
         {
-            fence_signal(&nv_fence->base);
+            dma_fence_signal(&nv_fence->base);
 
             nv_gem->fenceContext.softFence = NULL;
-            fence_put(&nv_fence->base);
+            dma_fence_put(&nv_fence->base);
 
             nvKms->disableChannelEvent(nv_dev->pDevice,
                                        nv_gem->fenceContext.cb);
@@ -316,7 +316,7 @@
 
         nv_fence = container_of(fence, struct nv_fence, base);
 
-        fence_signal(&nv_fence->base);
+        dma_fence_signal(&nv_fence->base);
     }
 }
 
@@ -509,7 +509,7 @@
      * fence_context_alloc() cannot fail, so we do not need to check a return
      * value.
      */
-    nv_gem->fenceContext.context = fence_context_alloc(1);
+    nv_gem->fenceContext.context = dma_fence_context_alloc(1);
 
     ret = nvidia_drm_gem_prime_fence_import_semaphore(
               nv_dev, nv_gem, p->index,
@@ -666,13 +666,13 @@
     nv_fence->nv_gem = nv_gem;
 
     spin_lock_init(&nv_fence->lock);
-    fence_init(&nv_fence->base, &nvidia_drm_gem_prime_fence_ops,
+    dma_fence_init(&nv_fence->base, &nvidia_drm_gem_prime_fence_ops,
                &nv_fence->lock, nv_gem->fenceContext.context,
                p->sem_thresh);
 
     reservation_object_add_excl_fence(&nv_gem->fenceContext.resv,
                                       &nv_fence->base);
-    fence_put(&nv_fence->base); /* Reservation object has reference */
+    dma_fence_put(&nv_fence->base); /* Reservation object has reference */
 
     ret = 0;
 
diff -ur kernel/nvidia-drm/nvidia-drm-gem.h b/kernel/nvidia-drm/nvidia-drm-gem.h
--- kernel/nvidia-drm/nvidia-drm-gem.h	2016-11-16 00:44:48.000000000 +0100
+++ b/kernel/nvidia-drm/nvidia-drm-gem.h	2016-12-15 10:14:29.463839666 +0100
@@ -98,7 +98,7 @@
         /* Software signaling structures */
         struct NvKmsKapiChannelEvent *cb;
         struct nvidia_drm_gem_prime_soft_fence_event_args *cbArgs;
-        struct fence *softFence; /* Fence for software signaling */
+        struct dma_fence *softFence; /* Fence for software signaling */
     } fenceContext;
 #endif
 };
diff -ur kernel/nvidia-drm/nvidia-drm-modeset.c b/kernel/nvidia-drm/nvidia-drm-modeset.c
--- kernel/nvidia-drm/nvidia-drm-modeset.c	2016-11-16 00:44:48.000000000 +0100
+++ b/kernel/nvidia-drm/nvidia-drm-modeset.c	2016-12-15 09:56:29.305763464 +0100
@@ -69,8 +69,7 @@
 
 void nvidia_drm_atomic_state_free(struct drm_atomic_state *state)
 {
-    struct nvidia_drm_atomic_state *nv_state =
-                    to_nv_atomic_state(state);
+    struct nvidia_drm_atomic_state *nv_state = to_nv_atomic_state(state);
     drm_atomic_state_default_release(state);
     nvidia_drm_free(nv_state);
 }
@@ -645,7 +644,7 @@
 
     wake_up_all(&nv_dev->pending_commit_queue);
 
-    drm_atomic_state_free(state);
+    drm_atomic_state_put(state);
 
 #if !defined(NV_DRM_MODE_CONFIG_FUNCS_HAS_ATOMIC_STATE_ALLOC)
     nvidia_drm_free(requested_config);
@@ -983,7 +982,7 @@
      * drm_atomic_commit().
      */
     if (ret != 0) {
-        drm_atomic_state_free(state);
+        drm_atomic_state_put(state);
     }
 
     drm_modeset_unlock_all(dev);
diff -ur kernel/nvidia-drm/nvidia-drm-priv.h b/kernel/nvidia-drm/nvidia-drm-priv.h
--- kernel/nvidia-drm/nvidia-drm-priv.h	2016-11-16 00:44:48.000000000 +0100
+++ b/kernel/nvidia-drm/nvidia-drm-priv.h	2016-12-14 02:24:41.694755859 +0100
@@ -34,7 +34,7 @@
 #endif
 
 #if defined(NV_DRM_DRIVER_HAS_GEM_PRIME_RES_OBJ)
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <linux/reservation.h>
 #endif
 
diff -ur kernel/nvidia-uvm/uvm8.c b/kernel/nvidia-uvm/uvm8.c
--- kernel/nvidia-uvm/uvm8.c	2016-11-16 00:53:31.000000000 +0100
+++ b/kernel/nvidia-uvm/uvm8.c	2016-12-15 09:56:29.321763465 +0100
@@ -101,7 +101,7 @@
 // so we force it to fail instead.
 static int uvm_vm_fault_sigbus(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-    UVM_DBG_PRINT_RL("Fault to address 0x%p in disabled vma\n", vmf->virtual_address);
+    UVM_DBG_PRINT_RL("Fault to address 0x%p in disabled vma\n", vmf->address);
     vmf->page = NULL;
     return VM_FAULT_SIGBUS;
 }
@@ -315,7 +315,7 @@
 {
     uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
     uvm_va_block_t *va_block;
-    NvU64 fault_addr = (NvU64)(uintptr_t)vmf->virtual_address;
+    NvU64 fault_addr = (NvU64)(uintptr_t)vmf->address;
     bool is_write = vmf->flags & FAULT_FLAG_WRITE;
     NV_STATUS status = uvm_global_get_status();
     bool tools_enabled;
diff -ur kernel/nvidia-uvm/uvm8_test.c b/kernel/nvidia-uvm/uvm8_test.c
--- kernel/nvidia-uvm/uvm8_test.c	2016-11-16 00:53:31.000000000 +0100
+++ b/kernel/nvidia-uvm/uvm8_test.c	2016-12-15 09:56:29.347763467 +0100
@@ -103,7 +103,7 @@
     return NV_ERR_INVALID_STATE;
 }
 
-static NV_STATUS uvm8_test_get_kernel_virtual_address(
+static NV_STATUS uvm8_test_get_kernel_address(
         UVM_TEST_GET_KERNEL_VIRTUAL_ADDRESS_PARAMS *params,
         struct file *filp)
 {
@@ -173,7 +173,7 @@
         UVM_ROUTE_CMD_STACK(UVM_TEST_RANGE_GROUP_RANGE_COUNT,       uvm8_test_range_group_range_count);
         UVM_ROUTE_CMD_STACK(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE, uvm8_test_get_prefetch_faults_reenable_lapse);
         UVM_ROUTE_CMD_STACK(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE, uvm8_test_set_prefetch_faults_reenable_lapse);
-        UVM_ROUTE_CMD_STACK(UVM_TEST_GET_KERNEL_VIRTUAL_ADDRESS,    uvm8_test_get_kernel_virtual_address);
+        UVM_ROUTE_CMD_STACK(UVM_TEST_GET_KERNEL_VIRTUAL_ADDRESS,    uvm8_test_get_kernel_address);
         UVM_ROUTE_CMD_STACK(UVM_TEST_PMA_ALLOC_FREE,                uvm8_test_pma_alloc_free);
         UVM_ROUTE_CMD_STACK(UVM_TEST_PMM_ALLOC_FREE_ROOT,           uvm8_test_pmm_alloc_free_root);
         UVM_ROUTE_CMD_STACK(UVM_TEST_PMM_INJECT_PMA_EVICT_ERROR,    uvm8_test_pmm_inject_pma_evict_error);
diff -ur kernel/nvidia-uvm/uvm_lite.c b/kernel/nvidia-uvm/uvm_lite.c
--- kernel/nvidia-uvm/uvm_lite.c	2016-11-16 00:53:32.000000000 +0100
+++ b/kernel/nvidia-uvm/uvm_lite.c	2016-12-15 09:56:29.367763468 +0100
@@ -1333,7 +1333,7 @@
 #if defined(NV_VM_OPERATIONS_STRUCT_HAS_FAULT)
 int _fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-    unsigned long vaddr = (unsigned long)vmf->virtual_address;
+    unsigned long vaddr = (unsigned long)vmf->address;
     struct page *page = NULL;
     int retval;

The following errors are related to the CPU hotplug changes:

/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c: In function 'nvidia_cpu_callback':
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c:213:14: error: 'CPU_DOWN_FAILED' undeclared (first use in this function)
         case CPU_DOWN_FAILED:
              ^~~~~~~~~~~~~~~
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c:213:14: note: each undeclared identifier is reported only once for each function it appears in
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c:220:14: error: 'CPU_DOWN_PREPARE' undeclared (first use in this function)
         case CPU_DOWN_PREPARE:
              ^~~~~~~~~~~~~~~~
In file included from /home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c:15:0:
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c: In function 'nv_init_pat_support':
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/common/inc/nv-linux.h:391:34: error: implicit declaration of function 'register_cpu_notifier' [-Werror=implicit-function-declaration]
 #define register_hotcpu_notifier register_cpu_notifier
                                  ^
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c:258:17: note: in expansion of macro 'register_hotcpu_notifier'
             if (register_hotcpu_notifier(&nv_hotcpu_nfb) != 0)
                 ^~~~~~~~~~~~~~~~~~~~~~~~
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c: In function 'nv_teardown_pat_support':
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/common/inc/nv-linux.h:388:36: error: implicit declaration of function 'unregister_cpu_notifier' [-Werror=implicit-function-declaration]
 #define unregister_hotcpu_notifier unregister_cpu_notifier
                                    ^
/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.c:283:9: note: in expansion of macro 'unregister_hotcpu_notifier'
         unregister_hotcpu_notifier(&nv_hotcpu_nfb);
         ^~~~~~~~~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
make[2]: *** [scripts/Makefile.build:293: /home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel/nvidia/nv-pat.o] Error 1
make[2]: *** Waiting for unfinished jobs....
make[1]: *** [Makefile:1490: _module_/home/phil/dev/git/manjaro/repositories/extra/linux410-extramodules/nvidia/src/NVIDIA-Linux-x86-375.26/kernel] Error 2
make[1]: Leaving directory '/usr/lib/modules/4.10.0-1-MANJARO/build'
make: *** [Makefile:81: modules] Error 2

So the code needs to be converted to the new CPU hotplug state machine, along these lines.
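
Judging by the errors, the old notifier interface (register_hotcpu_notifier and the CPU_DOWN_* constants) is no longer usable here. A minimal sketch of what a conversion to cpuhp_setup_state()/cpuhp_remove_state() might look like follows; the helper names and empty callback bodies are placeholders, not the actual nv-pat.c code:

#include <linux/cpuhotplug.h>

static enum cpuhp_state nv_pat_hp_state;

/* Placeholder: would re-apply the driver's PAT setup on the CPU coming up. */
static int nv_pat_cpu_online(unsigned int cpu)
{
    return 0;
}

/* Placeholder: would restore the original PAT setup before the CPU goes down. */
static int nv_pat_cpu_down_prep(unsigned int cpu)
{
    return 0;
}

static int nv_pat_register_hotplug(void)
{
    /* CPUHP_AP_ONLINE_DYN returns a dynamically allocated state id (>= 0). */
    int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "nvidia/pat:online",
                                nv_pat_cpu_online, nv_pat_cpu_down_prep);
    if (ret < 0)
        return ret;
    nv_pat_hp_state = ret;
    return 0;
}

static void nv_pat_unregister_hotplug(void)
{
    cpuhp_remove_state(nv_pat_hp_state);
}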

The patch is incompatible with 4.10-rc1.

Could you update it?