pastebin - collaborative debugging tool
rovema.kpaste.net RSS


msnfs41client: Patches for getattrafterclose-workaround, 2024-03-01
Posted by Anonymous on Fri 1st Mar 2024 11:38
raw | new post

  1. From 2a706f0ca2178c06b2642ba8bceed9a6c14ec813 Mon Sep 17 00:00:00 2001
  2. From: Roland Mainz <roland.mainz@nrubsig.org>
  3. Date: Fri, 1 Mar 2024 11:13:35 +0100
  4. Subject: [PATCH 1/4] sys: Work around random crashes in
  5.  |SeImpersonateClientEx()|
  6.  
  7. Add workaround for random crashes in |SeImpersonateClientEx()|,
  8. which catches the kernel exception and returns
  9. |STATUS_INTERNAL_ERROR| to the caller.
  10.  
  11. Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
  12. ---
  13. sys/nfs41_driver.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++
  14.  1 file changed, 49 insertions(+)
  15.  
  16. diff --git a/sys/nfs41_driver.c b/sys/nfs41_driver.c
  17. index 52c633c..042676d 100644
  18. --- a/sys/nfs41_driver.c
  19. +++ b/sys/nfs41_driver.c
  20. @@ -1364,7 +1364,56 @@ NTSTATUS handle_upcall(
  21.      ULONG cbOut = LowIoContext->ParamsFor.IoCtl.OutputBufferLength;
  22.      unsigned char *pbOut = LowIoContext->ParamsFor.IoCtl.pOutputBuffer;
  23.  
  24. +#ifdef NFS41_DRIVER_STABILITY_HACKS
  25. +    /*
  26. +     * Workaround for random crashes like this while compiling
  27. +     * the "gcc" compiler with a highly-parallel build.
  28. +     * Stack trace usually looks like this:
  29. +     * ---- snip ----
  30. +     * nt!SeTokenCanImpersonate+0x47
  31. +     * nt!PsImpersonateClient+0x126
  32. +     * nt!SeImpersonateClientEx+0x35
  33. +     * nfs41_driver!handle_upcall+0x59 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 1367]
  34. +     * nfs41_driver!nfs41_upcall+0xe7 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 1578]
  35. +     * nfs41_driver!nfs41_DevFcbXXXControlFile+0x128 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 2418]
  36. +     * nfs41_driver!RxXXXControlFileCallthru+0x76 [base\fs\rdr2\rdbss\ntdevfcb.c @ 130]
  37. +     * nfs41_driver!RxCommonDevFCBIoCtl+0x58 [base\fs\rdr2\rdbss\ntdevfcb.c @ 491]
  38. +     * nfs41_driver!RxFsdCommonDispatch+0x442 [base\fs\rdr2\rdbss\ntfsd.c @ 848]
  39. +     * nfs41_driver!RxFsdDispatch+0xfd [base\fs\rdr2\rdbss\ntfsd.c @ 442]
  40. +     * nfs41_driver!nfs41_FsdDispatch+0x67 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 6863]
  41. +     * nt!IofCallDriver+0x55
  42. +     * mup!MupiCallUncProvider+0xb8
  43. +     * mup!MupStateMachine+0x59
  44. +     * mup!MupFsdIrpPassThrough+0x17e
  45. +     * nt!IofCallDriver+0x55
  46. +     * FLTMGR!FltpDispatch+0xd6
  47. +     * nt!IofCallDriver+0x55
  48. +     * nt!IopSynchronousServiceTail+0x34c
  49. +     * nt!IopXxxControlFile+0xd13
  50. +     * nt!NtDeviceIoControlFile+0x56
  51. +     * nt!KiSystemServiceCopyEnd+0x25
  52. +     * ntdll!NtDeviceIoControlFile+0x14
  53. +     * KERNELBASE!DeviceIoControl+0x6b
  54. +     * KERNEL32!DeviceIoControlImplementation+0x81
  55. +     * nfsd_debug+0xc7b14
  56. +     * nfsd_debug+0xc79fb
  57. +     * nfsd_debug+0x171e80
  58. +     * KERNEL32!BaseThreadInitThunk+0x14
  59. +     * ntdll!RtlUserThreadStart+0x21
  60. +     * ---- snip ----
  61. +     */
  62. +    __try {
  63. +        status = SeImpersonateClientEx(entry->psec_ctx, NULL);
  64. +    } __except(EXCEPTION_EXECUTE_HANDLER) {
  65. +        NTSTATUS code;
  66. +        code = GetExceptionCode();
  67. +        print_error("handle_upcall: Call to SeImpersonateClientEx() "
  68. +            "failed due to exception 0x%0x\n", (int)code);
  69. +        status = STATUS_INTERNAL_ERROR;
  70. +    }
  71. +#else
  72.      status = SeImpersonateClientEx(entry->psec_ctx, NULL);
  73. +#endif /* NFS41_DRIVER_STABILITY_HACKS */
  74.      if (status != STATUS_SUCCESS) {
  75.          print_error("SeImpersonateClientEx failed %x\n", status);
  76.          goto out;
  77. --
  78. 2.43.0
  79.  
  80. From c23ab23ec78638fb3f9c99119a5df823d4aad5d4 Mon Sep 17 00:00:00 2001
  81. From: Roland Mainz <roland.mainz@nrubsig.org>
  82. Date: Fri, 1 Mar 2024 11:15:51 +0100
  83. Subject: [PATCH 2/4] daemon: Add debug infrastructure to catch ptr
  84.  use-after-free cases
  85.  
  86. Add debug infrastructure to catch pointer-use-after-free cases.
  87.  
  88. Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
  89. ---
  90. daemon/daemon_debug.c | 60 +++++++++++++++++++++++++++++++++++++++++++
  91.  daemon/daemon_debug.h |  6 +++++
  92.  2 files changed, 66 insertions(+)
  93.  
  94. diff --git a/daemon/daemon_debug.c b/daemon/daemon_debug.c
  95. index 9941ce2..b1cb396 100644
  96. --- a/daemon/daemon_debug.c
  97. +++ b/daemon/daemon_debug.c
  98. @@ -802,3 +802,63 @@ void print_nfs41_file_info(
  99.  
  100.      dprintf_out("%s={ %s }\n", label, buf);
  101.  }
  102. +
  103. +#define NUM_RECENTLY_DELETED 128
  104. +static struct
  105. +{
  106. +    SRWLOCK lock;
  107. +    void *deleted_ptrs[NUM_RECENTLY_DELETED];
  108. +    size_t deleted_index;
  109. +} ptr_recently_deleted_data = {
  110. +    .lock = SRWLOCK_INIT,
  111. +};
  112. +
  113. +bool debug_ptr_was_recently_deleted(void *in_ptr)
  114. +{
  115. +    size_t i;
  116. +    bool_t res = false;
  117. +
  118. +    AcquireSRWLockShared(&ptr_recently_deleted_data.lock);
  119. +    for (i=0 ; i < NUM_RECENTLY_DELETED ; i++) {
  120. +        if (ptr_recently_deleted_data.deleted_ptrs[i] == in_ptr) {
  121. +            res = true;
  122. +            break;
  123. +        }
  124. +    }
  125. +    ReleaseSRWLockShared(&ptr_recently_deleted_data.lock);
  126. +    return res;
  127. +}
  128. +
  129. +void debug_ptr_add_recently_deleted(void *in_ptr)
  130. +{
  131. +    size_t i;
  132. +
  133. +    AcquireSRWLockExclusive(&ptr_recently_deleted_data.lock);
  134. +    i = ptr_recently_deleted_data.deleted_index++ % NUM_RECENTLY_DELETED;
  135. +    ptr_recently_deleted_data.deleted_ptrs[i] = in_ptr;
  136. +    ReleaseSRWLockExclusive(&ptr_recently_deleted_data.lock);
  137. +}
  138. +
  139. +#define NUM_DELAY_FREE 2048
  140. +static struct
  141. +{
  142. +    SRWLOCK lock;
  143. +    void *free_ptrs[NUM_DELAY_FREE];
  144. +    size_t free_ptrs_index;
  145. +} debug_delay_free_data = {
  146. +    .lock = SRWLOCK_INIT,
  147. +};
  148. +
  149. +void debug_delayed_free(void *in_ptr)
  150. +{
  151. +    size_t i;
  152. +
  153. +    AcquireSRWLockExclusive(&debug_delay_free_data.lock);
  154. +    i = debug_delay_free_data.free_ptrs_index++ % NUM_DELAY_FREE;
  155. +
  156. +    if (debug_delay_free_data.free_ptrs[i])
  157. +        free(debug_delay_free_data.free_ptrs[i]);
  158. +
  159. +    debug_delay_free_data.free_ptrs[i] = in_ptr;
  160. +    ReleaseSRWLockExclusive(&debug_delay_free_data.lock);
  161. +}
  162. diff --git a/daemon/daemon_debug.h b/daemon/daemon_debug.h
  163. index 1b693e3..2bb42b6 100644
  164. --- a/daemon/daemon_debug.h
  165. +++ b/daemon/daemon_debug.h
  166. @@ -31,6 +31,7 @@
  167.  #else
  168.  # include <stdlib.h>
  169.  #endif
  170. +#include <stdbool.h>
  171.  
  172.  #define DEFAULT_DEBUG_LEVEL 1
  173.  
  174. @@ -118,4 +119,9 @@ const char* pnfs_iomode_string(enum pnfs_iomode iomode);
  175.  void dprint_layout(int level, const struct __pnfs_file_layout *layout);
  176.  void dprint_device(int level, const struct __pnfs_file_device *device);
  177.  
  178. +bool debug_ptr_was_recently_deleted(void *in_ptr);
  179. +void debug_ptr_add_recently_deleted(void *in_ptr);
  180. +
  181. +void debug_delayed_free(void *in_ptr);
  182. +
  183.  #endif
  184. --
  185. 2.43.0
  186.  
  187. From c8ac7d015f66ff46aa582ff516a87f659e9e9468 Mon Sep 17 00:00:00 2001
  188. From: Roland Mainz <roland.mainz@nrubsig.org>
  189. Date: Fri, 1 Mar 2024 11:18:05 +0100
  190. Subject: [PATCH 3/4] daemon: Make sure |ref_count| used by
  191.  |InterlockedIncrement()|&co is |volatile|&aligned
  192.  
  193. Make sure |ref_count| used by |InterlockedIncrement()|+
  194. |InterlockedDecrement()| is |volatile|&aligned to 8 bytes
  195. as mandated in the Microsoft documentation.
  196.  
  197. Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
  198. ---
  199. daemon/nfs41.h | 8 ++++----
  200.  1 file changed, 4 insertions(+), 4 deletions(-)
  201.  
  202. diff --git a/daemon/nfs41.h b/daemon/nfs41.h
  203. index c8216dd..698e970 100644
  204. --- a/daemon/nfs41.h
  205. +++ b/daemon/nfs41.h
  206. @@ -84,7 +84,7 @@ typedef struct __nfs41_server {
  207.      nfs41_superblock_list superblocks;
  208.      struct nfs41_name_cache *name_cache;
  209.      struct list_entry entry; /* position in global server list */
  210. -    LONG ref_count;
  211. +    __declspec(align(8)) volatile LONG ref_count;
  212.  } nfs41_server;
  213.  
  214.  enum delegation_status {
  215. @@ -99,7 +99,7 @@ typedef struct __nfs41_delegation_state {
  216.      nfs41_path_fh parent;
  217.      nfs41_path_fh file;
  218.      struct list_entry client_entry; /* entry in nfs41_client.delegations */
  219. -    LONG ref_count;
  220. +    __declspec(align(8)) volatile LONG ref_count;
  221.  
  222.      enum delegation_status status;
  223.      SRWLOCK lock;
  224. @@ -138,7 +138,7 @@ typedef struct __nfs41_open_state {
  225.      struct __pnfs_layout_state *layout;
  226.      struct list_entry client_entry; /* entry in nfs41_client.opens */
  227.      SRWLOCK lock;
  228. -    LONG ref_count;
  229. +    __declspec(align(8)) volatile LONG ref_count;
  230.      uint32_t share_access;
  231.      uint32_t share_deny;
  232.      uint64_t pnfs_last_offset; /* for layoutcommit */
  233. @@ -296,7 +296,7 @@ typedef struct __nfs41_root {
  234.      struct list_entry clients;
  235.      uint32_t wsize;
  236.      uint32_t rsize;
  237. -    LONG ref_count;
  238. +    __declspec(align(8)) volatile LONG ref_count;
  239.      uint32_t uid;
  240.      uint32_t gid;
  241.      DWORD sec_flavor;
  242. --
  243. 2.43.0
  244.  
  245. From 8fb13d8365d6dd906023fc7ca610f110f41e67b8 Mon Sep 17 00:00:00 2001
  246. From: Roland Mainz <roland.mainz@nrubsig.org>
  247. Date: Fri, 1 Mar 2024 11:31:31 +0100
  248. Subject: [PATCH 4/4] daemon: Add workaround/hack for getattr-afer-file-close
  249.  
  250. Add workaround/hack for getattr-afer-file-close, where
  251. a |upcall->state_ref| refers to a already deleted
  252. |nfs41_open_state| with a |ref_count| of |0|.
  253. Tthis happens because the Windows filesystem driver architecture
  254. is highly multithreaded, so one thread can do the file close
  255. request in the userland daemon, while in parallel another
  256. thread can begin the getattr, which leads to this kind of race
  257. condition.
  258.  
  259. All code related to this hack is within |#ifdef
  260. NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS ... #endif|
  261. to make sure we can properly remove it once we find
  262. a real fix.
  263.  
  264. Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
  265. ---
  266. daemon/getattr.c       | 16 ++++++++++++----
  267.  daemon/open.c          | 18 +++++++++++++++---
  268.  daemon/upcall.c        | 20 ++++++++++++++++++++
  269.  nfs41_build_features.h |  8 ++++++++
  270.  4 files changed, 55 insertions(+), 7 deletions(-)
  271.  
  272. diff --git a/daemon/getattr.c b/daemon/getattr.c
  273. index b85a87a..325cb9e 100644
  274. --- a/daemon/getattr.c
  275. +++ b/daemon/getattr.c
  276. @@ -3,6 +3,7 @@
  277.   *
  278.   * Olga Kornievskaia <aglo@umich.edu>
  279.   * Casey Bodley <cbodley@umich.edu>
  280. + * Roland Mainz <roland.mainz@nrubsig.org>
  281.   *
  282.   * This library is free software; you can redistribute it and/or modify it
  283.   * under the terms of the GNU Lesser General Public License as published by
  284. @@ -61,13 +62,20 @@ static int parse_getattr(unsigned char *buffer, uint32_t length, nfs41_upcall *u
  285.  {
  286.      int status;
  287.  
  288. -#ifdef NFS41_DRIVER_STABILITY_HACKS
  289. +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
  290.      EASSERT(length > 4);
  291.      if (length <= 4) {
  292.          status = ERROR_INVALID_PARAMETER;
  293.          goto out;
  294.      }
  295.  
  296. +    if (debug_ptr_was_recently_deleted(upcall->state_ref)) {
  297. +        eprintf("parse_getattr: upcall->state_ref(=0x%p) was "
  298. +            "recently deleted\n", upcall->state_ref);
  299. +        status = ERROR_INVALID_PARAMETER;
  300. +        goto out;
  301. +    }
  302. +
  303.      EASSERT_IS_VALID_NON_NULL_PTR(upcall->state_ref);
  304.      if (!DEBUG_IS_VALID_NON_NULL_PTR(upcall->state_ref)) {
  305.          status = ERROR_INVALID_PARAMETER;
  306. @@ -93,7 +101,7 @@ static int parse_getattr(unsigned char *buffer, uint32_t length, nfs41_upcall *u
  307.          goto out;
  308.      }
  309.      EASSERT(upcall->state_ref->ref_count > 0);
  310. -#endif /* NFS41_DRIVER_STABILITY_HACKS */
  311. +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
  312.  
  313.      getattr_upcall_args *args = &upcall->args.getattr;
  314.      status = safe_read(&buffer, &length, &args->query_class, sizeof(args->query_class));
  315. @@ -115,7 +123,7 @@ static int handle_getattr(void *daemon_context, nfs41_upcall *upcall)
  316.      nfs41_open_state *state = upcall->state_ref;
  317.      nfs41_file_info info = { 0 };
  318.  
  319. -#ifdef NFS41_DRIVER_STABILITY_HACKS
  320. +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
  321.      if (!DEBUG_IS_VALID_NON_NULL_PTR(state->session)) {
  322.          eprintf("handle_getattr: Invalid session ptr=0x%p\n",
  323.              (void *)state->session);
  324. @@ -130,7 +138,7 @@ static int handle_getattr(void *daemon_context, nfs41_upcall *upcall)
  325.          status = ERROR_INVALID_PARAMETER;
  326.          goto out;
  327.      }
  328. -#endif /* NFS41_DRIVER_STABILITY_HACKS */
  329. +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
  330.  
  331.      status = nfs41_cached_getattr(state->session, &state->file, &info);
  332.      if (status) {
  333. diff --git a/daemon/open.c b/daemon/open.c
  334. index dc82ac8..4939411 100644
  335. --- a/daemon/open.c
  336. +++ b/daemon/open.c
  337. @@ -86,6 +86,14 @@ static void open_state_free(
  338.  {
  339.      struct list_entry *entry, *tmp;
  340.  
  341. +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
  342. +    /*
  343. +     * Remember the pointer here so we can reject it
  344. +     * later in |parse_getattr()|
  345. +     */
  346. +    debug_ptr_add_recently_deleted(state);
  347. +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
  348. +
  349.      EASSERT(waitcriticalsection(&state->ea.lock) == TRUE);
  350.      EASSERT(waitcriticalsection(&state->locks.lock) == TRUE);
  351.  
  352. @@ -105,15 +113,19 @@ static void open_state_free(
  353.  
  354.      EASSERT(state->ref_count == 0);
  355.  
  356. +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
  357. +    (void)memset(state, 0, sizeof(nfs41_open_state));
  358. +    debug_delayed_free(state);
  359. +#else
  360.      free(state);
  361. +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
  362.  }
  363.  
  364. -
  365.  /* open state reference counting */
  366.  void nfs41_open_state_ref(
  367.      IN nfs41_open_state *state)
  368.  {
  369. -#ifdef NFS41_DRIVER_STABILITY_HACKS
  370. +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
  371.      /*
  372.       * gisburn: fixme: sometimes this happens under high parallel
  373.       * usage with multiple mounts - but why ?
  374. @@ -129,7 +141,7 @@ void nfs41_open_state_ref(
  375.      EASSERT_IS_VALID_NON_NULL_PTR(state);
  376.      if (!DEBUG_IS_VALID_NON_NULL_PTR(state))
  377.          return;
  378. -#endif /* NFS41_DRIVER_STABILITY_HACKS */
  379. +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
  380.  
  381.      EASSERT(state->ref_count > 0);
  382.  
  383. diff --git a/daemon/upcall.c b/daemon/upcall.c
  384. index 3a7047b..93f720f 100644
  385. --- a/daemon/upcall.c
  386. +++ b/daemon/upcall.c
  387. @@ -3,6 +3,7 @@
  388.   *
  389.   * Olga Kornievskaia <aglo@umich.edu>
  390.   * Casey Bodley <cbodley@umich.edu>
  391. + * Roland Mainz <roland.mainz@nrubsig.org>
  392.   *
  393.   * This library is free software; you can redistribute it and/or modify it
  394.   * under the terms of the GNU Lesser General Public License as published by
  395. @@ -115,8 +116,27 @@ int upcall_parse(
  396.          eprintf("unrecognized upcall opcode %d!\n", upcall->opcode);
  397.          goto out;
  398.      }
  399. +
  400.      if (upcall->root_ref != INVALID_HANDLE_VALUE)
  401.          nfs41_root_ref(upcall->root_ref);
  402. +
  403. +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
  404. +    if (upcall->state_ref != INVALID_HANDLE_VALUE) {
  405. +        if (upcall->state_ref->ref_count == 0) {
  406. +            eprintf("upcall->state_ref(=0x%p).ref_count == 0, "
  407. +                "opcode %d; returning ERROR_INVALID_PARAMETER\n",
  408. +                upcall->state_ref, upcall->opcode);
  409. +            /*
  410. +             * Set |upcall->state_ref| to |INVALID_HANDLE_VALUE|
  411. +             * so that we do not try to dereference it
  412. +             */
  413. +            upcall->state_ref = INVALID_HANDLE_VALUE;
  414. +            status = ERROR_INVALID_PARAMETER;
  415. +            goto out;
  416. +        }
  417. +    }
  418. +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
  419. +
  420.      if (upcall->state_ref != INVALID_HANDLE_VALUE)
  421.          nfs41_open_state_ref(upcall->state_ref);
  422.  
  423. diff --git a/nfs41_build_features.h b/nfs41_build_features.h
  424. index bb87fc0..61a37dd 100644
  425. --- a/nfs41_build_features.h
  426. +++ b/nfs41_build_features.h
  427. @@ -69,4 +69,12 @@
  428.   */
  429.  #define NFS41_DRIVER_STABILITY_HACKS 1
  430.  
  431. +/*
  432. + * NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS - use
  433. + * horrible hacks to improve stabilty because sometimes we use
  434. + * |nfs41_open_state| afer a file close in highly parallel
  435. + * workloads (e.g. building the gcc compiler in parallel).
  436. + */
  437. +#define NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS 1
  438. +
  439.  #endif /* !_NFS41_DRIVER_BUILDFEATURES_ */
  440. --
  441. 2.43.0

Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.

Syntax highlighting:

To highlight particular lines, prefix each line with {%HIGHLIGHT}




All content is user-submitted.
The administrators of this site (kpaste.net) are not responsible for their content.
Abuse reports should be emailed to us at