- From 2a706f0ca2178c06b2642ba8bceed9a6c14ec813 Mon Sep 17 00:00:00 2001
- From: Roland Mainz <roland.mainz@nrubsig.org>
- Date: Fri, 1 Mar 2024 11:13:35 +0100
- Subject: [PATCH 1/4] sys: Work around random crashes in
- |SeImpersonateClientEx()|
- Add workaround for random crashes in |SeImpersonateClientEx()|,
- which catches the kernel exception and returns
- |STATUS_INTERNAL_ERROR| to the caller.
- Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
- ---
- sys/nfs41_driver.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 49 insertions(+)
- diff --git a/sys/nfs41_driver.c b/sys/nfs41_driver.c
- index 52c633c..042676d 100644
- --- a/sys/nfs41_driver.c
- +++ b/sys/nfs41_driver.c
- @@ -1364,7 +1364,56 @@ NTSTATUS handle_upcall(
- ULONG cbOut = LowIoContext->ParamsFor.IoCtl.OutputBufferLength;
- unsigned char *pbOut = LowIoContext->ParamsFor.IoCtl.pOutputBuffer;
- +#ifdef NFS41_DRIVER_STABILITY_HACKS
- + /*
- + * Workaround for random crashes like this while compiling
- + * the "gcc" compiler with a highly-parallel build.
- + * Stack trace usually looks like this:
- + * ---- snip ----
- + * nt!SeTokenCanImpersonate+0x47
- + * nt!PsImpersonateClient+0x126
- + * nt!SeImpersonateClientEx+0x35
- + * nfs41_driver!handle_upcall+0x59 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 1367]
- + * nfs41_driver!nfs41_upcall+0xe7 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 1578]
- + * nfs41_driver!nfs41_DevFcbXXXControlFile+0x128 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 2418]
- + * nfs41_driver!RxXXXControlFileCallthru+0x76 [base\fs\rdr2\rdbss\ntdevfcb.c @ 130]
- + * nfs41_driver!RxCommonDevFCBIoCtl+0x58 [base\fs\rdr2\rdbss\ntdevfcb.c @ 491]
- + * nfs41_driver!RxFsdCommonDispatch+0x442 [base\fs\rdr2\rdbss\ntfsd.c @ 848]
- + * nfs41_driver!RxFsdDispatch+0xfd [base\fs\rdr2\rdbss\ntfsd.c @ 442]
- + * nfs41_driver!nfs41_FsdDispatch+0x67 [C:\cygwin64\home\roland_mainz\work\msnfs41_uidmapping\ms-nfs41-client\sys\nfs41_driver.c @ 6863]
- + * nt!IofCallDriver+0x55
- + * mup!MupiCallUncProvider+0xb8
- + * mup!MupStateMachine+0x59
- + * mup!MupFsdIrpPassThrough+0x17e
- + * nt!IofCallDriver+0x55
- + * FLTMGR!FltpDispatch+0xd6
- + * nt!IofCallDriver+0x55
- + * nt!IopSynchronousServiceTail+0x34c
- + * nt!IopXxxControlFile+0xd13
- + * nt!NtDeviceIoControlFile+0x56
- + * nt!KiSystemServiceCopyEnd+0x25
- + * ntdll!NtDeviceIoControlFile+0x14
- + * KERNELBASE!DeviceIoControl+0x6b
- + * KERNEL32!DeviceIoControlImplementation+0x81
- + * nfsd_debug+0xc7b14
- + * nfsd_debug+0xc79fb
- + * nfsd_debug+0x171e80
- + * KERNEL32!BaseThreadInitThunk+0x14
- + * ntdll!RtlUserThreadStart+0x21
- + * ---- snip ----
- + */
- + __try {
- + status = SeImpersonateClientEx(entry->psec_ctx, NULL);
- + } __except(EXCEPTION_EXECUTE_HANDLER) {
- + NTSTATUS code;
- + code = GetExceptionCode();
- + print_error("handle_upcall: Call to SeImpersonateClientEx() "
- + "failed due to exception 0x%0x\n", (int)code);
- + status = STATUS_INTERNAL_ERROR;
- + }
- +#else
- status = SeImpersonateClientEx(entry->psec_ctx, NULL);
- +#endif /* NFS41_DRIVER_STABILITY_HACKS */
- if (status != STATUS_SUCCESS) {
- print_error("SeImpersonateClientEx failed %x\n", status);
- goto out;
- --
- 2.43.0
- From c23ab23ec78638fb3f9c99119a5df823d4aad5d4 Mon Sep 17 00:00:00 2001
- From: Roland Mainz <roland.mainz@nrubsig.org>
- Date: Fri, 1 Mar 2024 11:15:51 +0100
- Subject: [PATCH 2/4] daemon: Add debug infrastructure to catch ptr
- use-after-free cases
- Add debug infrastructure to catch pointer-use-after-free cases.
- Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
- ---
- daemon/daemon_debug.c | 60 +++++++++++++++++++++++++++++++++++++++++++
- daemon/daemon_debug.h | 6 +++++
- 2 files changed, 66 insertions(+)
- diff --git a/daemon/daemon_debug.c b/daemon/daemon_debug.c
- index 9941ce2..b1cb396 100644
- --- a/daemon/daemon_debug.c
- +++ b/daemon/daemon_debug.c
- @@ -802,3 +802,63 @@ void print_nfs41_file_info(
- dprintf_out("%s={ %s }\n", label, buf);
- }
- +
- +#define NUM_RECENTLY_DELETED 128
- +static struct
- +{
- + SRWLOCK lock;
- + void *deleted_ptrs[NUM_RECENTLY_DELETED];
- + size_t deleted_index;
- +} ptr_recently_deleted_data = {
- + .lock = SRWLOCK_INIT,
- +};
- +
- +bool debug_ptr_was_recently_deleted(void *in_ptr)
- +{
- + size_t i;
- + bool_t res = false;
- +
- + AcquireSRWLockShared(&ptr_recently_deleted_data.lock);
- + for (i=0 ; i < NUM_RECENTLY_DELETED ; i++) {
- + if (ptr_recently_deleted_data.deleted_ptrs[i] == in_ptr) {
- + res = true;
- + break;
- + }
- + }
- + ReleaseSRWLockShared(&ptr_recently_deleted_data.lock);
- + return res;
- +}
- +
- +void debug_ptr_add_recently_deleted(void *in_ptr)
- +{
- + size_t i;
- +
- + AcquireSRWLockExclusive(&ptr_recently_deleted_data.lock);
- + i = ptr_recently_deleted_data.deleted_index++ % NUM_RECENTLY_DELETED;
- + ptr_recently_deleted_data.deleted_ptrs[i] = in_ptr;
- + ReleaseSRWLockExclusive(&ptr_recently_deleted_data.lock);
- +}
- +
- +#define NUM_DELAY_FREE 2048
- +static struct
- +{
- + SRWLOCK lock;
- + void *free_ptrs[NUM_DELAY_FREE];
- + size_t free_ptrs_index;
- +} debug_delay_free_data = {
- + .lock = SRWLOCK_INIT,
- +};
- +
- +void debug_delayed_free(void *in_ptr)
- +{
- + size_t i;
- +
- + AcquireSRWLockExclusive(&debug_delay_free_data.lock);
- + i = debug_delay_free_data.free_ptrs_index++ % NUM_DELAY_FREE;
- +
- + if (debug_delay_free_data.free_ptrs[i])
- + free(debug_delay_free_data.free_ptrs[i]);
- +
- + debug_delay_free_data.free_ptrs[i] = in_ptr;
- + ReleaseSRWLockExclusive(&debug_delay_free_data.lock);
- +}
- diff --git a/daemon/daemon_debug.h b/daemon/daemon_debug.h
- index 1b693e3..2bb42b6 100644
- --- a/daemon/daemon_debug.h
- +++ b/daemon/daemon_debug.h
- @@ -31,6 +31,7 @@
- #else
- # include <stdlib.h>
- #endif
- +#include <stdbool.h>
- #define DEFAULT_DEBUG_LEVEL 1
- @@ -118,4 +119,9 @@ const char* pnfs_iomode_string(enum pnfs_iomode iomode);
- void dprint_layout(int level, const struct __pnfs_file_layout *layout);
- void dprint_device(int level, const struct __pnfs_file_device *device);
- +bool debug_ptr_was_recently_deleted(void *in_ptr);
- +void debug_ptr_add_recently_deleted(void *in_ptr);
- +
- +void debug_delayed_free(void *in_ptr);
- +
- #endif
- --
- 2.43.0
- From c8ac7d015f66ff46aa582ff516a87f659e9e9468 Mon Sep 17 00:00:00 2001
- From: Roland Mainz <roland.mainz@nrubsig.org>
- Date: Fri, 1 Mar 2024 11:18:05 +0100
- Subject: [PATCH 3/4] daemon: Make sure |ref_count| used by
- |InterlockedIncrement()|&co is |volatile|&aligned
- Make sure |ref_count| used by |InterlockedIncrement()|+
- |InterlockedDecrement()| is |volatile|&aligned to 8 bytes
- as mandated in the Microsoft documentation.
- Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
- ---
- daemon/nfs41.h | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
- diff --git a/daemon/nfs41.h b/daemon/nfs41.h
- index c8216dd..698e970 100644
- --- a/daemon/nfs41.h
- +++ b/daemon/nfs41.h
- @@ -84,7 +84,7 @@ typedef struct __nfs41_server {
- nfs41_superblock_list superblocks;
- struct nfs41_name_cache *name_cache;
- struct list_entry entry; /* position in global server list */
- - LONG ref_count;
- + __declspec(align(8)) volatile LONG ref_count;
- } nfs41_server;
- enum delegation_status {
- @@ -99,7 +99,7 @@ typedef struct __nfs41_delegation_state {
- nfs41_path_fh parent;
- nfs41_path_fh file;
- struct list_entry client_entry; /* entry in nfs41_client.delegations */
- - LONG ref_count;
- + __declspec(align(8)) volatile LONG ref_count;
- enum delegation_status status;
- SRWLOCK lock;
- @@ -138,7 +138,7 @@ typedef struct __nfs41_open_state {
- struct __pnfs_layout_state *layout;
- struct list_entry client_entry; /* entry in nfs41_client.opens */
- SRWLOCK lock;
- - LONG ref_count;
- + __declspec(align(8)) volatile LONG ref_count;
- uint32_t share_access;
- uint32_t share_deny;
- uint64_t pnfs_last_offset; /* for layoutcommit */
- @@ -296,7 +296,7 @@ typedef struct __nfs41_root {
- struct list_entry clients;
- uint32_t wsize;
- uint32_t rsize;
- - LONG ref_count;
- + __declspec(align(8)) volatile LONG ref_count;
- uint32_t uid;
- uint32_t gid;
- DWORD sec_flavor;
- --
- 2.43.0
- From 8fb13d8365d6dd906023fc7ca610f110f41e67b8 Mon Sep 17 00:00:00 2001
- From: Roland Mainz <roland.mainz@nrubsig.org>
- Date: Fri, 1 Mar 2024 11:31:31 +0100
- Subject: [PATCH 4/4] daemon: Add workaround/hack for getattr-afer-file-close
- Add workaround/hack for getattr-afer-file-close, where
- a |upcall->state_ref| refers to a already deleted
- |nfs41_open_state| with a |ref_count| of |0|.
- Tthis happens because the Windows filesystem driver architecture
- is highly multithreaded, so one thread can do the file close
- request in the userland daemon, while in parallel another
- thread can begin the getattr, which leads to this kind of race
- condition.
- All code related to this hack is within |#ifdef
- NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS ... #endif|
- to make sure we can properly remove it once we find
- a real fix.
- Signed-off-by: Cedric Blancher <cedric.blancher@gmail.com>
- ---
- daemon/getattr.c | 16 ++++++++++++----
- daemon/open.c | 18 +++++++++++++++---
- daemon/upcall.c | 20 ++++++++++++++++++++
- nfs41_build_features.h | 8 ++++++++
- 4 files changed, 55 insertions(+), 7 deletions(-)
- diff --git a/daemon/getattr.c b/daemon/getattr.c
- index b85a87a..325cb9e 100644
- --- a/daemon/getattr.c
- +++ b/daemon/getattr.c
- @@ -3,6 +3,7 @@
- *
- * Olga Kornievskaia <aglo@umich.edu>
- * Casey Bodley <cbodley@umich.edu>
- + * Roland Mainz <roland.mainz@nrubsig.org>
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- @@ -61,13 +62,20 @@ static int parse_getattr(unsigned char *buffer, uint32_t length, nfs41_upcall *u
- {
- int status;
- -#ifdef NFS41_DRIVER_STABILITY_HACKS
- +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
- EASSERT(length > 4);
- if (length <= 4) {
- status = ERROR_INVALID_PARAMETER;
- goto out;
- }
- + if (debug_ptr_was_recently_deleted(upcall->state_ref)) {
- + eprintf("parse_getattr: upcall->state_ref(=0x%p) was "
- + "recently deleted\n", upcall->state_ref);
- + status = ERROR_INVALID_PARAMETER;
- + goto out;
- + }
- +
- EASSERT_IS_VALID_NON_NULL_PTR(upcall->state_ref);
- if (!DEBUG_IS_VALID_NON_NULL_PTR(upcall->state_ref)) {
- status = ERROR_INVALID_PARAMETER;
- @@ -93,7 +101,7 @@ static int parse_getattr(unsigned char *buffer, uint32_t length, nfs41_upcall *u
- goto out;
- }
- EASSERT(upcall->state_ref->ref_count > 0);
- -#endif /* NFS41_DRIVER_STABILITY_HACKS */
- +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
- getattr_upcall_args *args = &upcall->args.getattr;
- status = safe_read(&buffer, &length, &args->query_class, sizeof(args->query_class));
- @@ -115,7 +123,7 @@ static int handle_getattr(void *daemon_context, nfs41_upcall *upcall)
- nfs41_open_state *state = upcall->state_ref;
- nfs41_file_info info = { 0 };
- -#ifdef NFS41_DRIVER_STABILITY_HACKS
- +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
- if (!DEBUG_IS_VALID_NON_NULL_PTR(state->session)) {
- eprintf("handle_getattr: Invalid session ptr=0x%p\n",
- (void *)state->session);
- @@ -130,7 +138,7 @@ static int handle_getattr(void *daemon_context, nfs41_upcall *upcall)
- status = ERROR_INVALID_PARAMETER;
- goto out;
- }
- -#endif /* NFS41_DRIVER_STABILITY_HACKS */
- +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
- status = nfs41_cached_getattr(state->session, &state->file, &info);
- if (status) {
- diff --git a/daemon/open.c b/daemon/open.c
- index dc82ac8..4939411 100644
- --- a/daemon/open.c
- +++ b/daemon/open.c
- @@ -86,6 +86,14 @@ static void open_state_free(
- {
- struct list_entry *entry, *tmp;
- +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
- + /*
- + * Remember the pointer here so we can reject it
- + * later in |parse_getattr()|
- + */
- + debug_ptr_add_recently_deleted(state);
- +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
- +
- EASSERT(waitcriticalsection(&state->ea.lock) == TRUE);
- EASSERT(waitcriticalsection(&state->locks.lock) == TRUE);
- @@ -105,15 +113,19 @@ static void open_state_free(
- EASSERT(state->ref_count == 0);
- +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
- + (void)memset(state, 0, sizeof(nfs41_open_state));
- + debug_delayed_free(state);
- +#else
- free(state);
- +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
- }
- -
- /* open state reference counting */
- void nfs41_open_state_ref(
- IN nfs41_open_state *state)
- {
- -#ifdef NFS41_DRIVER_STABILITY_HACKS
- +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
- /*
- * gisburn: fixme: sometimes this happens under high parallel
- * usage with multiple mounts - but why ?
- @@ -129,7 +141,7 @@ void nfs41_open_state_ref(
- EASSERT_IS_VALID_NON_NULL_PTR(state);
- if (!DEBUG_IS_VALID_NON_NULL_PTR(state))
- return;
- -#endif /* NFS41_DRIVER_STABILITY_HACKS */
- +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
- EASSERT(state->ref_count > 0);
- diff --git a/daemon/upcall.c b/daemon/upcall.c
- index 3a7047b..93f720f 100644
- --- a/daemon/upcall.c
- +++ b/daemon/upcall.c
- @@ -3,6 +3,7 @@
- *
- * Olga Kornievskaia <aglo@umich.edu>
- * Casey Bodley <cbodley@umich.edu>
- + * Roland Mainz <roland.mainz@nrubsig.org>
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published by
- @@ -115,8 +116,27 @@ int upcall_parse(
- eprintf("unrecognized upcall opcode %d!\n", upcall->opcode);
- goto out;
- }
- +
- if (upcall->root_ref != INVALID_HANDLE_VALUE)
- nfs41_root_ref(upcall->root_ref);
- +
- +#ifdef NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS
- + if (upcall->state_ref != INVALID_HANDLE_VALUE) {
- + if (upcall->state_ref->ref_count == 0) {
- + eprintf("upcall->state_ref(=0x%p).ref_count == 0, "
- + "opcode %d; returning ERROR_INVALID_PARAMETER\n",
- + upcall->state_ref, upcall->opcode);
- + /*
- + * Set |upcall->state_ref| to |INVALID_HANDLE_VALUE|
- + * so that we do not try to dereference it
- + */
- + upcall->state_ref = INVALID_HANDLE_VALUE;
- + status = ERROR_INVALID_PARAMETER;
- + goto out;
- + }
- + }
- +#endif /* NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS */
- +
- if (upcall->state_ref != INVALID_HANDLE_VALUE)
- nfs41_open_state_ref(upcall->state_ref);
- diff --git a/nfs41_build_features.h b/nfs41_build_features.h
- index bb87fc0..61a37dd 100644
- --- a/nfs41_build_features.h
- +++ b/nfs41_build_features.h
- @@ -69,4 +69,12 @@
- */
- #define NFS41_DRIVER_STABILITY_HACKS 1
- +/*
- + * NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS - use
- + * horrible hacks to improve stabilty because sometimes we use
- + * |nfs41_open_state| afer a file close in highly parallel
- + * workloads (e.g. building the gcc compiler in parallel).
- + */
- +#define NFS41_DRIVER_WORKAROUND_FOR_GETATTR_AFTER_CLOSE_HACKS 1
- +
- #endif /* !_NFS41_DRIVER_BUILDFEATURES_ */
- --
- 2.43.0
msnfs41client: Patches for getattrafterclose-workaround, 2024-03-01
Posted by Anonymous on Fri 1st Mar 2024 11:38
raw | new post
Submit a correction or amendment below (click here to make a fresh posting)
After submitting an amendment, you'll be able to view the differences between the old and new posts easily.