Diffstat (limited to 'lib')
41 files changed, 1837 insertions, 804 deletions
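Before the per-file diffs, a few illustrative usage sketches of interfaces this series touches; none of the code below is part of the patches themselves, and every surrounding function, name and size in the sketches is invented for illustration.

The lib/bitmap.c hunks below replace bitmap_from_u32array()/bitmap_to_u32array() with bitmap_from_arr32()/bitmap_to_arr32(), which take a bit count rather than a word count. A minimal sketch of a caller converting between a u32 array (say, from a 32-bit user ABI) and an unsigned long bitmap; only the two helpers come from this series, the 100-bit width and function names are hypothetical.

#include <linux/bitmap.h>
#include <linux/types.h>

#define MY_NBITS 100	/* hypothetical bitmap width */

static void import_bits(unsigned long *dst, const u32 *src_words)
{
	/* Copies MY_NBITS bits from the u32 array; bits beyond MY_NBITS
	 * in the last destination word are cleared by the helper. */
	bitmap_from_arr32(dst, src_words, MY_NBITS);
}

static void export_bits(u32 *dst_words, const unsigned long *src)
{
	/* Reverse direction; bits beyond MY_NBITS in the last u32 are
	 * cleared, so the array can be handed out as-is. */
	bitmap_to_arr32(dst_words, src, MY_NBITS);
}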
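lib/bucket_locks.c is a new file below, factoring the bucket-lock array allocation out of rhashtable. A hedged sketch of a hash table using it; the structure, sizes and hash handling are hypothetical, and it assumes the helpers' declarations (which live outside this lib/ diff) are in scope.

#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/types.h>

struct my_table {
	spinlock_t *bucket_locks;
	unsigned int lock_mask;		/* set to size - 1 by the helper */
	/* ... buckets ... */
};

static int my_table_init(struct my_table *tbl)
{
	/* Ask for at most 1024 locks, scaled as 4 locks per possible CPU. */
	return alloc_bucket_spinlocks(&tbl->bucket_locks, &tbl->lock_mask,
				      1024, 4, GFP_KERNEL);
}

static spinlock_t *my_bucket_lock(struct my_table *tbl, u32 hash)
{
	/* Map a bucket hash onto one of the allocated locks. */
	return &tbl->bucket_locks[hash & tbl->lock_mask];
}

static void my_table_destroy(struct my_table *tbl)
{
	free_bucket_spinlocks(tbl->bucket_locks);
}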
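lib/crc-ccitt.c below adds crc_ccitt_false() alongside crc_ccitt(). A small illustrative check of a received frame; the 0xffff seed is the conventional initial value for the CRC-CCITT-FALSE variant, and the surrounding function is invented.

#include <linux/crc-ccitt.h>
#include <linux/types.h>

static bool my_frame_crc_ok(const u8 *frame, size_t len, u16 expected)
{
	/* CRC-CCITT-FALSE: poly 0x1021, initial value 0xffff, no final XOR. */
	return crc_ccitt_false(0xffff, frame, len) == expected;
}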
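lib/find_bit.c below gains find_next_and_bit(), which cpumask_next_and() now uses instead of walking one mask and testing the other bit by bit. A sketch of iterating the intersection of two bitmaps without a temporary AND buffer; the counting function itself is illustrative.

#include <linux/bitops.h>
#include <linux/bitmap.h>

static unsigned int count_common_bits(const unsigned long *a,
				      const unsigned long *b,
				      unsigned int nbits)
{
	unsigned int n = 0;
	unsigned long bit;

	/* find_next_and_bit() returns nbits once no further bit is set
	 * in both bitmaps. */
	for (bit = find_next_and_bit(a, b, nbits, 0);
	     bit < nbits;
	     bit = find_next_and_bit(a, b, nbits, bit + 1))
		n++;

	return n;
}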
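The lib/idr.c rework below adds idr_alloc_u32(), where the maximum is inclusive and the allocated ID is written back through @nextid. A hedged sketch of a writer-serialised allocation in the range 42-99; the IDR instance, lock and range are invented for illustration.

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static DEFINE_IDR(my_idr);
static DEFINE_SPINLOCK(my_idr_lock);

static int my_register(void *obj, u32 *out_id)
{
	u32 id = 42;		/* lowest acceptable ID */
	int ret;

	/* Writers must be serialised by the caller; @max (99 here) is
	 * inclusive, unlike the exclusive @end of idr_alloc(). */
	spin_lock(&my_idr_lock);
	ret = idr_alloc_u32(&my_idr, obj, &id, 99, GFP_ATOMIC);
	spin_unlock(&my_idr_lock);
	if (ret)
		return ret;	/* -ENOMEM or -ENOSPC */

	*out_id = id;		/* on success, id holds the allocated value */
	return 0;
}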
diff --git a/lib/Kconfig b/lib/Kconfig index c5e84fbcb30b..e96089499371 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -409,7 +409,11 @@ config HAS_DMA depends on !NO_DMA default y -config DMA_NOOP_OPS +config SGL_ALLOC + bool + default n + +config DMA_DIRECT_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) default n diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9d5b78aad4c5..6088408ef26c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -217,7 +217,7 @@ config ENABLE_MUST_CHECK config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 - default 0 if KASAN + default 3072 if KASAN_EXTRA default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1280 if (!64BIT && PARISC) default 1024 if (!64BIT && !PARISC) @@ -351,7 +351,6 @@ config SECTION_MISMATCH_WARN_ONLY # config ARCH_WANT_FRAME_POINTERS bool - help config FRAME_POINTER bool "Compile the kernel with frame pointers" @@ -1500,6 +1499,10 @@ config FAULT_INJECTION Provide fault-injection framework. For more details, see Documentation/fault-injection/. +config FUNCTION_ERROR_INJECTION + def_bool y + depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + config FAILSLAB bool "Fault-injection capability for kmalloc" depends on FAULT_INJECTION @@ -1547,6 +1550,16 @@ config FAIL_FUTEX help Provide fault-injection capability for futexes. +config FAIL_FUNCTION + bool "Fault-injection capability for functions" + depends on FAULT_INJECTION_DEBUG_FS && FUNCTION_ERROR_INJECTION + help + Provide function-based fault-injection capability. + This will allow you to override a specific function with a return + with given return value. As a result, function caller will see + an error value and have to handle it. This is useful to test the + error handling in various subsystems. + config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS @@ -1627,7 +1640,10 @@ config DMA_API_DEBUG If unsure, say N. -menu "Runtime Testing" +menuconfig RUNTIME_TESTING_MENU + bool "Runtime Testing" + +if RUNTIME_TESTING_MENU config LKDTM tristate "Linux Kernel Dump Test Tool Module" @@ -1827,7 +1843,7 @@ config TEST_BPF If unsure, say N. -config TEST_FIND_BIT +config FIND_BIT_BENCHMARK tristate "Test find_bit functions" default n help @@ -1915,7 +1931,7 @@ config TEST_DEBUG_VIRTUAL If unsure, say N. -endmenu # runtime tests +endif # RUNTIME_TESTING_MENU config MEMTEST bool "Memtest" @@ -1952,7 +1968,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED - default y if TILE || PPC + default y if TILE || PPC || X86 || ARM64 ---help--- If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index bd38aab05929..3d35d062970d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -20,6 +20,17 @@ config KASAN Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB (the resulting kernel does not boot). +config KASAN_EXTRA + bool "KAsan: extra checks" + depends on KASAN && DEBUG_KERNEL && !COMPILE_TEST + help + This enables further checks in the kernel address sanitizer, for now + it only includes the address-use-after-scope check that can lead + to excessive kernel stack usage, frame size warnings and longer + compile time. 
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 has more + + choice prompt "Instrumentation type" depends on KASAN diff --git a/lib/Makefile b/lib/Makefile index d11c48ec8ffd..a90d4fcd748f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,7 +28,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o +lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o @@ -39,15 +39,15 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o errseq.o + once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o +obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o @@ -149,6 +149,7 @@ obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o obj-$(CONFIG_MEMORY_NOTIFIER_ERROR_INJECT) += memory-notifier-error-inject.o obj-$(CONFIG_OF_RECONFIG_NOTIFIER_ERROR_INJECT) += \ of-reconfig-notifier-error-inject.o +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_GENERIC_BUG) += bug.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c index b77d51da8c73..c6659cb37033 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -38,12 +38,10 @@ begin_node: if (assoc_array_ptr_is_shortcut(cursor)) { /* Descend through a shortcut */ shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ } node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); slot = 0; /* We perform two passes of each node. @@ -55,15 +53,12 @@ begin_node: */ has_meta = 0; for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ has_meta |= (unsigned long)ptr; if (ptr && assoc_array_ptr_is_leaf(ptr)) { - /* We need a barrier between the read of the pointer - * and dereferencing the pointer - but only if we are - * actually going to dereference it. + /* We need a barrier between the read of the pointer, + * which is supplied by the above READ_ONCE(). */ - smp_read_barrier_depends(); - /* Invoke the callback */ ret = iterator(assoc_array_ptr_to_leaf(ptr), iterator_data); @@ -86,10 +81,8 @@ begin_node: continue_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (assoc_array_ptr_is_meta(ptr)) { cursor = ptr; goto begin_node; @@ -98,16 +91,15 @@ continue_node: finished_node: /* Move up to the parent (may need to skip back over a shortcut) */ - parent = READ_ONCE(node->back_pointer); + parent = READ_ONCE(node->back_pointer); /* Address dependency. 
*/ slot = node->parent_slot; if (parent == stop) return 0; if (assoc_array_ptr_is_shortcut(parent)) { shortcut = assoc_array_ptr_to_shortcut(parent); - smp_read_barrier_depends(); cursor = parent; - parent = READ_ONCE(shortcut->back_pointer); + parent = READ_ONCE(shortcut->back_pointer); /* Address dependency. */ slot = shortcut->parent_slot; if (parent == stop) return 0; @@ -147,7 +139,7 @@ int assoc_array_iterate(const struct assoc_array *array, void *iterator_data), void *iterator_data) { - struct assoc_array_ptr *root = READ_ONCE(array->root); + struct assoc_array_ptr *root = READ_ONCE(array->root); /* Address dependency. */ if (!root) return 0; @@ -194,7 +186,7 @@ assoc_array_walk(const struct assoc_array *array, pr_devel("-->%s()\n", __func__); - cursor = READ_ONCE(array->root); + cursor = READ_ONCE(array->root); /* Address dependency. */ if (!cursor) return assoc_array_walk_tree_empty; @@ -216,11 +208,9 @@ jumped: consider_node: node = assoc_array_ptr_to_node(cursor); - smp_read_barrier_depends(); - slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); slot &= ASSOC_ARRAY_FAN_MASK; - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ pr_devel("consider slot %x [ix=%d type=%lu]\n", slot, level, (unsigned long)ptr & 3); @@ -254,7 +244,6 @@ consider_node: cursor = ptr; follow_shortcut: shortcut = assoc_array_ptr_to_shortcut(cursor); - smp_read_barrier_depends(); pr_devel("shortcut to %d\n", shortcut->skip_to_level); sc_level = level + ASSOC_ARRAY_LEVEL_STEP; BUG_ON(sc_level > shortcut->skip_to_level); @@ -294,7 +283,7 @@ follow_shortcut: } while (sc_level < shortcut->skip_to_level); /* The shortcut matches the leaf's index to this point. */ - cursor = READ_ONCE(shortcut->next_node); + cursor = READ_ONCE(shortcut->next_node); /* Address dependency. */ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { level = sc_level; goto jumped; @@ -331,20 +320,18 @@ void *assoc_array_find(const struct assoc_array *array, return NULL; node = result.terminal_node.node; - smp_read_barrier_depends(); /* If the target key is available to us, it's has to be pointed to by * the terminal node. */ for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { - ptr = READ_ONCE(node->slots[slot]); + ptr = READ_ONCE(node->slots[slot]); /* Address dependency. */ if (ptr && assoc_array_ptr_is_leaf(ptr)) { /* We need a barrier between the read of the pointer * and dereferencing the pointer - but only if we are * actually going to dereference it. */ leaf = assoc_array_ptr_to_leaf(ptr); - smp_read_barrier_depends(); if (ops->compare_object(leaf, index_key)) return (void *)leaf; } diff --git a/lib/bitmap.c b/lib/bitmap.c index d8f0c094b18e..9e498c77ed0e 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1106,111 +1106,80 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order) EXPORT_SYMBOL(bitmap_allocate_region); /** - * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap - * @bitmap: array of unsigned longs, the destination bitmap, non NULL - * @nbits: number of bits in @bitmap - * @buf: array of u32 (in host byte order), the source bitmap, non NULL - * @nwords: number of u32 words in @buf - * - * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining - * bits between nword and nbits in @bitmap (if any) are cleared. In - * last word of @bitmap, the bits beyond nbits (if any) are kept - * unchanged. + * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. 
+ * @dst: destination buffer + * @src: bitmap to copy + * @nbits: number of bits in the bitmap * - * Return the number of bits effectively copied. + * Require nbits % BITS_PER_LONG == 0. */ -unsigned int -bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits, - const u32 *buf, unsigned int nwords) +#ifdef __BIG_ENDIAN +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned int dst_idx, src_idx; - - for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) { - unsigned long part = 0; - - if (src_idx < nwords) - part = buf[src_idx++]; - -#if BITS_PER_LONG == 64 - if (src_idx < nwords) - part |= ((unsigned long) buf[src_idx++]) << 32; -#endif - - if (dst_idx < nbits/BITS_PER_LONG) - bitmap[dst_idx] = part; - else { - unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); + unsigned int i; - bitmap[dst_idx] = (bitmap[dst_idx] & ~mask) - | (part & mask); - } + for (i = 0; i < nbits/BITS_PER_LONG; i++) { + if (BITS_PER_LONG == 64) + dst[i] = cpu_to_le64(src[i]); + else + dst[i] = cpu_to_le32(src[i]); } - - return min_t(unsigned int, nbits, 32*nwords); } -EXPORT_SYMBOL(bitmap_from_u32array); +EXPORT_SYMBOL(bitmap_copy_le); +#endif +#if BITS_PER_LONG == 64 /** - * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits - * @buf: array of u32 (in host byte order), the dest bitmap, non NULL - * @nwords: number of u32 words in @buf - * @bitmap: array of unsigned longs, the source bitmap, non NULL + * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap + * @bitmap: array of unsigned longs, the destination bitmap + * @buf: array of u32 (in host byte order), the source bitmap * @nbits: number of bits in @bitmap - * - * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining - * bits after nbits in @buf (if any) are cleared. - * - * Return the number of bits effectively copied. */ -unsigned int -bitmap_to_u32array(u32 *buf, unsigned int nwords, - const unsigned long *bitmap, unsigned int nbits) +void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, + unsigned int nbits) { - unsigned int dst_idx = 0, src_idx = 0; - - while (dst_idx < nwords) { - unsigned long part = 0; - - if (src_idx < BITS_TO_LONGS(nbits)) { - part = bitmap[src_idx]; - if (src_idx >= nbits/BITS_PER_LONG) - part &= BITMAP_LAST_WORD_MASK(nbits); - src_idx++; - } + unsigned int i, halfwords; - buf[dst_idx++] = part & 0xffffffffUL; + if (!nbits) + return; -#if BITS_PER_LONG == 64 - if (dst_idx < nwords) { - part >>= 32; - buf[dst_idx++] = part & 0xffffffffUL; - } -#endif + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + bitmap[i/2] = (unsigned long) buf[i]; + if (++i < halfwords) + bitmap[i/2] |= ((unsigned long) buf[i]) << 32; } - return min_t(unsigned int, nbits, 32*nwords); + /* Clear tail bits in last word beyond nbits. */ + if (nbits % BITS_PER_LONG) + bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits); } -EXPORT_SYMBOL(bitmap_to_u32array); +EXPORT_SYMBOL(bitmap_from_arr32); /** - * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. - * @dst: destination buffer - * @src: bitmap to copy - * @nbits: number of bits in the bitmap - * - * Require nbits % BITS_PER_LONG == 0. 
+ * bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits + * @buf: array of u32 (in host byte order), the dest bitmap + * @bitmap: array of unsigned longs, the source bitmap + * @nbits: number of bits in @bitmap */ -#ifdef __BIG_ENDIAN -void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) +void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits) { - unsigned int i; + unsigned int i, halfwords; - for (i = 0; i < nbits/BITS_PER_LONG; i++) { - if (BITS_PER_LONG == 64) - dst[i] = cpu_to_le64(src[i]); - else - dst[i] = cpu_to_le32(src[i]); + if (!nbits) + return; + + halfwords = DIV_ROUND_UP(nbits, 32); + for (i = 0; i < halfwords; i++) { + buf[i] = (u32) (bitmap[i/2] & UINT_MAX); + if (++i < halfwords) + buf[i] = (u32) (bitmap[i/2] >> 32); } + + /* Clear tail bits in last element of array beyond nbits. */ + if (nbits % BITS_PER_LONG) + buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31)); } -EXPORT_SYMBOL(bitmap_copy_le); +EXPORT_SYMBOL(bitmap_to_arr32); + #endif diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c new file mode 100644 index 000000000000..266a97c5708b --- /dev/null +++ b/lib/bucket_locks.c @@ -0,0 +1,54 @@ +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +/* Allocate an array of spinlocks to be accessed by a hash. Two arguments + * indicate the number of elements to allocate in the array. max_size + * gives the maximum number of elements to allocate. cpu_mult gives + * the number of locks per CPU to allocate. The size is rounded up + * to a power of 2 to be suitable as a hash table. + */ + +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, + size_t max_size, unsigned int cpu_mult, gfp_t gfp) +{ + spinlock_t *tlocks = NULL; + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + if (cpu_mult) { + nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); + size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size); + } else { + size = max_size; + } + + if (sizeof(spinlock_t) != 0) { + if (gfpflags_allow_blocking(gfp)) + tlocks = kvmalloc(size * sizeof(spinlock_t), gfp); + else + tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp); + if (!tlocks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tlocks[i]); + } + + *locks = tlocks; + *locks_mask = size - 1; + + return 0; +} +EXPORT_SYMBOL(alloc_bucket_spinlocks); + +void free_bucket_spinlocks(spinlock_t *locks) +{ + kvfree(locks); +} +EXPORT_SYMBOL(free_bucket_spinlocks); diff --git a/lib/chacha20.c b/lib/chacha20.c index 250ceed9ec9a..c1cc50fb68c9 100644 --- a/lib/chacha20.c +++ b/lib/chacha20.c @@ -16,12 +16,7 @@ #include <asm/unaligned.h> #include <crypto/chacha20.h> -static inline u32 rotl32(u32 v, u8 n) -{ - return (v << n) | (v >> (sizeof(v) * 8 - n)); -} - -extern void chacha20_block(u32 *state, void *stream) +void chacha20_block(u32 *state, u32 *stream) { u32 x[16], *out = stream; int i; @@ -30,45 +25,45 @@ extern void chacha20_block(u32 *state, void *stream) x[i] = state[i]; for (i = 0; i < 20; i += 2) { - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 16); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 
16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 12); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - x[0] += x[4]; x[12] = rotl32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rotl32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rotl32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rotl32(x[15] ^ x[3], 8); + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - x[8] += x[12]; x[4] = rotl32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rotl32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rotl32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rotl32(x[7] ^ x[11], 7); + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 16); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 12); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - x[0] += x[5]; x[15] = rotl32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rotl32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rotl32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rotl32(x[14] ^ x[3], 8); + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - x[10] += x[15]; x[5] = rotl32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rotl32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rotl32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rotl32(x[4] ^ x[9], 7); + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); } for (i = 0; i < ARRAY_SIZE(x); i++) diff --git a/lib/cpumask.c b/lib/cpumask.c index 35fe142ebb5e..beca6244671a 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -33,10 +33,11 @@ EXPORT_SYMBOL(cpumask_next); int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) - if (cpumask_test_cpu(n, src2p)) - break; - return n; + /* -1 is a legal arg here. 
*/ + if (n != -1) + cpumask_check(n); + return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits, n + 1); } EXPORT_SYMBOL(cpumask_next_and); diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c index 7f6dd68d2d09..d873b34039ff 100644 --- a/lib/crc-ccitt.c +++ b/lib/crc-ccitt.c @@ -51,8 +51,49 @@ u16 const crc_ccitt_table[256] = { }; EXPORT_SYMBOL(crc_ccitt_table); +/* + * Similar table to calculate CRC16 variant known as CRC-CCITT-FALSE + * Reflected bits order, does not augment final value. + */ +u16 const crc_ccitt_false_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, + 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, + 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, + 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, + 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, + 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, + 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, + 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, + 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, + 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, + 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, + 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, + 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, + 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, + 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, + 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, + 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, + 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, + 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, + 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, + 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, + 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 +}; +EXPORT_SYMBOL(crc_ccitt_false_table); + /** - * crc_ccitt - recompute the CRC for the data buffer + * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data + * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer @@ -65,5 +106,20 @@ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) } EXPORT_SYMBOL(crc_ccitt); +/** + * crc_ccitt_false - recompute the CRC (CRC-CCITT-FALSE variant) + * for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + */ +u16 crc_ccitt_false(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc_ccitt_false_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_ccitt_false); + MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL"); diff --git a/lib/dma-direct.c b/lib/dma-direct.c new file mode 
100644 index 000000000000..40b1f92f2214 --- /dev/null +++ b/lib/dma-direct.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DMA operations that map physical memory directly without using an IOMMU or + * flushing caches. + */ +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/dma-direct.h> +#include <linux/scatterlist.h> +#include <linux/dma-contiguous.h> +#include <linux/pfn.h> + +#define DIRECT_MAPPING_ERROR 0 + +/* + * Most architectures use ZONE_DMA for the first 16 Megabytes, but + * some use it for entirely different regions: + */ +#ifndef ARCH_ZONE_DMA_BITS +#define ARCH_ZONE_DMA_BITS 24 +#endif + +static bool +check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, + const char *caller) +{ + if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { + if (*dev->dma_mask >= DMA_BIT_MASK(32)) { + dev_err(dev, + "%s: overflow %pad+%zu of device mask %llx\n", + caller, &dma_addr, size, *dev->dma_mask); + } + return false; + } + return true; +} + +static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) +{ + return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask; +} + +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + int page_order = get_order(size); + struct page *page = NULL; + + /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ + if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + gfp |= GFP_DMA; + if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) + gfp |= GFP_DMA32; + +again: + /* CMA can be used only in the context which permits sleeping */ + if (gfpflags_allow_blocking(gfp)) { + page = dma_alloc_from_contiguous(dev, count, page_order, gfp); + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + dma_release_from_contiguous(dev, page, count); + page = NULL; + } + } + if (!page) + page = alloc_pages_node(dev_to_node(dev), gfp, page_order); + + if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { + __free_pages(page, page_order); + page = NULL; + + if (dev->coherent_dma_mask < DMA_BIT_MASK(32) && + !(gfp & GFP_DMA)) { + gfp = (gfp & ~GFP_DMA32) | GFP_DMA; + goto again; + } + } + + if (!page) + return NULL; + + *dma_handle = phys_to_dma(dev, page_to_phys(page)); + memset(page_address(page), 0, size); + return page_address(page); +} + +void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) + free_pages((unsigned long)cpu_addr, get_order(size)); +} + +static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; + + if (!check_addr(dev, dma_addr, size, __func__)) + return DIRECT_MAPPING_ERROR; + return dma_addr; +} + +static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + int i; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + + sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); + if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) + return 0; + sg_dma_len(sg) = sg->length; + } + + return nents; +} + +int 
dma_direct_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_ZONE_DMA + if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + return 0; +#else + /* + * Because 32-bit DMA masks are so common we expect every architecture + * to be able to satisfy them - either by not supporting more physical + * memory, or by providing a ZONE_DMA32. If neither is the case, the + * architecture needs to use an IOMMU instead of the direct mapping. + */ + if (mask < DMA_BIT_MASK(32)) + return 0; +#endif + return 1; +} + +static int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return dma_addr == DIRECT_MAPPING_ERROR; +} + +const struct dma_map_ops dma_direct_ops = { + .alloc = dma_direct_alloc, + .free = dma_direct_free, + .map_page = dma_direct_map_page, + .map_sg = dma_direct_map_sg, + .dma_supported = dma_direct_supported, + .mapping_error = dma_direct_mapping_error, +}; +EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noop.c b/lib/dma-noop.c deleted file mode 100644 index a10185b0c2d4..000000000000 --- a/lib/dma-noop.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-noop.c - * - * DMA operations that map to physical addresses without flushing memory. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/scatterlist.h> -#include <linux/pfn.h> - -static void *dma_noop_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); - - return ret; -} - -static void dma_noop_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); -} - -static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); - void *va; - - BUG_ON(!sg_page(sg)); - va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_noop_ops = { - .alloc = dma_noop_alloc, - .free = dma_noop_free, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, -}; - -EXPORT_SYMBOL(dma_noop_ops); diff --git a/lib/error-inject.c b/lib/error-inject.c new file mode 100644 index 000000000000..c0d4600f4896 --- /dev/null +++ b/lib/error-inject.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +// error-inject.c: Function-level error injection table +#include <linux/error-injection.h> +#include <linux/debugfs.h> +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/slab.h> + +/* Whitelist of symbols that can be overridden for error injection. 
*/ +static LIST_HEAD(error_injection_list); +static DEFINE_MUTEX(ei_mutex); +struct ei_entry { + struct list_head list; + unsigned long start_addr; + unsigned long end_addr; + int etype; + void *priv; +}; + +bool within_error_injection_list(unsigned long addr) +{ + struct ei_entry *ent; + bool ret = false; + + mutex_lock(&ei_mutex); + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) { + ret = true; + break; + } + } + mutex_unlock(&ei_mutex); + return ret; +} + +int get_injectable_error_type(unsigned long addr) +{ + struct ei_entry *ent; + + list_for_each_entry(ent, &error_injection_list, list) { + if (addr >= ent->start_addr && addr < ent->end_addr) + return ent->etype; + } + return EI_ETYPE_NONE; +} + +/* + * Lookup and populate the error_injection_list. + * + * For safety reasons we only allow certain functions to be overridden with + * bpf_error_injection, so we need to populate the list of the symbols that have + * been marked as safe for overriding. + */ +static void populate_error_injection_list(struct error_injection_entry *start, + struct error_injection_entry *end, + void *priv) +{ + struct error_injection_entry *iter; + struct ei_entry *ent; + unsigned long entry, offset = 0, size = 0; + + mutex_lock(&ei_mutex); + for (iter = start; iter < end; iter++) { + entry = arch_deref_entry_point((void *)iter->addr); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find error inject entry at %p\n", + (void *)entry); + continue; + } + + ent = kmalloc(sizeof(*ent), GFP_KERNEL); + if (!ent) + break; + ent->start_addr = entry; + ent->end_addr = entry + size; + ent->etype = iter->etype; + ent->priv = priv; + INIT_LIST_HEAD(&ent->list); + list_add_tail(&ent->list, &error_injection_list); + } + mutex_unlock(&ei_mutex); +} + +/* Markers of the _error_inject_whitelist section */ +extern struct error_injection_entry __start_error_injection_whitelist[]; +extern struct error_injection_entry __stop_error_injection_whitelist[]; + +static void __init populate_kernel_ei_list(void) +{ + populate_error_injection_list(__start_error_injection_whitelist, + __stop_error_injection_whitelist, + NULL); +} + +#ifdef CONFIG_MODULES +static void module_load_ei_list(struct module *mod) +{ + if (!mod->num_ei_funcs) + return; + + populate_error_injection_list(mod->ei_funcs, + mod->ei_funcs + mod->num_ei_funcs, mod); +} + +static void module_unload_ei_list(struct module *mod) +{ + struct ei_entry *ent, *n; + + if (!mod->num_ei_funcs) + return; + + mutex_lock(&ei_mutex); + list_for_each_entry_safe(ent, n, &error_injection_list, list) { + if (ent->priv == mod) { + list_del_init(&ent->list); + kfree(ent); + } + } + mutex_unlock(&ei_mutex); +} + +/* Module notifier call back, checking error injection table on the module */ +static int ei_module_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct module *mod = data; + + if (val == MODULE_STATE_COMING) + module_load_ei_list(mod); + else if (val == MODULE_STATE_GOING) + module_unload_ei_list(mod); + + return NOTIFY_DONE; +} + +static struct notifier_block ei_module_nb = { + .notifier_call = ei_module_callback, + .priority = 0 +}; + +static __init int module_ei_init(void) +{ + return register_module_notifier(&ei_module_nb); +} +#else /* !CONFIG_MODULES */ +#define module_ei_init() (0) +#endif + +/* + * error_injection/whitelist -- shows which functions can be overridden for + * error injection. 
+ */ +static void *ei_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ei_mutex); + return seq_list_start(&error_injection_list, *pos); +} + +static void ei_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&ei_mutex); +} + +static void *ei_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &error_injection_list, pos); +} + +static const char *error_type_string(int etype) +{ + switch (etype) { + case EI_ETYPE_NULL: + return "NULL"; + case EI_ETYPE_ERRNO: + return "ERRNO"; + case EI_ETYPE_ERRNO_NULL: + return "ERRNO_NULL"; + default: + return "(unknown)"; + } +} + +static int ei_seq_show(struct seq_file *m, void *v) +{ + struct ei_entry *ent = list_entry(v, struct ei_entry, list); + + seq_printf(m, "%pf\t%s\n", (void *)ent->start_addr, + error_type_string(ent->etype)); + return 0; +} + +static const struct seq_operations ei_seq_ops = { + .start = ei_seq_start, + .next = ei_seq_next, + .stop = ei_seq_stop, + .show = ei_seq_show, +}; + +static int ei_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &ei_seq_ops); +} + +static const struct file_operations debugfs_ei_ops = { + .open = ei_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init ei_debugfs_init(void) +{ + struct dentry *dir, *file; + + dir = debugfs_create_dir("error_injection", NULL); + if (!dir) + return -ENOMEM; + + file = debugfs_create_file("list", 0444, dir, NULL, &debugfs_ei_ops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + + return 0; +} + +static int __init init_error_injection(void) +{ + populate_kernel_ei_list(); + + if (!module_ei_init()) + ei_debugfs_init(); + + return 0; +} +late_initcall(init_error_injection); diff --git a/lib/errseq.c b/lib/errseq.c index 79cc66897db4..df782418b333 100644 --- a/lib/errseq.c +++ b/lib/errseq.c @@ -46,14 +46,14 @@ * @eseq: errseq_t field that should be set * @err: error to set (must be between -1 and -MAX_ERRNO) * - * This function sets the error in *eseq, and increments the sequence counter + * This function sets the error in @eseq, and increments the sequence counter * if the last sequence was sampled at some point in the past. * * Any error set will always overwrite an existing error. * - * We do return the latest value here, primarily for debugging purposes. The - * return value should not be used as a previously sampled value in later calls - * as it will not have the SEEN flag set. + * Return: The previous value, primarily for debugging purposes. The + * return value should not be used as a previously sampled value in later + * calls as it will not have the SEEN flag set. */ errseq_t errseq_set(errseq_t *eseq, int err) { @@ -108,11 +108,13 @@ errseq_t errseq_set(errseq_t *eseq, int err) EXPORT_SYMBOL(errseq_set); /** - * errseq_sample - grab current errseq_t value - * @eseq: pointer to errseq_t to be sampled + * errseq_sample() - Grab current errseq_t value. + * @eseq: Pointer to errseq_t to be sampled. * * This function allows callers to sample an errseq_t value, marking it as * "seen" if required. + * + * Return: The current errseq value. */ errseq_t errseq_sample(errseq_t *eseq) { @@ -134,15 +136,15 @@ errseq_t errseq_sample(errseq_t *eseq) EXPORT_SYMBOL(errseq_sample); /** - * errseq_check - has an error occurred since a particular sample point? - * @eseq: pointer to errseq_t value to be checked - * @since: previously-sampled errseq_t from which to check + * errseq_check() - Has an error occurred since a particular sample point? 
+ * @eseq: Pointer to errseq_t value to be checked. + * @since: Previously-sampled errseq_t from which to check. * - * Grab the value that eseq points to, and see if it has changed "since" - * the given value was sampled. The "since" value is not advanced, so there + * Grab the value that eseq points to, and see if it has changed @since + * the given value was sampled. The @since value is not advanced, so there * is no need to mark the value as seen. * - * Returns the latest error set in the errseq_t or 0 if it hasn't changed. + * Return: The latest error set in the errseq_t or 0 if it hasn't changed. */ int errseq_check(errseq_t *eseq, errseq_t since) { @@ -155,11 +157,11 @@ int errseq_check(errseq_t *eseq, errseq_t since) EXPORT_SYMBOL(errseq_check); /** - * errseq_check_and_advance - check an errseq_t and advance to current value - * @eseq: pointer to value being checked and reported - * @since: pointer to previously-sampled errseq_t to check against and advance + * errseq_check_and_advance() - Check an errseq_t and advance to current value. + * @eseq: Pointer to value being checked and reported. + * @since: Pointer to previously-sampled errseq_t to check against and advance. * - * Grab the eseq value, and see whether it matches the value that "since" + * Grab the eseq value, and see whether it matches the value that @since * points to. If it does, then just return 0. * * If it doesn't, then the value has changed. Set the "seen" flag, and try to @@ -170,6 +172,9 @@ EXPORT_SYMBOL(errseq_check); * value. The caller must provide that if necessary. Because of this, callers * may want to do a lockless errseq_check before taking the lock and calling * this. + * + * Return: Negative errno if one has been stored, or 0 if no new error has + * occurred. */ int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) { diff --git a/lib/find_bit.c b/lib/find_bit.c index 6ed74f78380c..ee3df93ba69a 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -21,22 +21,29 @@ #include <linux/export.h> #include <linux/kernel.h> -#if !defined(find_next_bit) || !defined(find_next_zero_bit) +#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ + !defined(find_next_and_bit) /* - * This is a common helper function for find_next_bit and - * find_next_zero_bit. The difference is the "invert" argument, which - * is XORed with each fetched word before searching it for one bits. + * This is a common helper function for find_next_bit, find_next_zero_bit, and + * find_next_and_bit. The differences are: + * - The "invert" argument, which is XORed with each fetched word before + * searching it for one bits. + * - The optional "addr2", which is anded with "addr1" if present. */ -static unsigned long _find_next_bit(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. 
*/ tmp &= BITMAP_FIRST_WORD_MASK(start); @@ -47,7 +54,10 @@ static unsigned long _find_next_bit(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(tmp), nbits); @@ -61,7 +71,7 @@ static unsigned long _find_next_bit(const unsigned long *addr, unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, 0UL); + return _find_next_bit(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit); #endif @@ -70,11 +80,21 @@ EXPORT_SYMBOL(find_next_bit); unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { - return _find_next_bit(addr, size, offset, ~0UL); + return _find_next_bit(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit); #endif +#if !defined(find_next_and_bit) +unsigned long find_next_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_and_bit); +#endif + #ifndef find_first_bit /* * Find the first set bit in a memory region. @@ -146,15 +166,19 @@ static inline unsigned long ext2_swab(const unsigned long y) } #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) -static unsigned long _find_next_bit_le(const unsigned long *addr, - unsigned long nbits, unsigned long start, unsigned long invert) +static inline unsigned long _find_next_bit_le(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert) { unsigned long tmp; if (unlikely(start >= nbits)) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; /* Handle 1st word. */ tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); @@ -165,7 +189,10 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, if (start >= nbits) return nbits; - tmp = addr[start / BITS_PER_LONG] ^ invert; + tmp = addr1[start / BITS_PER_LONG]; + if (addr2) + tmp &= addr2[start / BITS_PER_LONG]; + tmp ^= invert; } return min(start + __ffs(ext2_swab(tmp)), nbits); @@ -176,7 +203,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr, unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, ~0UL); + return _find_next_bit_le(addr, NULL, size, offset, ~0UL); } EXPORT_SYMBOL(find_next_zero_bit_le); #endif @@ -185,7 +212,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le); unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset) { - return _find_next_bit_le(addr, size, offset, 0UL); + return _find_next_bit_le(addr, NULL, size, offset, 0UL); } EXPORT_SYMBOL(find_next_bit_le); #endif diff --git a/lib/test_find_bit.c b/lib/find_bit_benchmark.c index f4394a36f9aa..5985a25e6cbc 100644 --- a/lib/test_find_bit.c +++ b/lib/find_bit_benchmark.c @@ -35,6 +35,7 @@ #define SPARSE 500 static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; +static DECLARE_BITMAP(bitmap2, BITMAP_LEN) __initdata; /* * This is Schlemiel the Painter's algorithm. 
It should be called after @@ -43,16 +44,15 @@ static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata; static int __init test_find_first_bit(void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < len; cnt++) { i = find_first_bit(bitmap, len); __clear_bit(i, bitmap); } - cycles = get_cycles() - cycles; - pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_first_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -60,14 +60,13 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_bit(bitmap, BITMAP_LEN, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -75,14 +74,13 @@ static int __init test_find_next_bit(const void *bitmap, unsigned long len) static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); for (cnt = i = 0; i < BITMAP_LEN; cnt++) i = find_next_zero_bit(bitmap, len, i) + 1; - cycles = get_cycles() - cycles; - pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + time = ktime_get() - time; + pr_err("find_next_zero_bit: %18llu ns, %6ld iterations\n", time, cnt); return 0; } @@ -90,9 +88,9 @@ static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len) static int __init test_find_last_bit(const void *bitmap, unsigned long len) { unsigned long l, cnt = 0; - cycles_t cycles; + ktime_t time; - cycles = get_cycles(); + time = ktime_get(); do { cnt++; l = find_last_bit(bitmap, len); @@ -100,9 +98,24 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len) break; len = l; } while (len); + time = ktime_get() - time; + pr_err("find_last_bit: %18llu ns, %6ld iterations\n", time, cnt); + + return 0; +} + +static int __init test_find_next_and_bit(const void *bitmap, + const void *bitmap2, unsigned long len) +{ + unsigned long i, cnt; + cycles_t cycles; + + cycles = get_cycles(); + for (cnt = i = 0; i < BITMAP_LEN; cnt++) + i = find_next_and_bit(bitmap, bitmap2, BITMAP_LEN, i+1); cycles = get_cycles() - cycles; - pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n", - (u64)cycles, cnt); + pr_err("find_next_and_bit:\t\t%llu cycles, %ld iterations\n", + (u64)cycles, cnt); return 0; } @@ -114,31 +127,36 @@ static int __init find_bit_test(void) pr_err("\nStart testing find_bit() with random-filled bitmap\n"); get_random_bytes(bitmap, sizeof(bitmap)); + get_random_bytes(bitmap2, sizeof(bitmap2)); test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); bitmap_zero(bitmap, BITMAP_LEN); + bitmap_zero(bitmap2, BITMAP_LEN); - while (nbits--) + while (nbits--) { __set_bit(prandom_u32() % BITMAP_LEN, bitmap); + __set_bit(prandom_u32() % BITMAP_LEN, bitmap2); + } 
test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); - return 0; + /* + * Everything is OK. Return error just to let user run benchmark + * again without annoying rmmod. + */ + return -EINVAL; } module_init(find_bit_test); -static void __exit test_find_bit_cleanup(void) -{ -} -module_exit(test_find_bit_cleanup); - MODULE_LICENSE("GPL"); diff --git a/lib/idr.c b/lib/idr.c index 2593ce513a18..c98d77fcf393 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -1,4 +1,5 @@ #include <linux/bitmap.h> +#include <linux/bug.h> #include <linux/export.h> #include <linux/idr.h> #include <linux/slab.h> @@ -7,71 +8,184 @@ DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); static DEFINE_SPINLOCK(simple_ida_lock); -int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index, - unsigned long start, unsigned long end, gfp_t gfp, - bool ext) +/** + * idr_alloc_u32() - Allocate an ID. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @nextid: Pointer to an ID. + * @max: The maximum ID to allocate (inclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @nextid and @max. + * Note that @max is inclusive whereas the @end parameter to idr_alloc() + * is exclusive. The new ID is assigned to @nextid before the pointer + * is inserted into the IDR, so if @nextid points into the object pointed + * to by @ptr, a concurrent lookup will not find an uninitialised ID. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. If an error occurred, + * @nextid is unchanged. + */ +int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, + unsigned long max, gfp_t gfp) { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; + int id = *nextid; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; + if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) + idr->idr_rt.gfp_mask |= IDR_RT_MARKER; - radix_tree_iter_init(&iter, start); - if (ext) - slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end); - else - slot = idr_get_free(&idr->idr_rt, &iter, gfp, end); + id = (id < base) ? 0 : id - base; + radix_tree_iter_init(&iter, id); + slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base); if (IS_ERR(slot)) return PTR_ERR(slot); + *nextid = iter.index + base; + /* there is a memory barrier inside radix_tree_iter_replace() */ radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr); radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE); - if (index) - *index = iter.index; return 0; } -EXPORT_SYMBOL_GPL(idr_alloc_cmn); +EXPORT_SYMBOL_GPL(idr_alloc_u32); /** - * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion - * @idr: idr handle - * @ptr: pointer to be associated with the new id - * @start: the minimum id (inclusive) - * @end: the maximum id (exclusive) - * @gfp: memory allocation flags - * - * Allocates an ID larger than the last ID allocated if one is available. - * If not, it will attempt to allocate the smallest ID that is larger or - * equal to @start. + * idr_alloc() - Allocate an ID. + * @idr: IDR handle. 
+ * @ptr: Pointer to be associated with the new ID. + * @start: The minimum ID (inclusive). + * @end: The maximum ID (exclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @start and @end. If + * @end is <= 0, it is treated as one larger than %INT_MAX. This allows + * callers to use @start + N as @end as long as N is within integer range. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: The newly allocated ID, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. */ -int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) { - int id, curr = idr->idr_next; + u32 id = start; + int ret; + + if (WARN_ON_ONCE(start < 0)) + return -EINVAL; + + ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp); + if (ret) + return ret; + + return id; +} +EXPORT_SYMBOL_GPL(idr_alloc); - if (curr < start) - curr = start; +/** + * idr_alloc_cyclic() - Allocate an ID cyclically. + * @idr: IDR handle. + * @ptr: Pointer to be associated with the new ID. + * @start: The minimum ID (inclusive). + * @end: The maximum ID (exclusive). + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @nextid and @end. If + * @end is <= 0, it is treated as one larger than %INT_MAX. This allows + * callers to use @start + N as @end as long as N is within integer range. + * The search for an unused ID will start at the last ID allocated and will + * wrap around to @start if no free IDs are found before reaching @end. + * + * The caller should provide their own locking to ensure that two + * concurrent modifications to the IDR are not possible. Read-only + * accesses to the IDR may be done under the RCU read lock or may + * exclude simultaneous writers. + * + * Return: The newly allocated ID, -ENOMEM if memory allocation failed, + * or -ENOSPC if no free IDs could be found. + */ +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp) +{ + u32 id = idr->idr_next; + int err, max = end > 0 ? end - 1 : INT_MAX; - id = idr_alloc(idr, ptr, curr, end, gfp); - if ((id == -ENOSPC) && (curr > start)) - id = idr_alloc(idr, ptr, start, curr, gfp); + if ((int)id < start) + id = start; - if (id >= 0) - idr->idr_next = id + 1U; + err = idr_alloc_u32(idr, ptr, &id, max, gfp); + if ((err == -ENOSPC) && (id > start)) { + id = start; + err = idr_alloc_u32(idr, ptr, &id, max, gfp); + } + if (err) + return err; + idr->idr_next = id + 1; return id; } EXPORT_SYMBOL(idr_alloc_cyclic); /** - * idr_for_each - iterate through all stored pointers - * @idr: idr handle - * @fn: function to be called for each pointer - * @data: data passed to callback function + * idr_remove() - Remove an ID from the IDR. + * @idr: IDR handle. + * @id: Pointer ID. + * + * Removes this ID from the IDR. If the ID was not previously in the IDR, + * this function returns %NULL. + * + * Since this function modifies the IDR, the caller should provide their + * own locking to ensure that concurrent modification of the same IDR is + * not possible. + * + * Return: The pointer formerly associated with this ID. 
+ */ +void *idr_remove(struct idr *idr, unsigned long id) +{ + return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL); +} +EXPORT_SYMBOL_GPL(idr_remove); + +/** + * idr_find() - Return pointer for given ID. + * @idr: IDR handle. + * @id: Pointer ID. + * + * Looks up the pointer associated with this ID. A %NULL pointer may + * indicate that @id is not allocated or that the %NULL pointer was + * associated with this ID. + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + * + * Return: The pointer associated with this ID. + */ +void *idr_find(const struct idr *idr, unsigned long id) +{ + return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base); +} +EXPORT_SYMBOL_GPL(idr_find); + +/** + * idr_for_each() - Iterate through all stored pointers. + * @idr: IDR handle. + * @fn: Function to be called for each pointer. + * @data: Data passed to callback function. * * The callback function will be called for each entry in @idr, passing - * the id, the pointer and the data pointer passed to this function. + * the ID, the entry and @data. * * If @fn returns anything other than %0, the iteration stops and that * value is returned from this function. @@ -86,9 +200,14 @@ int idr_for_each(const struct idr *idr, { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { - int ret = fn(iter.index, rcu_dereference_raw(*slot), data); + int ret; + + if (WARN_ON_ONCE(iter.index > INT_MAX)) + break; + ret = fn(iter.index + base, rcu_dereference_raw(*slot), data); if (ret) return ret; } @@ -98,9 +217,9 @@ int idr_for_each(const struct idr *idr, EXPORT_SYMBOL(idr_for_each); /** - * idr_get_next - Find next populated entry - * @idr: idr handle - * @nextid: Pointer to lowest possible ID to return + * idr_get_next() - Find next populated entry. + * @idr: IDR handle. + * @nextid: Pointer to an ID. * * Returns the next populated entry in the tree with an ID greater than * or equal to the value pointed to by @nextid. On exit, @nextid is updated @@ -111,35 +230,55 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + int base = idr->idr_base; + int id = *nextid; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + id = (id < base) ? 0 : id - base; + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); if (!slot) return NULL; + id = iter.index + base; + + if (WARN_ON_ONCE(id > INT_MAX)) + return NULL; - *nextid = iter.index; + *nextid = id; return rcu_dereference_raw(*slot); } EXPORT_SYMBOL(idr_get_next); -void *idr_get_next_ext(struct idr *idr, unsigned long *nextid) +/** + * idr_get_next_ul() - Find next populated entry. + * @idr: IDR handle. + * @nextid: Pointer to an ID. + * + * Returns the next populated entry in the tree with an ID greater than + * or equal to the value pointed to by @nextid. On exit, @nextid is updated + * to the ID of the found value. To use in a loop, the value pointed to by + * nextid must be incremented by the user. + */ +void *idr_get_next_ul(struct idr *idr, unsigned long *nextid) { struct radix_tree_iter iter; void __rcu **slot; + unsigned long base = idr->idr_base; + unsigned long id = *nextid; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + id = (id < base) ? 
0 : id - base; + slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); if (!slot) return NULL; - *nextid = iter.index; + *nextid = iter.index + base; return rcu_dereference_raw(*slot); } -EXPORT_SYMBOL(idr_get_next_ext); +EXPORT_SYMBOL(idr_get_next_ul); /** - * idr_replace - replace pointer for given id - * @idr: idr handle - * @ptr: New pointer to associate with the ID - * @id: Lookup key + * idr_replace() - replace pointer for given ID. + * @idr: IDR handle. + * @ptr: New pointer to associate with the ID. + * @id: ID to change. * * Replace the pointer registered with an ID and return the old value. * This function can be called under the RCU read lock concurrently with @@ -147,18 +286,9 @@ EXPORT_SYMBOL(idr_get_next_ext); * the one being replaced!). * * Returns: the old value on success. %-ENOENT indicates that @id was not - * found. %-EINVAL indicates that @id or @ptr were not valid. + * found. %-EINVAL indicates that @ptr was not valid. */ -void *idr_replace(struct idr *idr, void *ptr, int id) -{ - if (id < 0) - return ERR_PTR(-EINVAL); - - return idr_replace_ext(idr, ptr, id); -} -EXPORT_SYMBOL(idr_replace); - -void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) +void *idr_replace(struct idr *idr, void *ptr, unsigned long id) { struct radix_tree_node *node; void __rcu **slot = NULL; @@ -166,6 +296,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return ERR_PTR(-EINVAL); + id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) @@ -175,7 +306,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id) return entry; } -EXPORT_SYMBOL(idr_replace_ext); +EXPORT_SYMBOL(idr_replace); /** * DOC: IDA description @@ -235,7 +366,7 @@ EXPORT_SYMBOL(idr_replace_ext); * bitmap, which is excessive. */ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS) +#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) /** * ida_get_new_above - allocate new ID above or equal to a start id diff --git a/lib/kobject.c b/lib/kobject.c index 763d70a18941..afd5a3fc6123 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kobject.c - library routines for handling generic kernel objects * @@ -5,9 +6,6 @@ * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2007 Novell Inc. * - * This file is released under the GPLv2. - * - * * Please see the file Documentation/kobject.txt for critical information * about using the kobject interface. */ @@ -1039,6 +1037,7 @@ void *kobj_ns_grab_current(enum kobj_ns_type type) return ns; } +EXPORT_SYMBOL_GPL(kobj_ns_grab_current); const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) { @@ -1074,3 +1073,4 @@ void kobj_ns_drop(enum kobj_ns_type type, void *ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); } +EXPORT_SYMBOL_GPL(kobj_ns_drop); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 2615074d3de5..9fe6ec8fda28 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * kernel userspace event delivery * @@ -5,8 +6,6 @@ * Copyright (C) 2004 Novell, Inc. All rights reserved. * Copyright (C) 2004 IBM, Inc. All rights reserved. * - * Licensed under the GNU GPL v2. 
- * * Authors: * Robert Love <rml@novell.com> * Kay Sievers <kay.sievers@vrfy.org> diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index c10fba461454..2d3eb1cb73b8 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement the default iomap interfaces * diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe03c6d52761..30e7dd88148b 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -197,10 +197,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /* - * Restore per-cpu operation. smp_store_release() is paired with - * smp_read_barrier_depends() in __ref_is_percpu() and guarantees - * that the zeroing is visible to all percpu accesses which can see - * the following __PERCPU_REF_ATOMIC clearing. + * Restore per-cpu operation. smp_store_release() is paired + * with READ_ONCE() in __ref_is_percpu() and guarantees that the + * zeroing is visible to all percpu accesses which can see the + * following __PERCPU_REF_ATOMIC clearing. */ for_each_possible_cpu(cpu) *per_cpu_ptr(percpu_count, cpu) = 0; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c8d55565fafa..0a7ae3288a24 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -24,6 +24,7 @@ #include <linux/bitmap.h> #include <linux/bitops.h> +#include <linux/bug.h> #include <linux/cpu.h> #include <linux/errno.h> #include <linux/export.h> @@ -2135,7 +2136,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) } EXPORT_SYMBOL(ida_pre_get); -void __rcu **idr_get_free_cmn(struct radix_tree_root *root, +void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) { diff --git a/lib/rhashtable.c b/lib/rhashtable.c index ddd7dde87c3c..3825c30aaa36 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -65,42 +65,6 @@ EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #define ASSERT_RHT_MUTEX(HT) #endif - -static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, - gfp_t gfp) -{ - unsigned int i, size; -#if defined(CONFIG_PROVE_LOCKING) - unsigned int nr_pcpus = 2; -#else - unsigned int nr_pcpus = num_possible_cpus(); -#endif - - nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); - size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); - - /* Never allocate more than 0.5 locks per bucket */ - size = min_t(unsigned int, size, tbl->size >> 1); - - if (tbl->nest) - size = min(size, 1U << tbl->nest); - - if (sizeof(spinlock_t) != 0) { - if (gfpflags_allow_blocking(gfp)) - tbl->locks = kvmalloc(size * sizeof(spinlock_t), gfp); - else - tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - gfp); - if (!tbl->locks) - return -ENOMEM; - for (i = 0; i < size; i++) - spin_lock_init(&tbl->locks[i]); - } - tbl->locks_mask = size - 1; - - return 0; -} - static void nested_table_free(union nested_table *ntbl, unsigned int size) { const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); @@ -140,7 +104,7 @@ static void bucket_table_free(const struct bucket_table *tbl) if (tbl->nest) nested_bucket_table_free(tbl); - kvfree(tbl->locks); + free_bucket_spinlocks(tbl->locks); kvfree(tbl); } @@ -207,7 +171,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, gfp_t gfp) { struct bucket_table *tbl = NULL; - size_t size; + size_t size, max_locks; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); @@ -227,7 +191,12 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->size = size; - if 
(alloc_bucket_locks(ht, tbl, gfp) < 0) { + max_locks = size >> 1; + if (tbl->nest) + max_locks = min_t(size_t, max_locks, 1U << tbl->nest); + + if (alloc_bucket_spinlocks(&tbl->locks, &tbl->locks_mask, max_locks, + ht->p.locks_mul, gfp) < 0) { bucket_table_free(tbl); return NULL; } @@ -707,6 +676,7 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) iter->p = NULL; iter->slot = 0; iter->skip = 0; + iter->end_of_table = 0; spin_lock(&ht->lock); iter->walker.tbl = @@ -732,7 +702,7 @@ void rhashtable_walk_exit(struct rhashtable_iter *iter) EXPORT_SYMBOL_GPL(rhashtable_walk_exit); /** - * rhashtable_walk_start - Start a hash table walk + * rhashtable_walk_start_check - Start a hash table walk * @iter: Hash table iterator * * Start a hash table walk at the current iterator position. Note that we take @@ -744,8 +714,12 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); * Returns -EAGAIN if resize event occured. Note that the iterator * will rewind back to the beginning and you may use it immediately * by calling rhashtable_walk_next. + * + * rhashtable_walk_start is defined as an inline variant that returns + * void. This is preferred in cases where the caller would ignore + * resize events and always continue. */ -int rhashtable_walk_start(struct rhashtable_iter *iter) +int rhashtable_walk_start_check(struct rhashtable_iter *iter) __acquires(RCU) { struct rhashtable *ht = iter->ht; @@ -757,28 +731,26 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker.tbl) { + if (!iter->walker.tbl && !iter->end_of_table) { iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } return 0; } -EXPORT_SYMBOL_GPL(rhashtable_walk_start); +EXPORT_SYMBOL_GPL(rhashtable_walk_start_check); /** - * rhashtable_walk_next - Return the next object and advance the iterator - * @iter: Hash table iterator + * __rhashtable_walk_find_next - Find the next element in a table (or the first + * one in case of a new walk). * - * Note that you must call rhashtable_walk_stop when you are finished - * with the walk. + * @iter: Hash table iterator * - * Returns the next object or NULL when the end of the table is reached. + * Returns the found object or NULL when the end of the table is reached. * - * Returns -EAGAIN if resize event occured. Note that the iterator - * will rewind back to the beginning and you may continue to use it. + * Returns -EAGAIN if resize event occurred. */ -void *rhashtable_walk_next(struct rhashtable_iter *iter) +static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter) { struct bucket_table *tbl = iter->walker.tbl; struct rhlist_head *list = iter->list; @@ -786,13 +758,8 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) struct rhash_head *p = iter->p; bool rhlist = ht->rhlist; - if (p) { - if (!rhlist || !(list = rcu_dereference(list->next))) { - p = rcu_dereference(p->next); - list = container_of(p, struct rhlist_head, rhead); - } - goto next; - } + if (!tbl) + return NULL; for (; iter->slot < tbl->size; iter->slot++) { int skip = iter->skip; @@ -836,13 +803,90 @@ next: iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); + } else { + iter->end_of_table = true; } return NULL; } + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. 
+ * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; + + if (p) { + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); + } + + /* At the end of this slot, switch to next one and then find + * next entry from that point. + */ + iter->skip = 0; + iter->slot++; + } + + return __rhashtable_walk_find_next(iter); +} EXPORT_SYMBOL_GPL(rhashtable_walk_next); /** + * rhashtable_walk_peek - Return the next object but don't advance the iterator + * @iter: Hash table iterator + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occurred. Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_peek(struct rhashtable_iter *iter) +{ + struct rhlist_head *list = iter->list; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + + if (p) + return rht_obj(ht, ht->rhlist ? &list->rhead : p); + + /* No object found in current iter, find next one in the table. */ + + if (iter->skip) { + /* A nonzero skip value points to the next entry in the table + * beyond that last one that was found. Decrement skip so + * we find the current value. __rhashtable_walk_find_next + * will restore the original value of skip assuming that + * the table hasn't changed. + */ + iter->skip--; + } + + return __rhashtable_walk_find_next(iter); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_peek); + +/** * rhashtable_walk_stop - Finish a hash table walk * @iter: Hash table iterator * diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 80aa8d5463fa..42b5ca0acf93 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -462,7 +462,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq) */ atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); - wake_up(&ws->wait); + wake_up_nr(&ws->wait, wake_batch); } } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 7c1c55f7daaa..53728d391d3a 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -474,6 +474,133 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. 
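(Illustrative only: a hedged sketch, not part of this diff, of how a driver might pair the sgl_alloc_order()/sgl_free_order() helpers described above; the length and GFP flags are hypothetical.)

	struct scatterlist *sgl;
	unsigned int nents;

	/* order-0 pages, no extra element reserved for chaining */
	sgl = sgl_alloc_order(8 * PAGE_SIZE, 0, false, GFP_KERNEL, &nents);
	if (!sgl)
		return -ENOMEM;
	/* ... map and use the nents entries ... */
	sgl_free_order(sgl, 0);	/* frees both the pages and the scatterlist */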
+ */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nalloc); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ONCE(length, "length = %lld\n", length); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_n_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @nents: Maximum number of elements to free + * @order: Second argument for __free_pages() + * + * Notes: + * - If several scatterlists have been chained and each chain element is + * freed separately then it's essential to set nents correctly to avoid that a + * page would get freed twice. + * - All pages in a chained scatterlist can be freed at once by setting @nents + * to a high number. + */ +void sgl_free_n_order(struct scatterlist *sgl, int nents, int order) +{ + struct scatterlist *sg; + struct page *page; + int i; + + for_each_sg(sgl, sg, nents, i) { + if (!sg) + break; + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_n_order); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + sgl_free_n_order(sgl, INT_MAX, order); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 835cc6df2776..85925aaa4fff 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -5,7 +5,6 @@ * DEBUG_PREEMPT variant of smp_processor_id(). 
*/ #include <linux/export.h> -#include <linux/kallsyms.h> #include <linux/sched.h> notrace static unsigned int check_preemption_disabled(const char *what1, @@ -43,7 +42,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1, printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", what1, what2, preempt_count() - 1, current->comm, current->pid); - print_symbol("caller is %s\n", (long)__builtin_return_address(0)); + printk("caller is %pS\n", __builtin_return_address(0)); dump_stack(); out_enable: diff --git a/lib/stackdepot.c b/lib/stackdepot.c index f87d138e9672..e513459a5601 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -163,6 +163,21 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size) STACK_HASH_SEED); } +/* Use our own, non-instrumented version of memcmp(). + * + * We actually don't care about the order, just the equality. + */ +static inline +int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, + unsigned int n) +{ + for ( ; n-- ; u1++, u2++) { + if (*u1 != *u2) + return 1; + } + return 0; +} + /* Find a stack that is equal to the one stored in entries in the hash */ static inline struct stack_record *find_stack(struct stack_record *bucket, unsigned long *entries, int size, @@ -173,10 +188,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, for (found = bucket; found; found = found->next) { if (found->hash == hash && found->size == size && - !memcmp(entries, found->entries, - size * sizeof(unsigned long))) { + !stackdepot_memcmp(entries, found->entries, size)) return found; - } } return NULL; } diff --git a/lib/string.c b/lib/string.c index 64a9e33f1daa..2c0900a5d51a 100644 --- a/lib/string.c +++ b/lib/string.c @@ -203,7 +203,7 @@ ssize_t strscpy(char *dest, const char *src, size_t count) while (max >= sizeof(unsigned long)) { unsigned long c, data; - c = *(unsigned long *)(src+res); + c = read_word_at_a_time(src+res); if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cea19aaf303c..c43ec2271469 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -18,7 +18,7 @@ */ #include <linux/cache.h> -#include <linux/dma-mapping.h> +#include <linux/dma-direct.h> #include <linux/mm.h> #include <linux/export.h> #include <linux/spinlock.h> @@ -417,7 +417,7 @@ cleanup2: return -ENOMEM; } -void __init swiotlb_free(void) +void __init swiotlb_exit(void) { if (!io_tlb_orig_addr) return; @@ -586,7 +586,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - if (printk_ratelimit()) + if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: @@ -605,7 +605,6 @@ found: return tlb_addr; } -EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); /* * Allocates bounce buffer and returns its kernel virtual address. 
@@ -675,7 +674,6 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, } spin_unlock_irqrestore(&io_tlb_lock, flags); } -EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, size_t size, enum dma_data_direction dir, @@ -707,92 +705,107 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, BUG(); } } -EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); + +static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, + size_t size) +{ + u64 mask = DMA_BIT_MASK(32); + + if (dev && dev->coherent_dma_mask) + mask = dev->coherent_dma_mask; + return addr + size - 1 <= mask; +} + +static void * +swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, + unsigned long attrs) +{ + phys_addr_t phys_addr; + + if (swiotlb_force == SWIOTLB_NO_FORCE) + goto out_warn; + + phys_addr = swiotlb_tbl_map_single(dev, + swiotlb_phys_to_dma(dev, io_tlb_start), + 0, size, DMA_FROM_DEVICE, 0); + if (phys_addr == SWIOTLB_MAP_ERROR) + goto out_warn; + + *dma_handle = swiotlb_phys_to_dma(dev, phys_addr); + if (dma_coherent_ok(dev, *dma_handle, size)) + goto out_unmap; + + memset(phys_to_virt(phys_addr), 0, size); + return phys_to_virt(phys_addr); + +out_unmap: + dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)(dev ? dev->coherent_dma_mask : 0), + (unsigned long long)*dma_handle); + + /* + * DMA_TO_DEVICE to avoid memcpy in unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); +out_warn: + if ((attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { + dev_warn(dev, + "swiotlb: coherent allocation failed, size=%zu\n", + size); + dump_stack(); + } + return NULL; +} void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { - dma_addr_t dev_addr; - void *ret; int order = get_order(size); - u64 dma_mask = DMA_BIT_MASK(32); - - if (hwdev && hwdev->coherent_dma_mask) - dma_mask = hwdev->coherent_dma_mask; + unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0; + void *ret; ret = (void *)__get_free_pages(flags, order); if (ret) { - dev_addr = swiotlb_virt_to_bus(hwdev, ret); - if (dev_addr + size - 1 > dma_mask) { - /* - * The allocated memory isn't reachable by the device. - */ - free_pages((unsigned long) ret, order); - ret = NULL; + *dma_handle = swiotlb_virt_to_bus(hwdev, ret); + if (dma_coherent_ok(hwdev, *dma_handle, size)) { + memset(ret, 0, size); + return ret; } + free_pages((unsigned long)ret, order); } - if (!ret) { - /* - * We are either out of memory or the device can't DMA to - * GFP_DMA memory; fall back on map_single(), which - * will grab memory from the lowest available address range. - */ - phys_addr_t paddr = map_single(hwdev, 0, size, - DMA_FROM_DEVICE, 0); - if (paddr == SWIOTLB_MAP_ERROR) - goto err_warn; - ret = phys_to_virt(paddr); - dev_addr = swiotlb_phys_to_dma(hwdev, paddr); - - /* Confirm address can be DMA'd by device */ - if (dev_addr + size - 1 > dma_mask) { - printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dma_mask, - (unsigned long long)dev_addr); - - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * The DMA_ATTR_SKIP_CPU_SYNC is optional. 
- */ - swiotlb_tbl_unmap_single(hwdev, paddr, - size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - goto err_warn; - } - } + return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs); +} +EXPORT_SYMBOL(swiotlb_alloc_coherent); - *dma_handle = dev_addr; - memset(ret, 0, size); +static bool swiotlb_free_buffer(struct device *dev, size_t size, + dma_addr_t dma_addr) +{ + phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - return ret; + WARN_ON_ONCE(irqs_disabled()); -err_warn: - pr_warn("swiotlb: coherent allocation failed for device %s size=%zu\n", - dev_name(hwdev), size); - dump_stack(); + if (!is_swiotlb_buffer(phys_addr)) + return false; - return NULL; + /* + * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. + * DMA_ATTR_SKIP_CPU_SYNC is optional. + */ + swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + return true; } -EXPORT_SYMBOL(swiotlb_alloc_coherent); void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - WARN_ON(irqs_disabled()); - if (!is_swiotlb_buffer(paddr)) + if (!swiotlb_free_buffer(hwdev, size, dev_addr)) free_pages((unsigned long)vaddr, get_order(size)); - else - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); } EXPORT_SYMBOL(swiotlb_free_coherent); @@ -868,7 +881,6 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer); } -EXPORT_SYMBOL_GPL(swiotlb_map_page); /* * Unmap a single streaming mode DMA translation. The dma_addr and size must @@ -909,7 +921,6 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, { unmap_single(hwdev, dev_addr, size, dir, attrs); } -EXPORT_SYMBOL_GPL(swiotlb_unmap_page); /* * Make physical memory consistent for a single streaming mode DMA translation @@ -947,7 +958,6 @@ swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); } -EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); void swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, @@ -955,7 +965,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, { swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); } -EXPORT_SYMBOL(swiotlb_sync_single_for_device); /* * Map a set of buffers described by scatterlist in streaming mode for DMA. @@ -1007,7 +1016,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, } return nelems; } -EXPORT_SYMBOL(swiotlb_map_sg_attrs); /* * Unmap a set of streaming mode DMA translations. 
Again, cpu read rules @@ -1027,7 +1035,6 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); } -EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); /* * Make physical memory consistent for a set of streaming mode DMA translations @@ -1055,7 +1062,6 @@ swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); } -EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); void swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, @@ -1063,14 +1069,12 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, { swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } -EXPORT_SYMBOL(swiotlb_sync_sg_for_device); int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer)); } -EXPORT_SYMBOL(swiotlb_dma_mapping_error); /* * Return whether the given device DMA address mask can be supported @@ -1083,4 +1087,49 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) { return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } -EXPORT_SYMBOL(swiotlb_dma_supported); + +#ifdef CONFIG_DMA_DIRECT_OPS +void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + void *vaddr; + + /* temporary workaround: */ + if (gfp & __GFP_NOWARN) + attrs |= DMA_ATTR_NO_WARN; + + /* + * Don't print a warning when the first allocation attempt fails. + * swiotlb_alloc_coherent() will print a warning when the DMA memory + * allocation ultimately failed. + */ + gfp |= __GFP_NOWARN; + + vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); + if (!vaddr) + vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); + return vaddr; +} + +void swiotlb_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, unsigned long attrs) +{ + if (!swiotlb_free_buffer(dev, size, dma_addr)) + dma_direct_free(dev, size, vaddr, dma_addr, attrs); +} + +const struct dma_map_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc = swiotlb_alloc, + .free = swiotlb_free, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg_attrs, + .unmap_sg = swiotlb_unmap_sg_attrs, + .map_page = swiotlb_map_page, + .unmap_page = swiotlb_unmap_page, + .dma_supported = swiotlb_dma_supported, +}; +#endif /* CONFIG_DMA_DIRECT_OPS */ diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index aa1f2669bdd5..b3f235baa05d 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -23,7 +23,7 @@ __check_eq_uint(const char *srcfile, unsigned int line, const unsigned int exp_uint, unsigned int x) { if (exp_uint != x) { - pr_warn("[%s:%u] expected %u, got %u\n", + pr_err("[%s:%u] expected %u, got %u\n", srcfile, line, exp_uint, x); return false; } @@ -33,19 +33,13 @@ __check_eq_uint(const char *srcfile, unsigned int line, static bool __init __check_eq_bitmap(const char *srcfile, unsigned int line, - const unsigned long *exp_bmap, unsigned int exp_nbits, - const unsigned long *bmap, unsigned int nbits) + const unsigned long *exp_bmap, const unsigned long *bmap, + unsigned int nbits) { - if (exp_nbits != nbits) { - pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n", - srcfile, line, exp_nbits, nbits); - return false; - } - if 
(!bitmap_equal(exp_bmap, bmap, nbits)) { pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n", srcfile, line, - exp_nbits, exp_bmap, nbits, bmap); + nbits, exp_bmap, nbits, bmap); return false; } return true; @@ -69,6 +63,10 @@ __check_eq_pbl(const char *srcfile, unsigned int line, static bool __init __check_eq_u32_array(const char *srcfile, unsigned int line, const u32 *exp_arr, unsigned int exp_len, + const u32 *arr, unsigned int len) __used; +static bool __init +__check_eq_u32_array(const char *srcfile, unsigned int line, + const u32 *exp_arr, unsigned int exp_len, const u32 *arr, unsigned int len) { if (exp_len != len) { @@ -107,7 +105,65 @@ __check_eq_u32_array(const char *srcfile, unsigned int line, #define expect_eq_pbl(...) __expect_eq(pbl, ##__VA_ARGS__) #define expect_eq_u32_array(...) __expect_eq(u32_array, ##__VA_ARGS__) -static void __init test_zero_fill_copy(void) +static void __init test_zero_clear(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to set all bits */ + memset(bmap, 0xff, 128); + + expect_eq_pbl("0-22", bmap, 23); + expect_eq_pbl("0-1023", bmap, 1024); + + /* single-word bitmaps */ + bitmap_clear(bmap, 0, 9); + expect_eq_pbl("9-1023", bmap, 1024); + + bitmap_zero(bmap, 35); + expect_eq_pbl("64-1023", bmap, 1024); + + /* cross boundaries operations */ + bitmap_clear(bmap, 79, 19); + expect_eq_pbl("64-78,98-1023", bmap, 1024); + + bitmap_zero(bmap, 115); + expect_eq_pbl("128-1023", bmap, 1024); + + /* Zeroing entire area */ + bitmap_zero(bmap, 1024); + expect_eq_pbl("", bmap, 1024); +} + +static void __init test_fill_set(void) +{ + DECLARE_BITMAP(bmap, 1024); + + /* Known way to clear all bits */ + memset(bmap, 0x00, 128); + + expect_eq_pbl("", bmap, 23); + expect_eq_pbl("", bmap, 1024); + + /* single-word bitmaps */ + bitmap_set(bmap, 0, 9); + expect_eq_pbl("0-8", bmap, 1024); + + bitmap_fill(bmap, 35); + expect_eq_pbl("0-63", bmap, 1024); + + /* cross boundaries operations */ + bitmap_set(bmap, 79, 19); + expect_eq_pbl("0-63,79-97", bmap, 1024); + + bitmap_fill(bmap, 115); + expect_eq_pbl("0-127", bmap, 1024); + + /* Zeroing entire area */ + bitmap_fill(bmap, 1024); + expect_eq_pbl("0-1023", bmap, 1024); +} + +static void __init test_copy(void) { DECLARE_BITMAP(bmap1, 1024); DECLARE_BITMAP(bmap2, 1024); @@ -116,36 +172,20 @@ static void __init test_zero_fill_copy(void) bitmap_zero(bmap2, 1024); /* single-word bitmaps */ - expect_eq_pbl("", bmap1, 23); - - bitmap_fill(bmap1, 19); - expect_eq_pbl("0-18", bmap1, 1024); - + bitmap_set(bmap1, 0, 19); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_fill(bmap2, 23); - expect_eq_pbl("0-22", bmap2, 1024); - + bitmap_set(bmap2, 0, 23); bitmap_copy(bmap2, bmap1, 23); expect_eq_pbl("0-18", bmap2, 1024); - bitmap_zero(bmap1, 23); - expect_eq_pbl("", bmap1, 1024); - /* multi-word bitmaps */ - bitmap_zero(bmap1, 1024); - expect_eq_pbl("", bmap1, 1024); - - bitmap_fill(bmap1, 109); - expect_eq_pbl("0-108", bmap1, 1024); - + bitmap_set(bmap1, 0, 109); bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); bitmap_fill(bmap2, 1024); - expect_eq_pbl("0-1023", bmap2, 1024); - bitmap_copy(bmap2, bmap1, 1024); expect_eq_pbl("0-108", bmap2, 1024); @@ -160,9 +200,6 @@ static void __init test_zero_fill_copy(void) bitmap_fill(bmap2, 1024); bitmap_copy(bmap2, bmap1, 97); /* ... but aligned on word length */ expect_eq_pbl("0-108,128-1023", bmap2, 1024); - - bitmap_zero(bmap2, 97); /* ... 
but 0-padded til word length */ - expect_eq_pbl("128-1023", bmap2, 1024); } #define PARSE_TIME 0x1 @@ -255,171 +292,29 @@ static void __init test_bitmap_parselist(void) } } -static void __init test_bitmap_u32_array_conversions(void) +static void __init test_bitmap_arr32(void) { - DECLARE_BITMAP(bmap1, 1024); - DECLARE_BITMAP(bmap2, 1024); - u32 exp_arr[32], arr[32]; - unsigned nbits; - - for (nbits = 0 ; nbits < 257 ; ++nbits) { - const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32); - unsigned int i, rv; - - bitmap_zero(bmap1, nbits); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s); - expect_eq_uint(nbits, rv); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - for (i = 0 ; i < nbits ; ++i) { - /* - * test conversion bitmap -> u32[] - */ - - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, 1024 - nbits); /* garbage */ - - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, used_u32s*sizeof(*exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - - /* same, with longer array to fill - */ - memset(arr, 0xff, sizeof(arr)); - rv = bitmap_to_u32array(arr, 32, bmap1, nbits); - expect_eq_uint(nbits, rv); - - /* 1st used u32 words contain expected bit set, the - * remaining words are all 0s - */ - memset(exp_arr, 0, sizeof(exp_arr)); - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - - /* - * test conversion u32[] -> bitmap - */ - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are left - * unchanged (all 1s) - */ - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, nbits, - exp_arr, used_u32s); - expect_eq_uint(nbits, rv); - - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - /* same, with more bits to fill - */ - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(u32)); - arr[i/32] = (1U<<(i%32)); - - bitmap_fill(bmap2, 1024); - rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s); - expect_eq_uint(used_u32s*32, rv); - - /* the 1st nbits of bmap2 are identical to - * bmap1, the remaining bits of bmap2 are cleared - */ - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - expect_eq_bitmap(bmap1, 1024, bmap2, 1024); - - - /* - * test short conversion bitmap -> u32[] (1 - * word too short) - */ - if (used_u32s > 1) { - bitmap_zero(bmap1, 1024); - __set_bit(i, bmap1); - bitmap_set(bmap1, nbits, - 1024 - nbits); /* garbage */ - memset(arr, 0xff, sizeof(arr)); - - rv = bitmap_to_u32array(arr, used_u32s - 1, - bmap1, nbits); - expect_eq_uint((used_u32s - 1)*32, rv); - - /* 1st used u32 words contain expected - * bit set, the remaining words are - * left unchanged (0xff) - */ - memset(exp_arr, 0xff, sizeof(exp_arr)); - memset(exp_arr, 0, - (used_u32s-1)*sizeof(*exp_arr)); - if ((i/32) < (used_u32s - 1)) - exp_arr[i/32] = (1U<<(i%32)); - expect_eq_u32_array(exp_arr, 32, arr, 32); - } - - /* - * test short conversion u32[] -> bitmap (3 - * bits too short) - */ 
- if (nbits > 3) { - memset(arr, 0xff, sizeof(arr)); /* garbage */ - memset(arr, 0, used_u32s*sizeof(*arr)); - arr[i/32] = (1U<<(i%32)); - - bitmap_zero(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (0-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - - /* do the same with bmap1 initially - * 1-filled - */ - - bitmap_fill(bmap1, 1024); - rv = bitmap_from_u32array(bmap1, nbits - 3, - arr, used_u32s); - expect_eq_uint(nbits - 3, rv); - - /* we are expecting the bit < nbits - - * 3 (none otherwise), and the rest of - * bmap1 unchanged (1-filled) - */ - bitmap_zero(bmap2, 1024); - if (i < nbits - 3) - __set_bit(i, bmap2); - bitmap_set(bmap2, nbits-3, 1024 - nbits + 3); - expect_eq_bitmap(bmap2, 1024, bmap1, 1024); - } - } + unsigned int nbits, next_bit, len = sizeof(exp) * 8; + u32 arr[sizeof(exp) / 4]; + DECLARE_BITMAP(bmap2, len); + + memset(arr, 0xa5, sizeof(arr)); + + for (nbits = 0; nbits < len; ++nbits) { + bitmap_to_arr32(arr, exp, nbits); + bitmap_from_arr32(bmap2, arr, nbits); + expect_eq_bitmap(bmap2, exp, nbits); + + next_bit = find_next_bit(bmap2, + round_up(nbits, BITS_PER_LONG), nbits); + if (next_bit < round_up(nbits, BITS_PER_LONG)) + pr_err("bitmap_copy_arr32(nbits == %d:" + " tail is not safely cleared: %d\n", + nbits, next_bit); + + if (nbits < len - 32) + expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)], + 0xa5a5a5a5); } } @@ -453,8 +348,10 @@ static void noinline __init test_mem_optimisations(void) static int __init test_bitmap_init(void) { - test_zero_fill_copy(); - test_bitmap_u32_array_conversions(); + test_zero_clear(); + test_fill_set(); + test_copy(); + test_bitmap_arr32(); test_bitmap_parselist(); test_mem_optimisations(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f369889e521d..b4e22345963f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -83,6 +83,7 @@ struct bpf_test { __u32 result; } test[MAX_SUBTESTS]; int (*fill_helper)(struct bpf_test *self); + int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */ __u8 frag_data[MAX_DATA]; int stack_depth; /* for eBPF only, since tests don't call verifier */ }; @@ -2003,10 +2004,14 @@ static struct bpf_test tests[] = { { { 4, 0 }, { 5, 10 } } }, { - "INT: DIV by zero", + /* This one doesn't go through verifier, but is just raw insn + * as opposed to cBPF tests from here. Thus div by 0 tests are + * done in test_verifier in BPF kselftests. 
+ */ + "INT: DIV by -1", .u.insns_int = { BPF_ALU64_REG(BPF_MOV, R6, R1), - BPF_ALU64_IMM(BPF_MOV, R7, 0), + BPF_ALU64_IMM(BPF_MOV, R7, -1), BPF_LD_ABS(BPF_B, 3), BPF_ALU32_REG(BPF_DIV, R0, R7), BPF_EXIT_INSN(), @@ -2022,7 +2027,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: div_k_0", @@ -2032,7 +2039,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: unknown insn", @@ -2043,7 +2052,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: out of range spill/fill", @@ -2053,7 +2064,9 @@ static struct bpf_test tests[] = { }, CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, - { } + { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "JUMPS + HOLES", @@ -2145,6 +2158,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { "check: LDX + RET X", @@ -2155,6 +2170,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "M[]: alt STX + LDX", @@ -2329,6 +2346,8 @@ static struct bpf_test tests[] = { CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, { }, { }, + .fill_helper = NULL, + .expected_errcode = -EINVAL, }, { /* Passes checker but fails during runtime. */ "LD [SKF_AD_OFF-1]", @@ -5391,6 +5410,7 @@ static struct bpf_test tests[] = { { }, { }, .fill_helper = bpf_fill_maxinsns4, + .expected_errcode = -EINVAL, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Very long jump", @@ -5446,10 +5466,15 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: Jump, gap, jump, ...", { }, +#ifdef CONFIG_BPF_JIT_ALWAYS_ON + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 0, 0xababcbac } }, .fill_helper = bpf_fill_maxinsns11, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", @@ -6109,6 +6134,110 @@ static struct bpf_test tests[] = { { { ETH_HLEN, 42 } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop2, }, + /* Checking interpreter vs JIT wrt signed extended imms. 
*/ + { + "JNE signed compare, test 1", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 2", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R2, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 3", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R1, 0xfefbbc12), + BPF_ALU32_IMM(BPF_MOV, R3, 0xffff0000), + BPF_ALU32_IMM(BPF_MOV, R4, 0xfefb0000), + BPF_MOV64_REG(R2, R1), + BPF_ALU64_REG(BPF_AND, R2, R3), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_REG(BPF_JNE, R2, R4, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 4", + .u.insns_int = { + BPF_LD_IMM64(R1, -17104896), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, -17104896, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 5", + .u.insns_int = { + BPF_LD_IMM64(R1, 0xfefb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0xfefb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, + }, + { + "JNE signed compare, test 6", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x7efb0000), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP_IMM(BPF_JNE, R1, 0x7efb0000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 2 } }, + }, + { + "JNE signed compare, test 7", + .u.insns = { + BPF_STMT(BPF_LD | BPF_IMM, 0xffff0000), + BPF_STMT(BPF_MISC | BPF_TAX, 0), + BPF_STMT(BPF_LD | BPF_IMM, 0xfefbbc12), + BPF_STMT(BPF_ALU | BPF_AND | BPF_X, 0), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0xfefb0000, 1, 0), + BPF_STMT(BPF_RET | BPF_K, 1), + BPF_STMT(BPF_RET | BPF_K, 2), + }, + CLASSIC | FLAG_NO_DATA, + {}, + { { 0, 2 } }, + }, }; static struct net_device dev; @@ -6236,7 +6365,7 @@ static struct bpf_prog *generate_filter(int which, int *err) *err = bpf_prog_create(&fp, &fprog); if (tests[which].aux & FLAG_EXPECTED_FAIL) { - if (*err == -EINVAL) { + if (*err == tests[which].expected_errcode) { pr_cont("PASS\n"); /* Verifier rejected filter as expected. 
*/ *err = 0; diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 64a4c76cba2b..078a61480573 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -96,7 +96,7 @@ struct test_config { struct device *device); }; -struct test_config *test_fw_config; +static struct test_config *test_fw_config; static ssize_t test_fw_misc_read(struct file *f, char __user *buf, size_t size, loff_t *offset) @@ -359,7 +359,7 @@ static ssize_t config_name_show(struct device *dev, { return config_test_show_str(buf, test_fw_config->name); } -static DEVICE_ATTR(config_name, 0644, config_name_show, config_name_store); +static DEVICE_ATTR_RW(config_name); static ssize_t config_num_requests_store(struct device *dev, struct device_attribute *attr, @@ -371,6 +371,7 @@ static ssize_t config_num_requests_store(struct device *dev, if (test_fw_config->reqs) { pr_err("Must call release_all_firmware prior to changing config\n"); rc = -EINVAL; + mutex_unlock(&test_fw_mutex); goto out; } mutex_unlock(&test_fw_mutex); @@ -388,8 +389,7 @@ static ssize_t config_num_requests_show(struct device *dev, { return test_dev_config_show_u8(buf, test_fw_config->num_requests); } -static DEVICE_ATTR(config_num_requests, 0644, config_num_requests_show, - config_num_requests_store); +static DEVICE_ATTR_RW(config_num_requests); static ssize_t config_sync_direct_store(struct device *dev, struct device_attribute *attr, @@ -411,8 +411,7 @@ static ssize_t config_sync_direct_show(struct device *dev, { return test_dev_config_show_bool(buf, test_fw_config->sync_direct); } -static DEVICE_ATTR(config_sync_direct, 0644, config_sync_direct_show, - config_sync_direct_store); +static DEVICE_ATTR_RW(config_sync_direct); static ssize_t config_send_uevent_store(struct device *dev, struct device_attribute *attr, @@ -428,8 +427,7 @@ static ssize_t config_send_uevent_show(struct device *dev, { return test_dev_config_show_bool(buf, test_fw_config->send_uevent); } -static DEVICE_ATTR(config_send_uevent, 0644, config_send_uevent_show, - config_send_uevent_store); +static DEVICE_ATTR_RW(config_send_uevent); static ssize_t config_read_fw_idx_store(struct device *dev, struct device_attribute *attr, @@ -445,8 +443,7 @@ static ssize_t config_read_fw_idx_show(struct device *dev, { return test_dev_config_show_u8(buf, test_fw_config->read_fw_idx); } -static DEVICE_ATTR(config_read_fw_idx, 0644, config_read_fw_idx_show, - config_read_fw_idx_store); +static DEVICE_ATTR_RW(config_read_fw_idx); static ssize_t trigger_request_store(struct device *dev, diff --git a/lib/test_kasan.c b/lib/test_kasan.c index ef1a3ac1397e..98854a64b014 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -94,6 +94,37 @@ static noinline void __init kmalloc_pagealloc_oob_right(void) ptr[size] = 0; kfree(ptr); } + +static noinline void __init kmalloc_pagealloc_uaf(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + ptr[0] = 0; +} + +static noinline void __init kmalloc_pagealloc_invalid_free(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc pagealloc allocation: invalid-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr + 1); +} #endif static noinline void __init kmalloc_large_oob_right(void) @@ -388,7 +419,7 @@ static noinline void __init kasan_stack_oob(void) static noinline void __init 
ksize_unpoisons_memory(void) { char *ptr; - size_t size = 123, real_size = size; + size_t size = 123, real_size; pr_info("ksize() unpoisons the whole allocated chunk\n"); ptr = kmalloc(size, GFP_KERNEL); @@ -472,6 +503,74 @@ static noinline void __init use_after_scope_test(void) p[1023] = 1; } +static noinline void __init kasan_alloca_oob_left(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array - 1; + + pr_info("out-of-bounds to left on alloca\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_alloca_oob_right(void) +{ + volatile int i = 10; + char alloca_array[i]; + char *p = alloca_array + i; + + pr_info("out-of-bounds to right on alloca\n"); + *(volatile char *)p; +} + +static noinline void __init kmem_cache_double_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("double-free on heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p); + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static noinline void __init kmem_cache_invalid_free(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU, + NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("invalid-free of heap object\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + kmem_cache_free(cache, p + 1); + kmem_cache_destroy(cache); +} + static int __init kmalloc_tests_init(void) { /* @@ -485,6 +584,8 @@ static int __init kmalloc_tests_init(void) kmalloc_node_oob_right(); #ifdef CONFIG_SLUB kmalloc_pagealloc_oob_right(); + kmalloc_pagealloc_uaf(); + kmalloc_pagealloc_invalid_free(); #endif kmalloc_large_oob_right(); kmalloc_oob_krealloc_more(); @@ -502,9 +603,13 @@ static int __init kmalloc_tests_init(void) memcg_accounted_kmem_cache(); kasan_stack_oob(); kasan_global_oob(); + kasan_alloca_oob_left(); + kasan_alloca_oob_right(); ksize_unpoisons_memory(); copy_user_test(); use_after_scope_test(); + kmem_cache_double_free(); + kmem_cache_invalid_free(); kasan_restore_multi_shot(multishot); diff --git a/lib/test_kmod.c b/lib/test_kmod.c index 337f408b4de6..e372b97eee13 100644 --- a/lib/test_kmod.c +++ b/lib/test_kmod.c @@ -694,8 +694,7 @@ static ssize_t config_test_driver_show(struct device *dev, return config_test_show_str(&test_dev->config_mutex, buf, config->test_driver); } -static DEVICE_ATTR(config_test_driver, 0644, config_test_driver_show, - config_test_driver_store); +static DEVICE_ATTR_RW(config_test_driver); static ssize_t config_test_fs_store(struct device *dev, struct device_attribute *attr, @@ -726,8 +725,7 @@ static ssize_t config_test_fs_show(struct device *dev, return config_test_show_str(&test_dev->config_mutex, buf, config->test_fs); } -static DEVICE_ATTR(config_test_fs, 0644, config_test_fs_show, - config_test_fs_store); +static DEVICE_ATTR_RW(config_test_fs); static int trigger_config_run_type(struct kmod_test_device *test_dev, enum kmod_test_case test_case, @@ -1012,8 +1010,7 @@ static ssize_t config_num_threads_show(struct device *dev, return test_dev_config_show_int(test_dev, buf, config->num_threads); } -static DEVICE_ATTR(config_num_threads, 0644, 
config_num_threads_show, - config_num_threads_store); +static DEVICE_ATTR_RW(config_num_threads); static ssize_t config_test_case_store(struct device *dev, struct device_attribute *attr, @@ -1037,8 +1034,7 @@ static ssize_t config_test_case_show(struct device *dev, return test_dev_config_show_uint(test_dev, buf, config->test_case); } -static DEVICE_ATTR(config_test_case, 0644, config_test_case_show, - config_test_case_store); +static DEVICE_ATTR_RW(config_test_case); static ssize_t test_result_show(struct device *dev, struct device_attribute *attr, @@ -1049,7 +1045,7 @@ static ssize_t test_result_show(struct device *dev, return test_dev_config_show_int(test_dev, buf, config->test_result); } -static DEVICE_ATTR(test_result, 0644, test_result_show, test_result_store); +static DEVICE_ATTR_RW(test_result); #define TEST_KMOD_DEV_ATTR(name) &dev_attr_##name.attr diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 8e83cbdc049c..76d3667fdea2 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -162,11 +162,7 @@ static void test_bucket_stats(struct rhashtable *ht, unsigned int entries) return; } - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - pr_warn("Test failed: iterator failed: %d\n", err); - return; - } + rhashtable_walk_start(&hti); while ((pos = rhashtable_walk_next(&hti))) { if (PTR_ERR(pos) == -EAGAIN) { diff --git a/lib/test_sort.c b/lib/test_sort.c index d389c1cc2f6c..385c0ed5202f 100644 --- a/lib/test_sort.c +++ b/lib/test_sort.c @@ -39,5 +39,11 @@ exit: return err; } +static void __exit test_sort_exit(void) +{ +} + module_init(test_sort_init); +module_exit(test_sort_exit); + MODULE_LICENSE("GPL"); diff --git a/lib/ubsan.c b/lib/ubsan.c index fb0409df1bcf..59fee96c29a0 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -141,11 +141,6 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type, } } -static bool location_is_valid(struct source_location *loc) -{ - return loc->file_name != NULL; -} - static DEFINE_SPINLOCK(report_lock); static void ubsan_prologue(struct source_location *location, @@ -265,14 +260,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data, } EXPORT_SYMBOL(__ubsan_handle_divrem_overflow); -static void handle_null_ptr_deref(struct type_mismatch_data *data) +static void handle_null_ptr_deref(struct type_mismatch_data_common *data) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s null pointer of type %s\n", type_check_kinds[data->type_check_kind], @@ -281,15 +276,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data) ubsan_epilogue(&flags); } -static void handle_missaligned_access(struct type_mismatch_data *data, +static void handle_misaligned_access(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if (suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s misaligned address %p for type %s\n", type_check_kinds[data->type_check_kind], @@ -299,15 +294,15 @@ static void handle_missaligned_access(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -static void handle_object_size_mismatch(struct type_mismatch_data *data, +static void handle_object_size_mismatch(struct type_mismatch_data_common *data, unsigned long ptr) { unsigned long flags; - if 
(suppress_report(&data->location)) + if (suppress_report(data->location)) return; - ubsan_prologue(&data->location, &flags); + ubsan_prologue(data->location, &flags); pr_err("%s address %p with insufficient space\n", type_check_kinds[data->type_check_kind], (void *) ptr); @@ -315,37 +310,46 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data, ubsan_epilogue(&flags); } -void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, +static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data, unsigned long ptr) { if (!ptr) handle_null_ptr_deref(data); else if (data->alignment && !IS_ALIGNED(ptr, data->alignment)) - handle_missaligned_access(data, ptr); + handle_misaligned_access(data, ptr); else handle_object_size_mismatch(data, ptr); } -EXPORT_SYMBOL(__ubsan_handle_type_mismatch); -void __ubsan_handle_nonnull_return(struct nonnull_return_data *data) +void __ubsan_handle_type_mismatch(struct type_mismatch_data *data, + unsigned long ptr) { - unsigned long flags; - - if (suppress_report(&data->location)) - return; - - ubsan_prologue(&data->location, &flags); + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = data->alignment, + .type_check_kind = data->type_check_kind + }; + + ubsan_type_mismatch_common(&common_data, ptr); +} +EXPORT_SYMBOL(__ubsan_handle_type_mismatch); - pr_err("null pointer returned from function declared to never return null\n"); +void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data, + unsigned long ptr) +{ - if (location_is_valid(&data->attr_location)) - print_source_location("returns_nonnull attribute specified in", - &data->attr_location); + struct type_mismatch_data_common common_data = { + .location = &data->location, + .type = data->type, + .alignment = 1UL << data->log_alignment, + .type_check_kind = data->type_check_kind + }; - ubsan_epilogue(&flags); + ubsan_type_mismatch_common(&common_data, ptr); } -EXPORT_SYMBOL(__ubsan_handle_nonnull_return); +EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1); void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data, unsigned long bound) diff --git a/lib/ubsan.h b/lib/ubsan.h index 88f23557edbe..f4d8d0bd4016 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -37,15 +37,24 @@ struct type_mismatch_data { unsigned char type_check_kind; }; -struct nonnull_arg_data { +struct type_mismatch_data_v1 { struct source_location location; - struct source_location attr_location; - int arg_index; + struct type_descriptor *type; + unsigned char log_alignment; + unsigned char type_check_kind; +}; + +struct type_mismatch_data_common { + struct source_location *location; + struct type_descriptor *type; + unsigned long alignment; + unsigned char type_check_kind; }; -struct nonnull_return_data { +struct nonnull_arg_data { struct source_location location; struct source_location attr_location; + int arg_index; }; struct vla_bound_data { diff --git a/lib/usercopy.c b/lib/usercopy.c index 15e2e6fb060e..3744b2a8e591 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -20,7 +20,7 @@ EXPORT_SYMBOL(_copy_from_user); #endif #ifndef INLINE_COPY_TO_USER -unsigned long _copy_to_user(void *to, const void __user *from, unsigned long n) +unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (likely(access_ok(VERIFY_WRITE, to, n))) { diff --git a/lib/uuid.c b/lib/uuid.c index 680b9fb9ba09..2290b9f001a9 100644 --- a/lib/uuid.c +++ b/lib/uuid.c @@ -29,15 +29,14 @@ 
diff --git a/lib/uuid.c b/lib/uuid.c
index 680b9fb9ba09..2290b9f001a9 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -29,15 +29,14 @@
 EXPORT_SYMBOL(uuid_null);
 
 const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
 const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
-/***************************************************************
+/**
+ * generate_random_uuid - generate a random UUID
+ * @uuid: where to put the generated UUID
+ *
  * Random UUID interface
  *
- * Used here for a Boot ID, but can be useful for other kernel
- * drivers.
- ***************************************************************/
-
-/*
- * Generate random UUID
+ * Used to create a Boot ID or a filesystem UUID/GUID, but can be
+ * useful for other kernel drivers.
  */
 void generate_random_uuid(unsigned char uuid[16])
 {
@@ -73,16 +72,17 @@ void uuid_gen(uuid_t *bu)
 EXPORT_SYMBOL_GPL(uuid_gen);
 
 /**
- * uuid_is_valid - checks if UUID string valid
- * @uuid: UUID string to check
- *
- * Description:
- * It checks if the UUID string is following the format:
- *	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
- * where x is a hex digit.
- *
- * Return: true if input is valid UUID string.
- */
+ * uuid_is_valid - checks if a UUID string is valid
+ * @uuid: UUID string to check
+ *
+ * Description:
+ * It checks if the UUID string is following the format:
+ *	xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ *
+ * where x is a hex digit.
+ *
+ * Return: true if input is valid UUID string.
+ */
 bool uuid_is_valid(const char *uuid)
 {
 	unsigned int i;
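The reflowed uuid_is_valid() kernel-doc above describes the canonical 36-character string form, with hyphens at offsets 8, 13, 18 and 23 and hex digits everywhere else. A stand-alone user-space sketch of that format check (illustrative only, not the kernel implementation):

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

static bool uuid_string_ok(const char *s)
{
	for (int i = 0; i < 36; i++) {
		if (i == 8 || i == 13 || i == 18 || i == 23) {
			if (s[i] != '-')
				return false;	/* separator expected here */
		} else if (!isxdigit((unsigned char)s[i])) {
			return false;		/* hex digit expected here */
		}
	}
	return s[36] == '\0';			/* exactly 36 characters */
}

int main(void)
{
	printf("%d\n", uuid_string_ok("c1d1f747-2e64-4b63-9d66-012307d9a0d3"));	/* 1 */
	printf("%d\n", uuid_string_ok("not-a-uuid"));					/* 0 */
	return 0;
}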
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 01c3957b2de6..77ee6ced11b1 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -42,7 +42,6 @@
 #include "../mm/internal.h"	/* For the trace_print_flags arrays */
 
 #include <asm/page.h>		/* for PAGE_SIZE */
-#include <asm/sections.h>	/* for dereference_function_descriptor() */
 #include <asm/byteorder.h>	/* cpu_to_le16 */
 
 #include <linux/string_helpers.h>
@@ -1834,7 +1833,8 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
  *
  * - 'x' For printing the address. Equivalent to "%lx".
  *
- * ** Please update also Documentation/printk-formats.txt when making changes **
+ * ** When making changes please also update:
+ *	Documentation/core-api/printk-formats.rst
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -1862,10 +1862,10 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 	switch (*fmt) {
 	case 'F':
 	case 'f':
-		ptr = dereference_function_descriptor(ptr);
-		/* Fallthrough */
 	case 'S':
 	case 's':
+		ptr = dereference_symbol_descriptor(ptr);
+		/* Fallthrough */
 	case 'B':
 		return symbol_string(buf, end, ptr, spec, fmt);
 	case 'R':
@@ -2194,7 +2194,7 @@ set_precision(struct printf_spec *spec, int prec)
  * - ``%n`` is unsupported
  * - ``%p*`` is handled by pointer()
  *
- * See pointer() or Documentation/printk-formats.txt for more
+ * See pointer() or Documentation/core-api/printk-formats.rst for more
  * extensive description.
  *
  * **Please update the documentation in both places when making changes**
@@ -2516,29 +2516,34 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
 {
 	struct printf_spec spec = {0};
 	char *str, *end;
+	int width;
 
 	str = (char *)bin_buf;
 	end = (char *)(bin_buf + size);
 
 #define save_arg(type)							\
-do {									\
+({									\
+	unsigned long long value;					\
 	if (sizeof(type) == 8) {					\
-		unsigned long long value;				\
+		unsigned long long val8;				\
 		str = PTR_ALIGN(str, sizeof(u32));			\
-		value = va_arg(args, unsigned long long);		\
+		val8 = va_arg(args, unsigned long long);		\
 		if (str + sizeof(type) <= end) {			\
-			*(u32 *)str = *(u32 *)&value;			\
-			*(u32 *)(str + 4) = *((u32 *)&value + 1);	\
+			*(u32 *)str = *(u32 *)&val8;			\
+			*(u32 *)(str + 4) = *((u32 *)&val8 + 1);	\
 		}							\
+		value = val8;						\
 	} else {							\
-		unsigned long value;					\
+		unsigned int val4;					\
 		str = PTR_ALIGN(str, sizeof(type));			\
-		value = va_arg(args, int);				\
+		val4 = va_arg(args, int);				\
 		if (str + sizeof(type) <= end)				\
-			*(typeof(type) *)str = (type)value;		\
+			*(typeof(type) *)str = (type)(long)val4;	\
+		value = (unsigned long long)val4;			\
 	}								\
 	str += sizeof(type);						\
-} while (0)
+	value;								\
+})
 
 	while (*fmt) {
 		int read = format_decode(fmt, &spec);
@@ -2554,7 +2559,10 @@ do {									\
 
 		case FORMAT_TYPE_WIDTH:
 		case FORMAT_TYPE_PRECISION:
-			save_arg(int);
+			width = (int)save_arg(int);
+			/* Pointers may require the width */
+			if (*fmt == 'p')
+				set_field_width(&spec, width);
 			break;
 
 		case FORMAT_TYPE_CHAR:
@@ -2576,7 +2584,27 @@ do {									\
 		}
 
 		case FORMAT_TYPE_PTR:
-			save_arg(void *);
+			/* Dereferenced pointers must be done now */
+			switch (*fmt) {
+			/* Dereference of functions is still OK */
+			case 'S':
+			case 's':
+			case 'F':
+			case 'f':
+				save_arg(void *);
+				break;
+			default:
+				if (!isalnum(*fmt)) {
+					save_arg(void *);
+					break;
+				}
+				str = pointer(fmt, str, end, va_arg(args, void *),
+					      spec);
+				if (str + 1 < end)
+					*str++ = '\0';
+				else
+					end[-1] = '\0'; /* Must be nul terminated */
+			}
 			/* skip all alphanumeric pointer suffixes */
 			while (isalnum(*fmt))
 				fmt++;
@@ -2728,11 +2756,39 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 			break;
 		}
 
-		case FORMAT_TYPE_PTR:
-			str = pointer(fmt, str, end, get_arg(void *), spec);
+		case FORMAT_TYPE_PTR: {
+			bool process = false;
+			int copy, len;
+			/* Non function dereferences were already done */
+			switch (*fmt) {
+			case 'S':
+			case 's':
+			case 'F':
+			case 'f':
+				process = true;
+				break;
+			default:
+				if (!isalnum(*fmt)) {
+					process = true;
+					break;
+				}
+				/* Pointer dereference was already processed */
+				if (str < end) {
+					len = copy = strlen(args);
+					if (copy > end - str)
+						copy = end - str;
+					memcpy(str, args, copy);
+					str += len;
+					args += len;
+				}
+			}
+			if (process)
+				str = pointer(fmt, str, end, get_arg(void *), spec);
+
 			while (isalnum(*fmt))
 				fmt++;
 			break;
+		}
 
 		case FORMAT_TYPE_PERCENT_CHAR:
 			if (str < end)
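The vbin_printf()/bstr_printf() rework above makes the binary-record pass expand most dereferenced %p extensions immediately, because the data a pointer refers to may have changed, or been freed, by the time the stored record is finally rendered as text. A user-space sketch of that underlying problem, with illustrative names only:

#include <stdio.h>
#include <string.h>

static char record[64];

static void record_now(const char *transient)
{
	/* capture the dereferenced value while it is still valid */
	snprintf(record, sizeof(record), "%s", transient);
}

int main(void)
{
	char name[16];

	strcpy(name, "first");
	record_now(name);
	strcpy(name, "second");		/* the transient buffer gets reused ... */

	printf("%s\n", record);		/* ... but the record still reads "first" */
	return 0;
}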