From fd589a1be20fdd76ef97700dd0185e7a060546dc Mon Sep 17 00:00:00 2001
From: Jyri Sarha <jsarha@ti.com>
Date: Tue, 10 Nov 2015 18:12:42 +0200
Subject: ASoC: dapm: Reset dapm wcache after freeing dapm widgets

If there is anything in dapm->path_source_cache or
dapm->path_sink_cache, it can not be valid after the widgets have been
freed. Without this patch a repeated remove and load of a machine
driver may cause a NULL pointer dereference in dapm_wcache_lookup()
when a freed widget, not belonging to any list, is haunting the wcache.

Signed-off-by: Jyri Sarha <jsarha@ti.com>
Reported-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 7855cfe46b69..95a937eafb79 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -398,6 +398,7 @@ int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm,
 int snd_soc_dapm_weak_routes(struct snd_soc_dapm_context *dapm,
 			     const struct snd_soc_dapm_route *route, int num);
 void snd_soc_dapm_free_widget(struct snd_soc_dapm_widget *w);
+void snd_soc_dapm_reset_cache(struct snd_soc_dapm_context *dapm);
 
 /* dapm events */
 void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream,
-- 
cgit v1.2.1


From 34c06254ff82a815fdccdfae7517a06c9b768cee Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 5 Nov 2015 00:12:24 -0500
Subject: cgroup: fix cftype->file_offset handling

6f60eade2433 ("cgroup: generalize obtaining the handles of and
notifying cgroup files") introduced cftype->file_offset so that the
handles for per-css file instances can be recorded.  These handles
then can be used, for example, to generate file modified
notifications.

Unfortunately, it made the wrong assumption that files are created
once for a given css and removed on its destruction.  Due to the
dependencies among subsystems, a css may be hidden from userland and
then later shown again.  This is implemented by removing and
re-creating the affected files, so the associated kernfs_node for a
given cgroup file may change over time.  This incorrect assumption led
to the corruption of css->files lists.

Reimplement cftype->file_offset handling so that cgroup_file->kn is
protected by a lock and updated as files are created and destroyed.
This also makes keeping them on per-cgroup list unnecessary.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: James Sedgwick <jsedgwick@fb.com>
Fixes: 6f60eade2433 ("cgroup: generalize obtaining the handles of and notifying cgroup files")
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Zefan Li <lizefan@huawei.com>
---
 include/linux/cgroup-defs.h |  4 ----
 include/linux/cgroup.h      | 14 +-------------
 2 files changed, 1 insertion(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 60d44b26276d..869fd4a3d28e 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -90,7 +90,6 @@ enum {
  */
 struct cgroup_file {
 	/* do not access any fields from outside cgroup core */
-	struct list_head node;			/* anchored at css->files */
 	struct kernfs_node *kn;
 };
 
@@ -134,9 +133,6 @@ struct cgroup_subsys_state {
 	 */
 	u64 serial_nr;
 
-	/* all cgroup_files associated with this css */
-	struct list_head files;
-
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 22e3754f89c5..f64083030ad5 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -88,6 +88,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cftype *cfts);
+void cgroup_file_notify(struct cgroup_file *cfile);
 
 char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
@@ -516,19 +517,6 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
-/**
- * cgroup_file_notify - generate a file modified event for a cgroup_file
- * @cfile: target cgroup_file
- *
- * @cfile must have been obtained by setting cftype->file_offset.
- */
-static inline void cgroup_file_notify(struct cgroup_file *cfile)
-{
-	/* might not have been created due to one of the CFTYPE selector flags */
-	if (cfile->kn)
-		kernfs_notify(cfile->kn);
-}
-
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
-- 
cgit v1.2.1


From aedf17f4515b12ba1cd73298e66baa69cf93010e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:36 +0100
Subject: lightnvm: change max_phys_sect to uint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The max_phys_sect variable is defined as a uint8_t. We do a boundary
check to maximally allow 256 physical page descriptors per command. As
we are not indexing from zero, this expression is always false. Bump
the max_phys_sect to an unsigned int to support the range check.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 69c9057e1ab8..32b5369e814e 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -220,7 +220,7 @@ struct nvm_dev_ops {
 	nvm_dev_dma_alloc_fn	*dev_dma_alloc;
 	nvm_dev_dma_free_fn	*dev_dma_free;
 
-	uint8_t			max_phys_sect;
+	unsigned int		max_phys_sect;
 };
 
 struct nvm_lun {
-- 
cgit v1.2.1


From 11450469830f2481a9e7cb181609288d40f41323 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:37 +0100
Subject: lightnvm: update bad block table format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The specification was changed to reflect a multi-value bad block table.
Instead of bit-based bad block table, the bad block table now allows
eight bad block categories. Currently four are defined:

 * Factory bad blocks
 * Grown bad blocks
 * Device-side reserved blocks
 * Host-side reserved blocks

The factory and grown bad blocks are the regular bad blocks. The
reserved blocks are either for internal use or external use. In
particular, the device-side reserved blocks allow the host to
bootstrap from a limited number of flash blocks, reducing the number
of flash blocks to scan upon super block initialization.

Support for both get bad block table and set bad block table is added.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 32b5369e814e..9b3dc1bc9296 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -191,11 +191,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 struct nvm_block;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *);
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
 typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
 typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
 				nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int,
+typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int,
 				nvm_bb_update_fn *, void *);
 typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
 typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
@@ -210,7 +210,7 @@ struct nvm_dev_ops {
 	nvm_id_fn		*identity;
 	nvm_get_l2p_tbl_fn	*get_l2p_tbl;
 	nvm_op_bb_tbl_fn	*get_bb_tbl;
-	nvm_op_set_bb_fn	*set_bb;
+	nvm_op_set_bb_fn	*set_bb_tbl;
 
 	nvm_submit_io_fn	*submit_io;
 	nvm_erase_blk_fn	*erase_block;
-- 
cgit v1.2.1


From 12be5edf68e785dd5dc8665db5a88152b49c1fe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:39 +0100
Subject: lightnvm: expose mccap in identify command
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mccap field is required for I/O command option support. It defines the
following flash access modes:

 * SLC mode
 * Erase/Program Suspension
 * Scramble On/Off
 * Encryption

It is slotted in between mpos and cpar, changing the offset for
cpar as well.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 9b3dc1bc9296..2572856e2a89 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -74,6 +74,7 @@ struct nvm_id_group {
 	u32	tbet;
 	u32	tbem;
 	u32	mpos;
+	u32	mccap;
 	u16	cpar;
 	u8	res[913];
 } __packed;
-- 
cgit v1.2.1


From 73387e7bed260c89628fc6a4e3632b45be9776b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:40 +0100
Subject: lightnvm: remove unused attrs in nvm_id structs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nvm_id, nvm_id_group and nvm_addr_format data structures contain
reserved attributes. They are unused by media managers and targets.
Remove them.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2572856e2a89..e6ef8aaf533f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -58,7 +58,6 @@ enum {
 struct nvm_id_group {
 	u8	mtype;
 	u8	fmtype;
-	u16	res16;
 	u8	num_ch;
 	u8	num_lun;
 	u8	num_pln;
@@ -76,8 +75,7 @@ struct nvm_id_group {
 	u32	mpos;
 	u32	mccap;
 	u16	cpar;
-	u8	res[913];
-} __packed;
+};
 
 struct nvm_addr_format {
 	u8	ch_offset;
@@ -92,19 +90,16 @@ struct nvm_addr_format {
 	u8	pg_len;
 	u8	sect_offset;
 	u8	sect_len;
-	u8	res[4];
 };
 
 struct nvm_id {
 	u8	ver_id;
 	u8	vmnt;
 	u8	cgrps;
-	u8	res[5];
 	u32	cap;
 	u32	dom;
 	struct nvm_addr_format ppaf;
 	u8	ppat;
-	u8	resv[224];
 	struct nvm_id_group groups[4];
 } __packed;
 
-- 
cgit v1.2.1


From 7386af270c72be65c7cb2ba4ad0d4e70dc373106 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Mon, 16 Nov 2015 15:34:44 +0100
Subject: lightnvm: remove linear and device addr modes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The linear and device specific address modes can be replaced with a
simple offset and bit length conversion that is generic across all
devices.

This both simplifies the specification and removes the special case for
qemu nvme, that previously relied on the linear address mapping.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 154 +++++++++++------------------------------------
 1 file changed, 34 insertions(+), 120 deletions(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index e6ef8aaf533f..cbe288acb1de 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -99,7 +99,6 @@ struct nvm_id {
 	u32	cap;
 	u32	dom;
 	struct nvm_addr_format ppaf;
-	u8	ppat;
 	struct nvm_id_group groups[4];
 } __packed;
 
@@ -119,39 +118,28 @@ struct nvm_tgt_instance {
 #define NVM_VERSION_MINOR 0
 #define NVM_VERSION_PATCH 0
 
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS  (6)
-#define NVM_PG_BITS  (16)
 #define NVM_BLK_BITS (16)
-#define NVM_LUN_BITS (10)
+#define NVM_PG_BITS  (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS  (8)
+#define NVM_LUN_BITS (8)
 #define NVM_CH_BITS  (8)
 
 struct ppa_addr {
+	/* Generic structure for all addresses */
 	union {
-		/* Channel-based PPA format in nand 4x2x2x2x8x10 */
-		struct {
-			u64 ch		: 4;
-			u64 sec		: 2; /* 4 sectors per page */
-			u64 pl		: 2; /* 4 planes per LUN */
-			u64 lun		: 2; /* 4 LUNs per channel */
-			u64 pg		: 8; /* 256 pages per block */
-			u64 blk		: 10;/* 1024 blocks per plane */
-			u64 resved		: 36;
-		} chnl;
-
-		/* Generic structure for all addresses */
 		struct {
+			u64 blk		: NVM_BLK_BITS;
+			u64 pg		: NVM_PG_BITS;
 			u64 sec		: NVM_SEC_BITS;
 			u64 pl		: NVM_PL_BITS;
-			u64 pg		: NVM_PG_BITS;
-			u64 blk		: NVM_BLK_BITS;
 			u64 lun		: NVM_LUN_BITS;
 			u64 ch		: NVM_CH_BITS;
 		} g;
 
 		u64 ppa;
 	};
-} __packed;
+};
 
 struct nvm_rq {
 	struct nvm_tgt_instance *ins;
@@ -259,8 +247,7 @@ struct nvm_dev {
 	int blks_per_lun;
 	int sec_size;
 	int oob_size;
-	int addr_mode;
-	struct nvm_addr_format addr_format;
+	struct nvm_addr_format ppaf;
 
 	/* Calculated/Cached values. These do not reflect the actual usable
 	 * blocks at run-time.
@@ -286,118 +273,45 @@ struct nvm_dev {
 	char name[DISK_NAME_LEN];
 };
 
-/* fallback conversion */
-static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
-{
-	struct ppa_addr l;
-
-	l.ppa = r.g.sec +
-		r.g.pg  * dev->sec_per_pg +
-		r.g.blk * (dev->pgs_per_blk *
-				dev->sec_per_pg) +
-		r.g.lun * (dev->blks_per_lun *
-				dev->pgs_per_blk *
-				dev->sec_per_pg) +
-		r.g.ch * (dev->blks_per_lun *
-				dev->pgs_per_blk *
-				dev->luns_per_chnl *
-				dev->sec_per_pg);
-
-	return l;
-}
-
-/* fallback conversion */
-static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev,
-							struct ppa_addr r)
-{
-	struct ppa_addr l;
-	int secs, pgs, blks, luns;
-	sector_t ppa = r.ppa;
-
-	l.ppa = 0;
-
-	div_u64_rem(ppa, dev->sec_per_pg, &secs);
-	l.g.sec = secs;
-
-	sector_div(ppa, dev->sec_per_pg);
-	div_u64_rem(ppa, dev->sec_per_blk, &pgs);
-	l.g.pg = pgs;
-
-	sector_div(ppa, dev->pgs_per_blk);
-	div_u64_rem(ppa, dev->blks_per_lun, &blks);
-	l.g.blk = blks;
-
-	sector_div(ppa, dev->blks_per_lun);
-	div_u64_rem(ppa, dev->luns_per_chnl, &luns);
-	l.g.lun = luns;
-
-	sector_div(ppa, dev->luns_per_chnl);
-	l.g.ch = ppa;
-
-	return l;
-}
-
-static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r)
+static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
+						struct ppa_addr r)
 {
 	struct ppa_addr l;
 
-	l.ppa = 0;
-
-	l.chnl.sec = r.g.sec;
-	l.chnl.pl = r.g.pl;
-	l.chnl.pg = r.g.pg;
-	l.chnl.blk = r.g.blk;
-	l.chnl.lun = r.g.lun;
-	l.chnl.ch = r.g.ch;
+	l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset;
+	l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset;
+	l.ppa |= ((u64)r.g.sec) << dev->ppaf.sect_offset;
+	l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset;
+	l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset;
+	l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset;
 
 	return l;
 }
 
-static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r)
+static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
+						struct ppa_addr r)
 {
 	struct ppa_addr l;
 
-	l.ppa = 0;
-
-	l.g.sec = r.chnl.sec;
-	l.g.pl = r.chnl.pl;
-	l.g.pg = r.chnl.pg;
-	l.g.blk = r.chnl.blk;
-	l.g.lun = r.chnl.lun;
-	l.g.ch = r.chnl.ch;
+	/*
+	 * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc.
+	 */
+	l.g.blk = (r.ppa >> dev->ppaf.blk_offset) &
+					(((1 << dev->ppaf.blk_len) - 1));
+	l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) &
+					(((1 << dev->ppaf.pg_len) - 1));
+	l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) &
+					(((1 << dev->ppaf.sect_len) - 1));
+	l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) &
+					(((1 << dev->ppaf.pln_len) - 1));
+	l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) &
+					(((1 << dev->ppaf.lun_len) - 1));
+	l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) &
+					(((1 << dev->ppaf.ch_len) - 1));
 
 	return l;
 }
 
-static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev,
-						struct ppa_addr gppa)
-{
-	switch (dev->addr_mode) {
-	case NVM_ADDRMODE_LINEAR:
-		return __linear_to_generic_addr(dev, gppa);
-	case NVM_ADDRMODE_CHANNEL:
-		return __chnl_to_generic_addr(gppa);
-	default:
-		BUG();
-	}
-	return gppa;
-}
-
-static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev,
-						struct ppa_addr gppa)
-{
-	switch (dev->addr_mode) {
-	case NVM_ADDRMODE_LINEAR:
-		return __generic_to_linear_addr(dev, gppa);
-	case NVM_ADDRMODE_CHANNEL:
-		return __generic_to_chnl_addr(gppa);
-	default:
-		BUG();
-	}
-	return gppa;
-}
-
 static inline int ppa_empty(struct ppa_addr ppa_addr)
 {
 	return (ppa_addr.ppa == ADDR_EMPTY);
-- 
cgit v1.2.1


From 451c2b5caf37b526ae34a1081b71115e1de2d063 Mon Sep 17 00:00:00 2001
From: Aya Mahfouz <mahfouz.saif.elyazal@gmail.com>
Date: Wed, 18 Nov 2015 08:36:44 +0200
Subject: net: dns_resolver: convert time_t to time64_t

Changes the definition of the pointer _expiry from time_t to
time64_t. This is to handle the Y2038 problem where time_t
will overflow in the year 2038. The change is safe because
the kernel subsystems that call dns_query pass NULL.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Aya Mahfouz <mahfouz.saif.elyazal@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dns_resolver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
index cc92268af89a..6ac3cad9aef1 100644
--- a/include/linux/dns_resolver.h
+++ b/include/linux/dns_resolver.h
@@ -27,7 +27,7 @@
 #ifdef __KERNEL__
 
 extern int dns_query(const char *type, const char *name, size_t namelen,
-		     const char *options, char **_result, time_t *_expiry);
+		     const char *options, char **_result, time64_t *_expiry);
 
 #endif /* KERNEL */
 
-- 
cgit v1.2.1


From db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 5 Nov 2015 09:03:50 +0100
Subject: KVM: Provide function for VCPU lookup by id

Let's provide a function to lookup a VCPU by id.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[split patch from refactoring patch]
---
 include/linux/kvm_host.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5706a2108f0a..c923350ca20a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -460,6 +460,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 	     idx++)
 
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (vcpu->vcpu_id == id)
+			return vcpu;
+	return NULL;
+}
+
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
 	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\
-- 
cgit v1.2.1


From 851df3dc11136fde86ebd78ee7527cb43c7cd349 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 16 Nov 2015 22:34:58 +0100
Subject: scpi: hide get_scpi_ops in module from built-in code

The scpi_clock driver can be built-in when CONFIG_COMPILE_TEST
is set even when ARM_SCPI_PROTOCOL is a loadable module, and
that results in a link error:

drivers/built-in.o: In function `scpi_clocks_probe':
(.text+0x14453c): undefined reference to `get_scpi_ops'

Using #if IS_REACHABLE() around the get_scpi_ops() declaration
makes it build successfully in this case for compile-testing,
but the effect is the same as when ARM_SCPI_PROTOCOL is
disabled, as the code will not be used.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Punit Agrawal <punit.agrawal@arm.com>
---
 include/linux/scpi_protocol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h
index 80af3cd35ae4..72ce932c69b2 100644
--- a/include/linux/scpi_protocol.h
+++ b/include/linux/scpi_protocol.h
@@ -71,7 +71,7 @@ struct scpi_ops {
 	int (*sensor_get_value)(u16, u32 *);
 };
 
-#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL)
+#if IS_REACHABLE(CONFIG_ARM_SCPI_PROTOCOL)
 struct scpi_ops *get_scpi_ops(void);
 #else
 static inline struct scpi_ops *get_scpi_ops(void) { return NULL; }
-- 
cgit v1.2.1


From a35bb4458e5e5c9dc19a0daa0629409285f3b25e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 19 Nov 2015 14:17:06 +0100
Subject: scsi: report 'INQUIRY result too short' once per host

Some host adapters (e.g. Hyper-V storvsc) are known for not respecting
the SPC-2/3/4 requirement for 'INQUIRY data (see table ...) shall
contain at least 36 bytes'. As a result we get tons of 'scsi 0:7:1:1:
scsi scan: INQUIRY result too short (5), using 36' messages on
console. This can be problematic for slow consoles. Introduce
short_inquiry flag in struct Scsi_Host to print the message once per
host.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_host.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index e113c757d555..3a22da73d59a 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -673,6 +673,9 @@ struct Scsi_Host {
 	unsigned use_blk_mq:1;
 	unsigned use_cmd_list:1;
 
+	/* Host responded with short (<36 bytes) INQUIRY result */
+	unsigned short_inquiry:1;
+
 	/*
 	 * Optional work queue to be utilized by the transport
 	 */
-- 
cgit v1.2.1


From ac0621971a26526cad8cf9db7626d5e50562a441 Mon Sep 17 00:00:00 2001
From: Gregory Greenman <gregory.greenman@intel.com>
Date: Tue, 17 Nov 2015 10:24:38 +0200
Subject: mac80211: always set the buf_size in AddBA req to 64

Advertising reordering window in ADDBA less than 64 can crash some APs,
an example is LinkSys WRT120N (with FW v1.0.07 build 002 Jun 18 2012).
On the other hand, a driver may need to limit Tx A-MPDU size for its own
reasons, like specific HW limitations.

Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 82045fca388b..760bc4d5a2cf 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2003,8 +2003,10 @@ enum ieee80211_hw_flags {
  *	it shouldn't be set.
  *
  * @max_tx_aggregation_subframes: maximum number of subframes in an
- *	aggregate an HT driver will transmit, used by the peer as a
- *	hint to size its reorder buffer.
+ *	aggregate an HT driver will transmit. Though ADDBA will advertise
+ *	a constant value of 64 as some older APs can crash if the window
+ *	size is smaller (an example is LinkSys WRT120N with FW v1.0.07
+ *	build 002 Jun 18 2012).
  *
  * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
  *	(if %IEEE80211_HW_QUEUE_CONTROL is set)
-- 
cgit v1.2.1


From 0b59733b95f9d7af6bee6e6a4d0d444eb694c514 Mon Sep 17 00:00:00 2001
From: Javier Gonzalez <javier@javigon.com>
Date: Fri, 20 Nov 2015 13:47:56 +0100
Subject: lightnvm: keep track of block counts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maintain the number of in-use blocks, free blocks, and bad blocks on a
per-lun basis. This allows the upper layers to get information about
the state of each lun.

Also, account for blocks reserved to the device in the free block count.
nr_free_blocks now matches the actual number of blocks on the free list
when the device is booted.

Signed-off-by: Javier Gonzalez <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index cbe288acb1de..831a20cf070c 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -213,7 +213,9 @@ struct nvm_lun {
 	int lun_id;
 	int chnl_id;
 
+	unsigned int nr_inuse_blocks;	/* Number of used blocks */
 	unsigned int nr_free_blocks;	/* Number of unused blocks */
+	unsigned int nr_bad_blocks;	/* Number of bad blocks */
 	struct nvm_block *blocks;
 
 	spinlock_t lock;
-- 
cgit v1.2.1


From 2fde0e482db2b43bb4ed0e9aebfbe78ebcbbf5a6 Mon Sep 17 00:00:00 2001
From: Javier Gonzalez <javier@javigon.com>
Date: Fri, 20 Nov 2015 13:47:57 +0100
Subject: lightnvm: add free and bad lun info to show luns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add free block, used block, and bad block information to the show debug
interface. This information is used to debug how targets track blocks.

Also, change debug function name to make it more generic.

Signed-off-by: Javier Gonzalez <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 831a20cf070c..3db5552b17d5 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -380,7 +380,7 @@ typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
 typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
 								unsigned long);
 typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
-typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *);
+typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *);
 
 struct nvmm_type {
 	const char *name;
@@ -404,7 +404,7 @@ struct nvmm_type {
 	nvmm_get_lun_fn *get_lun;
 
 	/* Statistics */
-	nvmm_free_blocks_print_fn *free_blocks_print;
+	nvmm_lun_info_print_fn *lun_info_print;
 	struct list_head list;
 };
 
-- 
cgit v1.2.1


From 614e4c4ebc75517295bccd29b20ddbc5b52af6fc Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 12 Nov 2015 11:00:04 +0100
Subject: perf/core: Robustify the perf_cgroup_from_task() RCU checks

This patch reinforces the lockdep checks performed by
perf_cgroup_from_task() by passing the perf_event_context
whenever possible. It is okay to not hold the RCU read lock
when we know we hold the ctx->lock. This patch makes sure this
property holds.

In some functions, such as perf_cgroup_sched_in(), we do not
pass the context because we are sure we are holding the RCU
read lock.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: edumazet@google.com
Link: http://lkml.kernel.org/r/1447322404-10920-3-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d841d33bcdc9..f9828a48f16a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -697,9 +697,11 @@ struct perf_cgroup {
  * if there is no cgroup event for the current CPU context.
  */
 static inline struct perf_cgroup *
-perf_cgroup_from_task(struct task_struct *task)
+perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx)
 {
-	return container_of(task_css(task, perf_event_cgrp_id),
+	return container_of(task_css_check(task, perf_event_cgrp_id,
+					   ctx ? lockdep_is_held(&ctx->lock)
+					       : true),
 			    struct perf_cgroup, css);
 }
 #endif /* CONFIG_CGROUP_PERF */
-- 
cgit v1.2.1


From 90eec103b96e30401c0b846045bf8a1c7159b6da Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 16 Nov 2015 11:08:45 +0100
Subject: treewide: Remove old email address

There were still a number of references to my old Red Hat email
address in the kernel source. Remove these while keeping the
Red Hat copyright notices intact.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/tlb.h   | 2 +-
 include/linux/jump_label.h  | 2 +-
 include/linux/lockdep.h     | 2 +-
 include/linux/proportions.h | 2 +-
 include/linux/uprobes.h     | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index db284bff29dc..9dbb739cafa0 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -5,7 +5,7 @@
  * Copyright 2001 Red Hat, Inc.
  * Based on code from mm/memory.c Copyright Linus Torvalds and others.
  *
- * Copyright 2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright 2011 Red Hat, Inc., Peter Zijlstra
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 8dde55974f18..0536524bb9eb 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -5,7 +5,7 @@
  * Jump label support
  *
  * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
- * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
  *
  * DEPRECATED API:
  *
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 70400dc7660f..c57e424d914b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -2,7 +2,7 @@
  * Runtime locking correctness validator
  *
  *  Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * see Documentation/locking/lockdep-design.txt for more details.
  */
diff --git a/include/linux/proportions.h b/include/linux/proportions.h
index 5440f64d2942..21221338ad18 100644
--- a/include/linux/proportions.h
+++ b/include/linux/proportions.h
@@ -1,7 +1,7 @@
 /*
  * FLoating proportions
  *
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * This file contains the public data structure and API definitions.
  */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 0bdc72f36905..4a29c75b146e 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -21,7 +21,7 @@
  * Authors:
  *	Srikar Dronamraju
  *	Jim Keniston
- * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/errno.h>
-- 
cgit v1.2.1


From 7d267278a9ece963d77eefec61630223fce08c6c Mon Sep 17 00:00:00 2001
From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Date: Fri, 20 Nov 2015 22:07:23 +0000
Subject: unix: avoid use-after-free in ep_remove_wait_queue

Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:
An AF_UNIX datagram socket being the client in an n:1 association with
some server socket is only allowed to send messages to the server if the
receive queue of this socket contains at most sk_max_ack_backlog
datagrams. This implies that prospective writers might be forced to go
to sleep despite none of the messages presently enqueued on the server
receive queue were sent by them. In order to ensure that these will be
woken up once space becomes again available, the present unix_dgram_poll
routine does a second sock_poll_wait call with the peer_wait wait queue
of the server socket as queue argument (unix_dgram_recvmsg does a wake
up on this queue after a datagram was received). This is inherently
problematic because the server socket is only guaranteed to remain alive
for as long as the client still holds a reference to it. In case the
connection is dissolved via connect or by the dead peer detection logic
in unix_dgram_sendmsg, the server socket may be freed despite "the
polling mechanism" (in particular, epoll) still having a pointer to the
corresponding peer_wait queue. There's no way to forcibly deregister a
wait queue with epoll.

Based on an idea by Jason Baron, the patch below changes the code such
that a wait_queue_t belonging to the client socket is enqueued on the
peer_wait queue of the server whenever the peer receive queue full
condition is detected by either a sendmsg or a poll. A wake up on the
peer queue is then relayed to the ordinary wait queue of the client
socket via wake function. The connection to the peer wait queue is again
dissolved if either a wake up is about to be relayed or the client
socket reconnects or a dead peer is detected or the client socket is
itself closed. This enables removing the second sock_poll_wait from
unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
that no blocked writer sleeps forever.

Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
Reviewed-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b36d837c701e..2a91a0561a47 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
+	wait_queue_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
-- 
cgit v1.2.1


From c86b3de8c8b02d7e474fdc002c8df533b844524c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Nov 2015 17:48:52 +0100
Subject: thermal: fix thermal_zone_bind_cooling_device prototype

When the prototype for thermal_zone_bind_cooling_device
changed, the static inline wrapper function was left alone,
which in theory can cause build warnings:

I have seen this error in the past:
drivers/thermal/db8500_thermal.c: In function 'db8500_cdev_bind':
drivers/thermal/db8500_thermal.c:78:9: error: too many arguments to function 'thermal_zone_bind_cooling_device'
   ret = thermal_zone_bind_cooling_device(thermal, i, cdev,

While this one no longer shows up, there is no doubt that
the prototype is still wrong, so let's just fix it anyway.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 6cd9e9f629f1 ("thermal: of: fix cooling device weights in device tree")
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 include/linux/thermal.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 4014a59828fc..613c29bd6baf 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -438,7 +438,8 @@ static inline void thermal_zone_device_unregister(
 static inline int thermal_zone_bind_cooling_device(
 	struct thermal_zone_device *tz, int trip,
 	struct thermal_cooling_device *cdev,
-	unsigned long upper, unsigned long lower)
+	unsigned long upper, unsigned long lower,
+	unsigned int weight)
 { return -ENODEV; }
 static inline int thermal_zone_unbind_cooling_device(
 	struct thermal_zone_device *tz, int trip,
-- 
cgit v1.2.1


From 0f42a6a9b807b092841f7e1b381f8c7e80a0d86a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 13 Nov 2015 09:38:48 +0100
Subject: nfs: use btrfs ioctl defintions for clone

The NFS CLONE_RANGE definition was wrong and thus never worked.  Fix this
by simply using the btrfs ioctl definition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/uapi/linux/nfs.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
index 654bae3f1a38..5e6296160361 100644
--- a/include/uapi/linux/nfs.h
+++ b/include/uapi/linux/nfs.h
@@ -33,17 +33,6 @@
 
 #define NFS_PIPE_DIRNAME "nfs"
 
-/* NFS ioctls */
-/* Let's follow btrfs lead on CLONE to avoid messing userspace */
-#define NFS_IOC_CLONE		_IOW(0x94, 9, int)
-#define NFS_IOC_CLONE_RANGE	_IOW(0x94, 13, int)
-
-struct nfs_ioctl_clone_range_args {
-	__s64 src_fd;
-	__u64 src_off, count;
-	__u64 dst_off;
-};
-
 /*
  * NFS stats. The good thing with these values is that NFSv3 errors are
  * a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which
-- 
cgit v1.2.1


From 91ab4b4d16e6649fbbf65f303c0c4e20ed680bd1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Thu, 19 Nov 2015 14:30:26 -0500
Subject: nfs: use sliding delay when LAYOUTGET gets NFS4ERR_DELAY

When LAYOUTGET gets NFS4ERR_DELAY, we currently will wait 15s before
retrying the call. That is a _very_ long time, so add a timeout value to
struct nfs4_layoutget and pass nfs4_async_handle_error a pointer to it.
This allows the RPC engine to use a sliding delay window, instead of a
15s delay.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 570d630f98ae..11bbae44f4cb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,6 +251,7 @@ struct nfs4_layoutget {
 	struct nfs4_layoutget_res res;
 	struct rpc_cred *cred;
 	gfp_t gfp_flags;
+	long timeout;
 };
 
 struct nfs4_getdeviceinfo_args {
-- 
cgit v1.2.1


From c3ede03c881ca8ad618ad52c82b44ecb72c6e408 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 9 Nov 2015 16:43:09 +0100
Subject: gpu: ipu-v3: drop unused dmfc field from client platform data

This field is never used, drop it.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/video/imx-ipu-v3.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index 85dedca3dcfb..eeba75395f7d 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -343,7 +343,6 @@ struct ipu_client_platformdata {
 	int di;
 	int dc;
 	int dp;
-	int dmfc;
 	int dma[2];
 };
 
-- 
cgit v1.2.1


From 0e3dfda91d9fe8e2c4d0b5d21434b173a241eeaf Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 24 Nov 2015 16:23:05 +0100
Subject: KVM: arm/arm64: arch_timer: Preserve physical dist. active state on
 LR.active

We were incorrectly removing the active state from the physical
distributor on the timer interrupt when the timer output level was
deasserted.  We shouldn't be doing this without considering the virtual
interrupt's active state, because the architecture requires that when an
LR has the HW bit set and the pending or active bits set, then the
physical interrupt must also have the corresponding bits set.

This addresses an issue where we have been observing an inconsistency
between the LR state and the physical distributor state where the LR
state was active and the physical distributor was not active, which
shouldn't happen.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 include/kvm/arm_vgic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9c747cb14ad8..d2f41477f8ae 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -342,10 +342,10 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
 			       struct irq_phys_map *map, bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
 					   int virt_irq, int irq);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
-- 
cgit v1.2.1


From 264640fc2c5f4f913db5c73fa3eb1ead2c45e9d7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
Date: Tue, 24 Nov 2015 15:07:11 +0100
Subject: ipv6: distinguish frag queues by device for multicast and link-local
 packets

If a fragmented multicast packet is received on an ethernet device which
has an active macvlan on top of it, each fragment is duplicated and
received both on the underlying device and the macvlan. If some
fragments for macvlan are processed before the whole packet for the
underlying device is reassembled, the "overlapping fragments" test in
ip6_frag_queue() discards the whole fragment queue.

To resolve this, add device ifindex to the search key and require it to
match reassembling multicast packets and packets to link-local
addresses.

Note: a similar patch has already been submitted by Yoshifuji Hideaki in

  http://patchwork.ozlabs.org/patch/220979/

but got lost and forgotten for some reason.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e1a10b0ac0b0..ea5a13ef85a6 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -490,6 +490,7 @@ struct ip6_create_arg {
 	u32 user;
 	const struct in6_addr *src;
 	const struct in6_addr *dst;
+	int iif;
 	u8 ecn;
 };
 
-- 
cgit v1.2.1


From fbc416ff86183e2203cdf975e2881d7c164b0271 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 20 Nov 2015 12:12:21 +0100
Subject: arm64: fix building without CONFIG_UID16

As reported by Michal Simek, building an ARM64 kernel with CONFIG_UID16
disabled currently fails because the system call table still needs to
reference the individual function entry points that are provided by
kernel/sys_ni.c in this case, and the declarations are hidden inside
of #ifdef CONFIG_UID16:

arch/arm64/include/asm/unistd32.h:57:8: error: 'sys_lchown16' undeclared here (not in a function)
 __SYSCALL(__NR_lchown, sys_lchown16)

I believe this problem only exists on ARM64, because older architectures
tend to not need declarations when their system call table is built
in assembly code, while newer architectures tend to not need UID16
support. ARM64 only uses these system calls for compatibility with
32-bit ARM binaries.

This changes the CONFIG_UID16 check into CONFIG_HAVE_UID16, which is
set unconditionally on ARM64 with CONFIG_COMPAT, so we see the
declarations whenever we need them, but otherwise the behavior is
unchanged.

Fixes: af1839eb4bd4 ("Kconfig: clean up the long arch list for the UID16 config option")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/syscalls.h | 2 +-
 include/linux/types.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a156b82dd14c..c2b66a277e98 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -524,7 +524,7 @@ asmlinkage long sys_chown(const char __user *filename,
 asmlinkage long sys_lchown(const char __user *filename,
 				uid_t user, gid_t group);
 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
 asmlinkage long sys_chown16(const char __user *filename,
 				old_uid_t user, old_gid_t group);
 asmlinkage long sys_lchown16(const char __user *filename,
diff --git a/include/linux/types.h b/include/linux/types.h
index 70d8500bddf1..70dd3dfde631 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -35,7 +35,7 @@ typedef __kernel_gid16_t        gid16_t;
 
 typedef unsigned long		uintptr_t;
 
-#ifdef CONFIG_UID16
+#ifdef CONFIG_HAVE_UID16
 /* This is defined by include/asm-{arch}/posix_types.h */
 typedef __kernel_old_uid_t	old_uid_t;
 typedef __kernel_old_gid_t	old_gid_t;
-- 
cgit v1.2.1


From c9da161c6517ba12154059d3b965c2cbaf16f90f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 24 Nov 2015 21:28:15 +0100
Subject: bpf: fix clearing on persistent program array maps

Currently, when having map file descriptors pointing to program arrays,
there's still the issue that we unconditionally flush program array
contents via bpf_fd_array_map_clear() in bpf_map_release(). This happens
when such a file descriptor is released and is independent of the map's
refcount.

Having this flush independent of the refcount is for a reason: there
can be arbitrarily complex dependency chains among tail calls, also circular
ones (direct or indirect, nesting limit determined during runtime), and
we need to make sure that the map drops all references to eBPF programs
it holds, so that the map's refcount can eventually drop to zero and
initiate its freeing. Btw, a walk of the whole dependency graph would
not be possible for various reasons, one being complexity and another
one inconsistency, i.e. new programs can be added to parts of the graph
at any time, so there's no guaranteed consistent state for the time of
such a walk.

Now, the program array pinning itself works, but the issue is that each
derived file descriptor on close would nevertheless call unconditionally
into bpf_fd_array_map_clear(). Instead, keep track of users and postpone
this flush until the last reference to a user is dropped. As this only
concerns a subset of references (f.e. a prog array could hold a program
that itself has reference on the prog array holding it, etc), we need to
track them separately.

Short analysis on the refcounting: on map creation time usercnt will be
one, so there's no change in behaviour for bpf_map_release(), if unpinned.
If we already fail in map_create(), we are immediately freed, and no
file descriptor has been made public yet. In bpf_obj_pin_user(), we need
to probe for a possible map in bpf_fd_probe_obj() already with a usercnt
reference, so before we drop the reference on the fd with fdput().
Therefore, if actual pinning fails, we need to drop that reference again
in bpf_any_put(), otherwise we keep holding it. When last reference
drops on the inode, the bpf_any_put() in bpf_evict_inode() will take
care of dropping the usercnt again. In the bpf_obj_get_user() case, the
bpf_any_get() will grab a reference on the usercnt, still at a time when
we have the reference on the path. Should we later on fail to grab a new
file descriptor, bpf_any_put() will drop it, otherwise we hold it until
bpf_map_release() time.

Joint work with Alexei.

Fixes: b2197755b263 ("bpf: add support for persistent maps/progs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de464e6683b6..83d1926c61e4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,7 @@ struct bpf_map {
 	struct user_struct *user;
 	const struct bpf_map_ops *ops;
 	struct work_struct work;
+	atomic_t usercnt;
 };
 
 struct bpf_map_type_list {
@@ -167,8 +168,10 @@ struct bpf_prog *bpf_prog_get(u32 ufd);
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
-struct bpf_map *bpf_map_get(u32 ufd);
+struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
+void bpf_map_inc(struct bpf_map *map, bool uref);
+void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 
 extern int sysctl_unprivileged_bpf_disabled;
-- 
cgit v1.2.1


From 7c7a0e945349a3d0d497d7f32db6ed33d4031110 Mon Sep 17 00:00:00 2001
From: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Date: Wed, 11 Nov 2015 09:12:25 +0800
Subject: ARM/PCI: Move align_resource function pointer to pci_host_bridge
 structure

Commit b3a72384fe29 ("ARM/PCI: Replace pci_sys_data->align_resource with
global function pointer") introduced an ARM-specific align_resource()
function pointer.  This is not portable to other arches and doesn't work
for platforms with two different PCIe host bridge controllers.

Move the function pointer to the pci_host_bridge structure so each host
bridge driver can specify its own align_resource() function.

Signed-off-by: Gabriele Paoloni <gabriele.paoloni@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/pci.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index e828e7b4afec..6ae25aae88fd 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -412,9 +412,18 @@ struct pci_host_bridge {
 	void (*release_fn)(struct pci_host_bridge *);
 	void *release_data;
 	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
+	/* Resource alignment requirements */
+	resource_size_t (*align_resource)(struct pci_dev *dev,
+			const struct resource *res,
+			resource_size_t start,
+			resource_size_t size,
+			resource_size_t align);
 };
 
 #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
+
+struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
+
 void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 		     void (*release_fn)(struct pci_host_bridge *),
 		     void *release_data);
-- 
cgit v1.2.1


From ca369d51b3e1649be4a72addd6d6a168cfb3f537 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Fri, 13 Nov 2015 16:46:48 -0500
Subject: block/sd: Fix device-imposed transfer length limits

Commit 4f258a46346c ("sd: Fix maximum I/O size for BLOCK_PC requests")
had the unfortunate side-effect of removing an implicit clamp to
BLK_DEF_MAX_SECTORS for REQ_TYPE_FS requests in the block layer
code. This caused problems for some SMR drives.

Debugging this issue revealed a few problems with the existing
infrastructure since the block layer didn't know how to deal with
device-imposed limits, only limits set by the I/O controller.

 - Introduce a new queue limit, max_dev_sectors, which is used by the
   ULD to signal the maximum sectors for a REQ_TYPE_FS request.

 - Ensure that max_dev_sectors is correctly stacked and taken into
   account when overriding max_sectors through sysfs.

 - Rework sd_read_block_limits() so it saves the max_xfer and opt_xfer
   values for later processing.

 - In sd_revalidate() set the queue's max_dev_sectors based on the
   MAXIMUM TRANSFER LENGTH value in the Block Limits VPD. If this value
   is not reported, fall back to a cap based on the CDB TRANSFER LENGTH
   field size.

 - In sd_revalidate(), use OPTIMAL TRANSFER LENGTH from the Block Limits
   VPD--if reported and sane--to signal the preferred device transfer
   size for FS requests. Otherwise use BLK_DEF_MAX_SECTORS.

 - blk_limits_max_hw_sectors() is no longer used and can be removed.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=93581
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: sweeneygj@gmx.com
Tested-by: Arzeets <anatol.pomozov@gmail.com>
Tested-by: David Eisner <david.eisner@oriel.oxon.org>
Tested-by: Mario Kicherer <dev@kicherer.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 38a5ff772a37..9dacb745fa96 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -253,6 +253,7 @@ struct queue_limits {
 	unsigned long		virt_boundary_mask;
 
 	unsigned int		max_hw_sectors;
+	unsigned int		max_dev_sectors;
 	unsigned int		chunk_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
@@ -948,7 +949,6 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *,
 extern void blk_cleanup_queue(struct request_queue *);
 extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
-extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
-- 
cgit v1.2.1


From 057085e522f8bf94c2e691a5b76880f68060f8ba Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Thu, 5 Nov 2015 23:37:59 -0800
Subject: target: Fix race for SCF_COMPARE_AND_WRITE_POST checking

This patch addresses a race + use after free where the first
stage of COMPARE_AND_WRITE in compare_and_write_callback()
is rescheduled after the backend sends the secondary WRITE,
resulting in second stage compare_and_write_post() callback
completing in target_complete_ok_work() before the first
can return.

Because current code depends on checking se_cmd->se_cmd_flags
after return from se_cmd->transport_complete_callback(),
this results in first stage having SCF_COMPARE_AND_WRITE_POST
set, which incorrectly falls through into second stage CAW
processing code, eventually triggering a NULL pointer
dereference due to use after free.

To address this bug, pass in a new *post_ret parameter into
se_cmd->transport_complete_callback(), and depend upon this
value instead of ->se_cmd_flags to determine when to return
or fall through into ->queue_status() code for CAW.

Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: <stable@vger.kernel.org> # v3.12+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/target/target_core_base.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0a2c74008e53..aabf0aca0171 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -474,7 +474,7 @@ struct se_cmd {
 	struct completion	cmd_wait_comp;
 	const struct target_core_fabric_ops *se_tfo;
 	sense_reason_t		(*execute_cmd)(struct se_cmd *);
-	sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
+	sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool, int *);
 	void			*protocol_data;
 
 	unsigned char		*t_task_cdb;
-- 
cgit v1.2.1


From 3a66d7dca186ebdef9b0bf55e216778fa598062c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bart.vanassche@sandisk.com>
Date: Thu, 22 Oct 2015 16:02:14 -0700
Subject: kref: Remove kref_put_spinlock_irqsave()

The last user is gone. Hence remove this function.

Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Joern Engel <joern@logfs.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/kref.h | 33 ---------------------------------
 1 file changed, 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index 484604d184be..e15828fd71f1 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -19,7 +19,6 @@
 #include <linux/atomic.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
-#include <linux/spinlock.h>
 
 struct kref {
 	atomic_t refcount;
@@ -99,38 +98,6 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)
 	return kref_sub(kref, 1, release);
 }
 
-/**
- * kref_put_spinlock_irqsave - decrement refcount for object.
- * @kref: object.
- * @release: pointer to the function that will clean up the object when the
- *	     last reference to the object is released.
- *	     This pointer is required, and it is not acceptable to pass kfree
- *	     in as this function.
- * @lock: lock to take in release case
- *
- * Behaves identical to kref_put with one exception.  If the reference count
- * drops to zero, the lock will be taken atomically wrt dropping the reference
- * count.  The release function has to call spin_unlock() without _irqrestore.
- */
-static inline int kref_put_spinlock_irqsave(struct kref *kref,
-		void (*release)(struct kref *kref),
-		spinlock_t *lock)
-{
-	unsigned long flags;
-
-	WARN_ON(release == NULL);
-	if (atomic_add_unless(&kref->refcount, -1, 1))
-		return 0;
-	spin_lock_irqsave(lock, flags);
-	if (atomic_dec_and_test(&kref->refcount)) {
-		release(kref);
-		local_irq_restore(flags);
-		return 1;
-	}
-	spin_unlock_irqrestore(lock, flags);
-	return 0;
-}
-
 static inline int kref_put_mutex(struct kref *kref,
 				 void (*release)(struct kref *kref),
 				 struct mutex *lock)
-- 
cgit v1.2.1


From 08236c6bb2980561fba657c58fdc76f2865f236c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sat, 28 Nov 2015 16:49:27 +0100
Subject: lightnvm: unconverted ppa returned in get_bb_tbl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The get_bb_tbl function takes ppa as a generic address, which is
converted to the ppa device address within the device driver. When
the update_bbtbl callback is called from get_bb_tbl, the device
specific ppa is used, instead of the generic ppa.

Make sure to pass the generic ppa.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 3db5552b17d5..c6916aec43b6 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -179,7 +179,7 @@ typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
 typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
 typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
 				nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, struct ppa_addr, int,
+typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
 				nvm_bb_update_fn *, void *);
 typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
 typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
-- 
cgit v1.2.1


From bf4e6b4e757488dee1b6a581f49c7ac34cd217f8 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Thu, 26 Nov 2015 08:46:57 +0100
Subject: block: Always check queue limits for cloned requests

When a cloned request is retried on other queues it always needs
to be checked against the queue limits of that queue.
Otherwise the calculations for nr_phys_segments might be wrong,
leading to a crash in scsi_init_sgtable().

To clarify this the patch renames blk_rq_check_limits()
to blk_cloned_rq_check_limits() and removes the symbol
export, as the new function should only be used for
cloned requests and never exported.

Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Ewan Milne <emilne@redhat.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Hannes Reinecke <hare@suse.de>
Fixes: e2a60da74 ("block: Clean up special command handling logic")
Cc: stable@vger.kernel.org # 3.7+
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c0d2b7927c1f..c06f8eaa42ff 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -773,7 +773,6 @@ extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
-extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     struct bio_set *bs, gfp_t gfp_mask,
-- 
cgit v1.2.1


From 880621c2605b82eb5af91a2c94223df6f5a3fb64 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sun, 22 Nov 2015 17:46:09 +0100
Subject: packet: Allow packets with only a header (but no payload)

Commit 9c7077622dd91 ("packet: make packet_snd fail on len smaller
than l2 header") added validation for the packet size in packet_snd.
This change enforces that every packet needs a header (with at least
hard_header_len bytes) plus a payload with at least one byte. Before
this change the payload was optional.

This fixes PPPoE connections which do not have a "Service" or
"Host-Uniq" configured (which is violating the spec, but is still
widely used in real-world setups). Those are currently failing with the
following message: "pppd: packet size is too short (24 <= 24)"

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 67bfac1abfc1..3b5d134e945a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1398,7 +1398,8 @@ enum netdev_priv_flags {
  *	@dma:		DMA channel
  *	@mtu:		Interface MTU value
  *	@type:		Interface hardware type
- *	@hard_header_len: Hardware header length
+ *	@hard_header_len: Hardware header length, which means that this is the
+ *			  minimum size of a packet.
  *
  *	@needed_headroom: Extra headroom the hardware may need, but not in all
  *			  cases can this be guaranteed
-- 
cgit v1.2.1


From 304d888b29cf96f1dd53511ee686499cd8cdf249 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 27 Nov 2015 18:17:05 +0100
Subject: Revert "ipv6: ndisc: inherit metadata dst when creating ndisc
 requests"

This reverts commit ab450605b35caa768ca33e86db9403229bf42be4.

In IPv6, we cannot inherit the dst of the original dst. ndisc packets
are IPv6 packets and may take another route than the original packet.

This patch breaks the following scenario: a packet comes from eth0 and
is forwarded through vxlan1. The encapsulated packet triggers an NS
which cannot be sent because of the wrong route.

CC: Jiri Benc <jbenc@redhat.com>
CC: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ndisc.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index bf3937431030..2d8edaad29cb 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -181,8 +181,7 @@ void ndisc_cleanup(void);
 int ndisc_rcv(struct sk_buff *skb);
 
 void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr,
-		   struct sk_buff *oskb);
+		   const struct in6_addr *daddr, const struct in6_addr *saddr);
 
 void ndisc_send_rs(struct net_device *dev,
 		   const struct in6_addr *saddr, const struct in6_addr *daddr);
-- 
cgit v1.2.1


From 9cd3e072b0be17446e37d7414eac8a3499e0601e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Nov 2015 20:03:10 -0800
Subject: net: rename SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA

This patch is a cleanup to make following patch easier to
review.

Goal is to move SOCK_ASYNC_NOSPACE and SOCK_ASYNC_WAITDATA
from (struct socket)->flags to a (struct socket_wq)->flags
to benefit from RCU protection in sock_wake_async()

To ease backports, we rename both constants.

Two new helpers, sk_set_bit(int nr, struct sock *sk)
and sk_clear_bit(int nr, struct sock *sk) are added so that
following patch can change their implementation.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  6 +++---
 include/net/sock.h  | 10 ++++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index 70ac5e28e6b7..f514e4dd5521 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -34,8 +34,8 @@ struct inode;
 struct file;
 struct net;
 
-#define SOCK_ASYNC_NOSPACE	0
-#define SOCK_ASYNC_WAITDATA	1
+#define SOCKWQ_ASYNC_NOSPACE	0
+#define SOCKWQ_ASYNC_WAITDATA	1
 #define SOCK_NOSPACE		2
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
@@ -96,7 +96,7 @@ struct socket_wq {
  *  struct socket - general BSD socket
  *  @state: socket state (%SS_CONNECTED, etc)
  *  @type: socket type (%SOCK_STREAM, etc)
- *  @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
+ *  @flags: socket flags (%SOCK_NOSPACE, etc)
  *  @ops: protocol specific socket operations
  *  @file: File back pointer for gc
  *  @sk: internal networking protocol agnostic socket representation
diff --git a/include/net/sock.h b/include/net/sock.h
index 7f89e4ba18d1..c155d09d8af4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2005,6 +2005,16 @@ static inline unsigned long sock_wspace(struct sock *sk)
 	return amt;
 }
 
+static inline void sk_set_bit(int nr, struct sock *sk)
+{
+	set_bit(nr, &sk->sk_socket->flags);
+}
+
+static inline void sk_clear_bit(int nr, struct sock *sk)
+{
+	clear_bit(nr, &sk->sk_socket->flags);
+}
+
 static inline void sk_wake_async(struct sock *sk, int how, int band)
 {
 	if (sock_flag(sk, SOCK_FASYNC))
-- 
cgit v1.2.1


From ceb5d58b217098a657f3850b7a2640f995032e62 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Nov 2015 20:03:11 -0800
Subject: net: fix sock_wake_async() rcu protection

Dmitry provided a syzkaller (http://github.com/google/syzkaller)
triggering a fault in sock_wake_async() when async IO is requested.

Said program stressed af_unix sockets, but the issue is generic
and should be addressed in core networking stack.

The problem is that by the time sock_wake_async() is called,
we should not access the @flags field of 'struct socket',
as the inode containing this socket might be freed without
further notice, and without RCU grace period.

We already maintain an RCU protected structure, "struct socket_wq"
so moving SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA into it
is the safe route.

It also reduces number of cache lines needing dirtying, so might
provide a performance improvement anyway.

In followup patches, we might move remaining flags (SOCK_NOSPACE,
SOCK_PASSCRED, SOCK_PASSSEC) to save 8 bytes and let 'struct socket'
be mostly read and let it be shared between cpus.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  7 ++++++-
 include/net/sock.h  | 23 ++++++++++++++++-------
 2 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index f514e4dd5521..0b4ac7da583a 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -34,6 +34,10 @@ struct inode;
 struct file;
 struct net;
 
+/* Historically, SOCKWQ_ASYNC_NOSPACE & SOCKWQ_ASYNC_WAITDATA were located
+ * in sock->flags, but moved into sk->sk_wq->flags to be RCU protected.
+ * Eventually all flags will be in sk->sk_wq_flags.
+ */
 #define SOCKWQ_ASYNC_NOSPACE	0
 #define SOCKWQ_ASYNC_WAITDATA	1
 #define SOCK_NOSPACE		2
@@ -89,6 +93,7 @@ struct socket_wq {
 	/* Note: wait MUST be first field of socket_wq */
 	wait_queue_head_t	wait;
 	struct fasync_struct	*fasync_list;
+	unsigned long		flags; /* %SOCKWQ_ASYNC_NOSPACE, etc */
 	struct rcu_head		rcu;
 } ____cacheline_aligned_in_smp;
 
@@ -202,7 +207,7 @@ enum {
 	SOCK_WAKE_URG,
 };
 
-int sock_wake_async(struct socket *sk, int how, int band);
+int sock_wake_async(struct socket_wq *sk_wq, int how, int band);
 int sock_register(const struct net_proto_family *fam);
 void sock_unregister(int family);
 int __sock_create(struct net *net, int family, int type, int proto,
diff --git a/include/net/sock.h b/include/net/sock.h
index c155d09d8af4..0434138c5f95 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -384,8 +384,10 @@ struct sock {
 	int			sk_rcvbuf;
 
 	struct sk_filter __rcu	*sk_filter;
-	struct socket_wq __rcu	*sk_wq;
-
+	union {
+		struct socket_wq __rcu	*sk_wq;
+		struct socket_wq	*sk_wq_raw;
+	};
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
 #endif
@@ -2005,20 +2007,27 @@ static inline unsigned long sock_wspace(struct sock *sk)
 	return amt;
 }
 
+/* Note:
+ *  We use sk->sk_wq_raw, from contexts knowing this
+ *  pointer is not NULL and cannot disappear/change.
+ */
 static inline void sk_set_bit(int nr, struct sock *sk)
 {
-	set_bit(nr, &sk->sk_socket->flags);
+	set_bit(nr, &sk->sk_wq_raw->flags);
 }
 
 static inline void sk_clear_bit(int nr, struct sock *sk)
 {
-	clear_bit(nr, &sk->sk_socket->flags);
+	clear_bit(nr, &sk->sk_wq_raw->flags);
 }
 
-static inline void sk_wake_async(struct sock *sk, int how, int band)
+static inline void sk_wake_async(const struct sock *sk, int how, int band)
 {
-	if (sock_flag(sk, SOCK_FASYNC))
-		sock_wake_async(sk->sk_socket, how, band);
+	if (sock_flag(sk, SOCK_FASYNC)) {
+		rcu_read_lock();
+		sock_wake_async(rcu_dereference(sk->sk_wq), how, band);
+		rcu_read_unlock();
+	}
 }
 
 /* Since sk_{r,w}mem_alloc sums skb->truesize, even a small frame might
-- 
cgit v1.2.1


From 64031e3e8a5c042840c5123af695eec89f9e6a24 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Wed, 2 Dec 2015 15:44:22 +0800
Subject: ACPI / property: fix compile error for
 acpi_node_get_property_reference() when CONFIG_ACPI=n

In commit 60ba032ed76e ("ACPI / property: Drop size_prop from
acpi_dev_get_property_reference()"), the argument "const char *cells_name"
was dropped, but forgot to update the stub function in no-ACPI case,
it will lead to compile error when CONFIG_ACPI=n; simply remove
"const char *cells_name" to fix it.

Fixes: 60ba032ed76e "ACPI / property: Drop size_prop from acpi_dev_get_property_reference()"
Reported-by: Kejian Yan <yankejian@huawei.com>
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/acpi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 865d948c60e6..9e6f4bb4692f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -782,8 +782,8 @@ static inline int acpi_dev_get_property(struct acpi_device *adev,
 }
 
 static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
-				const char *name, const char *cells_name,
-				size_t index, struct acpi_reference_args *args)
+				const char *name, size_t index,
+				struct acpi_reference_args *args)
 {
 	return -ENXIO;
 }
-- 
cgit v1.2.1


From 69030dd1c3671625c6f766af0b64a4bb4409ac3b Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 1 Dec 2015 16:52:14 -0800
Subject: cpufreq: use last policy after online for drivers with ->setpolicy

For cpufreq drivers which use setpolicy interface, after offline->online
the policy is set to default. This can be reproduced by setting the
default policy of intel_pstate or longrun to ondemand and then change to
"performance". After offline and online, the setpolicy will be called with
the policy=ondemand.

For drivers using governors this condition is handled by storing
last_governor, during offline and restoring during online. The same should
be done for drivers using setpolicy interface. Storing last_policy during
offline and restoring during online.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ef4c5b1a860f..177c7680c1a8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -77,6 +77,7 @@ struct cpufreq_policy {
 	unsigned int		suspend_freq; /* freq to set during suspend */
 
 	unsigned int		policy; /* see above */
+	unsigned int		last_policy; /* policy before unplug */
 	struct cpufreq_governor	*governor; /* see below */
 	void			*governor_data;
 	bool			governor_enabled; /* governor start/stop flag */
-- 
cgit v1.2.1


From 45f6fad84cc305103b28d73482b344d7f5b76f39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Nov 2015 19:37:57 -0800
Subject: ipv6: add complete rcu protection around np->opt

This patch addresses multiple problems :

UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions
while socket is not locked : Other threads can change np->opt
concurrently. Dmitry posted a syzkaller
(http://github.com/google/syzkaller) program demonstrating
use-after-free.

Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock()
and dccp_v6_request_recv_sock() also need to use RCU protection
to dereference np->opt once (before calling ipv6_dup_options())

This patch adds full RCU protection to np->opt

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h |  2 +-
 include/net/ipv6.h   | 21 ++++++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0ef2a97ccdb5..402753bccafa 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -227,7 +227,7 @@ struct ipv6_pinfo {
 	struct ipv6_ac_socklist	*ipv6_ac_list;
 	struct ipv6_fl_socklist __rcu *ipv6_fl_list;
 
-	struct ipv6_txoptions	*opt;
+	struct ipv6_txoptions __rcu	*opt;
 	struct sk_buff		*pktoptions;
 	struct sk_buff		*rxpmtu;
 	struct inet6_cork	cork;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ea5a13ef85a6..9a5c9f013784 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock;
  */
 
 struct ipv6_txoptions {
+	atomic_t		refcnt;
 	/* Length of this structure */
 	int			tot_len;
 
@@ -217,7 +218,7 @@ struct ipv6_txoptions {
 	struct ipv6_opt_hdr	*dst0opt;
 	struct ipv6_rt_hdr	*srcrt;	/* Routing Header */
 	struct ipv6_opt_hdr	*dst1opt;
-
+	struct rcu_head		rcu;
 	/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
 };
 
@@ -252,6 +253,24 @@ struct ipv6_fl_socklist {
 	struct rcu_head			rcu;
 };
 
+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
+{
+	struct ipv6_txoptions *opt;
+
+	rcu_read_lock();
+	opt = rcu_dereference(np->opt);
+	if (opt && !atomic_inc_not_zero(&opt->refcnt))
+		opt = NULL;
+	rcu_read_unlock();
+	return opt;
+}
+
+static inline void txopt_put(struct ipv6_txoptions *opt)
+{
+	if (opt && atomic_dec_and_test(&opt->refcnt))
+		kfree_rcu(opt, rcu);
+}
+
 struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
 					 struct ip6_flowlabel *fl,
-- 
cgit v1.2.1


From 38ee8fb67c3457f36f5137073c4b8ac2436d2393 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 30 Nov 2015 12:17:06 -0200
Subject: sctp: convert sack_needed and sack_generation to bits

They don't need to be any bigger than that and with this we start a new
bitfield for tracking association runtime stuff, like zero window
situation.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 495c87e367b3..7bbb71081aeb 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -775,10 +775,10 @@ struct sctp_transport {
 		hb_sent:1,
 
 		/* Is the Path MTU update pending on this tranport */
-		pmtu_pending:1;
+		pmtu_pending:1,
 
-	/* Has this transport moved the ctsn since we last sacked */
-	__u32 sack_generation;
+		/* Has this transport moved the ctsn since we last sacked */
+		sack_generation:1;
 	u32 dst_cookie;
 
 	struct flowi fl;
@@ -1482,19 +1482,19 @@ struct sctp_association {
 			prsctp_capable:1,   /* Can peer do PR-SCTP? */
 			auth_capable:1;     /* Is peer doing SCTP-AUTH? */
 
-		/* Ack State   : This flag indicates if the next received
+		/* sack_needed : This flag indicates if the next received
 		 *             : packet is to be responded to with a
-		 *             : SACK. This is initializedto 0.  When a packet
-		 *             : is received it is incremented. If this value
+		 *             : SACK. This is initialized to 0.  When a packet
+		 *             : is received sack_cnt is incremented. If this value
 		 *             : reaches 2 or more, a SACK is sent and the
 		 *             : value is reset to 0. Note: This is used only
 		 *             : when no DATA chunks are received out of
 		 *             : order.  When DATA chunks are out of order,
 		 *             : SACK's are not delayed (see Section 6).
 		 */
-		__u8    sack_needed;     /* Do we need to sack the peer? */
+		__u8    sack_needed:1,     /* Do we need to sack the peer? */
+			sack_generation:1;
 		__u32	sack_cnt;
-		__u32	sack_generation;
 
 		__u32   adaptation_ind;	 /* Adaptation Code point. */
 
-- 
cgit v1.2.1


From 1f7dd3e5a6e4f093017fff12232572ee1aa4639b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 3 Dec 2015 10:18:21 -0500
Subject: cgroup: fix handling of multi-destination migration from
 subtree_control enabling

Consider the following v2 hierarchy.

  P0 (+memory) --- P1 (-memory) --- A
                                 \- B

P0 has memory enabled in its subtree_control while P1 doesn't.  If
both A and B contain processes, they would belong to the memory css of
P1.  Now if memory is enabled on P1's subtree_control, memory csses
should be created on both A and B and A's processes should be moved to
the former and B's processes the latter.  IOW, enabling controllers
can cause atomic migrations into different csses.

The core cgroup migration logic has been updated accordingly but the
controller migration methods haven't and still assume that all tasks
migrate to a single target css; furthermore, the methods were fed the
css in which subtree_control was updated which is the parent of the
target csses.  pids controller depends on the migration methods to
move charges and this made the controller attribute charges to the
wrong csses often triggering the following warning by driving a
counter negative.

 WARNING: CPU: 1 PID: 1 at kernel/cgroup_pids.c:97 pids_cancel.constprop.6+0x31/0x40()
 Modules linked in:
 CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc1+ #29
 ...
  ffffffff81f65382 ffff88007c043b90 ffffffff81551ffc 0000000000000000
  ffff88007c043bc8 ffffffff810de202 ffff88007a752000 ffff88007a29ab00
  ffff88007c043c80 ffff88007a1d8400 0000000000000001 ffff88007c043bd8
 Call Trace:
  [<ffffffff81551ffc>] dump_stack+0x4e/0x82
  [<ffffffff810de202>] warn_slowpath_common+0x82/0xc0
  [<ffffffff810de2fa>] warn_slowpath_null+0x1a/0x20
  [<ffffffff8118e031>] pids_cancel.constprop.6+0x31/0x40
  [<ffffffff8118e0fd>] pids_can_attach+0x6d/0xf0
  [<ffffffff81188a4c>] cgroup_taskset_migrate+0x6c/0x330
  [<ffffffff81188e05>] cgroup_migrate+0xf5/0x190
  [<ffffffff81189016>] cgroup_attach_task+0x176/0x200
  [<ffffffff8118949d>] __cgroup_procs_write+0x2ad/0x460
  [<ffffffff81189684>] cgroup_procs_write+0x14/0x20
  [<ffffffff811854e5>] cgroup_file_write+0x35/0x1c0
  [<ffffffff812e26f1>] kernfs_fop_write+0x141/0x190
  [<ffffffff81265f88>] __vfs_write+0x28/0xe0
  [<ffffffff812666fc>] vfs_write+0xac/0x1a0
  [<ffffffff81267019>] SyS_write+0x49/0xb0
  [<ffffffff81bcef32>] entry_SYSCALL_64_fastpath+0x12/0x76

This patch fixes the bug by removing @css parameter from the three
migration methods, ->can_attach, ->cancel_attach() and ->attach() and
updating cgroup_taskset iteration helpers to also return the destination
css in addition to the task being migrated.  All controllers are
updated accordingly.

* Controllers which don't care whether there are one or multiple
  target csses can be converted trivially.  cpu, io, freezer, perf,
  netclassid and netprio fall in this category.

* cpuset's current implementation assumes that there's single source
  and destination and thus doesn't support v2 hierarchy already.  The
  only change made by this patchset is how that single destination css
  is obtained.

* memory migration path already doesn't do anything on v2.  How the
  single destination css is obtained is updated and the prep stage of
  mem_cgroup_can_attach() is reordered to accommodate the change.

* pids is the only controller which was affected by this bug.  It now
  correctly handles multi-destination migrations and no longer causes
  counter underflow from incorrect accounting.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Aleksa Sarai <cyphar@cyphar.com>
---
 include/linux/cgroup-defs.h |  9 +++------
 include/linux/cgroup.h      | 33 ++++++++++++++++++++++-----------
 2 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 869fd4a3d28e..06b77f9dd3f2 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -422,12 +422,9 @@ struct cgroup_subsys {
 	void (*css_reset)(struct cgroup_subsys_state *css);
 	void (*css_e_css_changed)(struct cgroup_subsys_state *css);
 
-	int (*can_attach)(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset);
-	void (*cancel_attach)(struct cgroup_subsys_state *css,
-			      struct cgroup_taskset *tset);
-	void (*attach)(struct cgroup_subsys_state *css,
-		       struct cgroup_taskset *tset);
+	int (*can_attach)(struct cgroup_taskset *tset);
+	void (*cancel_attach)(struct cgroup_taskset *tset);
+	void (*attach)(struct cgroup_taskset *tset);
 	int (*can_fork)(struct task_struct *task, void **priv_p);
 	void (*cancel_fork)(struct task_struct *task, void *priv);
 	void (*fork)(struct task_struct *task, void *priv);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f64083030ad5..cb91b44f5f78 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -120,8 +120,10 @@ struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state
 struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
 						     struct cgroup_subsys_state *css);
 
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+					 struct cgroup_subsys_state **dst_cssp);
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+					struct cgroup_subsys_state **dst_cssp);
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
 			 struct css_task_iter *it);
@@ -236,30 +238,39 @@ void css_task_iter_end(struct css_task_iter *it);
 /**
  * cgroup_taskset_for_each - iterate cgroup_taskset
  * @task: the loop cursor
+ * @dst_css: the destination css
  * @tset: taskset to iterate
  *
  * @tset may contain multiple tasks and they may belong to multiple
- * processes.  When there are multiple tasks in @tset, if a task of a
- * process is in @tset, all tasks of the process are in @tset.  Also, all
- * are guaranteed to share the same source and destination csses.
+ * processes.
+ *
+ * On the v2 hierarchy, there may be tasks from multiple processes and they
+ * may not share the source or destination csses.
+ *
+ * On traditional hierarchies, when there are multiple tasks in @tset, if a
+ * task of a process is in @tset, all tasks of the process are in @tset.
+ * Also, all are guaranteed to share the same source and destination csses.
  *
  * Iteration is not in any specific order.
  */
-#define cgroup_taskset_for_each(task, tset)				\
-	for ((task) = cgroup_taskset_first((tset)); (task);		\
-	     (task) = cgroup_taskset_next((tset)))
+#define cgroup_taskset_for_each(task, dst_css, tset)			\
+	for ((task) = cgroup_taskset_first((tset), &(dst_css));		\
+	     (task);							\
+	     (task) = cgroup_taskset_next((tset), &(dst_css)))
 
 /**
  * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
  * @leader: the loop cursor
+ * @dst_css: the destination css
  * @tset: takset to iterate
  *
  * Iterate threadgroup leaders of @tset.  For single-task migrations, @tset
  * may not contain any.
  */
-#define cgroup_taskset_for_each_leader(leader, tset)			\
-	for ((leader) = cgroup_taskset_first((tset)); (leader);		\
-	     (leader) = cgroup_taskset_next((tset)))			\
+#define cgroup_taskset_for_each_leader(leader, dst_css, tset)		\
+	for ((leader) = cgroup_taskset_first((tset), &(dst_css));	\
+	     (leader);							\
+	     (leader) = cgroup_taskset_next((tset), &(dst_css)))	\
 		if ((leader) != (leader)->group_leader)			\
 			;						\
 		else
-- 
cgit v1.2.1


From 6bd4f355df2eae80b8a5c7b097371cd1e05f20d5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Dec 2015 21:53:57 -0800
Subject: ipv6: kill sk_dst_lock

While testing the np->opt RCU conversion, I found that UDP/IPv6 was
using a mixture of xchg() and sk_dst_lock to protect concurrent changes
to sk->sk_dst_cache, leading to possible corruptions and crashes.

ip6_sk_dst_lookup_flow() uses sk_dst_check() anyway, so the simplest
way to fix the mess is to remove sk_dst_lock completely, as we did for
IPv4.

__ip6_dst_store() and ip6_dst_store() share same implementation.

sk_setup_caps() being called with socket lock being held or not,
we have to use sk_dst_set() instead of __sk_dst_set()

Note that I had to move the "np->dst_cookie = rt6_get_cookie(rt);"
in ip6_dst_store() before the sk_setup_caps(sk, dst) call.

This is because ip6_dst_store() can be called from process context,
without any lock held.

As soon as the dst is installed in sk->sk_dst_cache, dst can be freed
from another cpu doing a concurrent ip6_dst_store()

Doing the dst dereference before doing the install is needed to make
sure no use after free would trigger.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h | 17 ++++-------------
 include/net/sock.h      |  3 +--
 2 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 2bfb2ad2fab1..877f682989b8 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -133,27 +133,18 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway);
 /*
  *	Store a destination cache entry in a socket
  */
-static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				   const struct in6_addr *daddr,
-				   const struct in6_addr *saddr)
+static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
+				 const struct in6_addr *daddr,
+				 const struct in6_addr *saddr)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct rt6_info *rt = (struct rt6_info *) dst;
 
+	np->dst_cookie = rt6_get_cookie((struct rt6_info *)dst);
 	sk_setup_caps(sk, dst);
 	np->daddr_cache = daddr;
 #ifdef CONFIG_IPV6_SUBTREES
 	np->saddr_cache = saddr;
 #endif
-	np->dst_cookie = rt6_get_cookie(rt);
-}
-
-static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
-				 struct in6_addr *daddr, struct in6_addr *saddr)
-{
-	spin_lock(&sk->sk_dst_lock);
-	__ip6_dst_store(sk, dst, daddr, saddr);
-	spin_unlock(&sk->sk_dst_lock);
 }
 
 static inline bool ipv6_unicast_destination(const struct sk_buff *skb)
diff --git a/include/net/sock.h b/include/net/sock.h
index 0434138c5f95..52d27ee924f4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -254,7 +254,6 @@ struct cg_proto;
   *	@sk_wq: sock wait queue and async head
   *	@sk_rx_dst: receive input route used by early demux
   *	@sk_dst_cache: destination cache
-  *	@sk_dst_lock: destination cache lock
   *	@sk_policy: flow policy
   *	@sk_receive_queue: incoming packets
   *	@sk_wmem_alloc: transmit queue bytes committed
@@ -393,7 +392,7 @@ struct sock {
 #endif
 	struct dst_entry	*sk_rx_dst;
 	struct dst_entry __rcu	*sk_dst_cache;
-	spinlock_t		sk_dst_lock;
+	/* Note: 32bit hole on 64bit arches */
 	atomic_t		sk_wmem_alloc;
 	atomic_t		sk_omem_alloc;
 	int			sk_sndbuf;
-- 
cgit v1.2.1


From 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 1 Dec 2015 20:08:51 -0800
Subject: net_sched: fix qdisc_tree_decrease_qlen() races

qdisc_tree_decrease_qlen() suffers from two problems on multiqueue
devices.

One problem is that it updates sch->q.qlen and sch->qstats.drops
on the mq/mqprio root qdisc, while it should not : Daniele
reported underflows errors :
[  681.774821] PAX: sch->q.qlen: 0 n: 1
[  681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head;
[  681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G           O    4.2.6.201511282239-1-grsec #1
[  681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015
[  681.774956]  ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c
[  681.774959]  ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b
[  681.774960]  ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001
[  681.774962] Call Trace:
[  681.774967]  [<ffffffffa95d2810>] dump_stack+0x4c/0x7f
[  681.774970]  [<ffffffffa91a44f4>] report_size_overflow+0x34/0x50
[  681.774972]  [<ffffffffa94d17e2>] qdisc_tree_decrease_qlen+0x152/0x160
[  681.774976]  [<ffffffffc02694b1>] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel]
[  681.774978]  [<ffffffffc02680a0>] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel]
[  681.774980]  [<ffffffffa94cd92d>] __qdisc_run+0x4d/0x1d0
[  681.774983]  [<ffffffffa949b2b2>] net_tx_action+0xc2/0x160
[  681.774985]  [<ffffffffa90664c1>] __do_softirq+0xf1/0x200
[  681.774987]  [<ffffffffa90665ee>] run_ksoftirqd+0x1e/0x30
[  681.774989]  [<ffffffffa90896b0>] smpboot_thread_fn+0x150/0x260
[  681.774991]  [<ffffffffa9089560>] ? sort_range+0x40/0x40
[  681.774992]  [<ffffffffa9085fe4>] kthread+0xe4/0x100
[  681.774994]  [<ffffffffa9085f00>] ? kthread_worker_fn+0x170/0x170
[  681.774995]  [<ffffffffa95d8d1e>] ret_from_fork+0x3e/0x70

mq/mqprio have their own ways to report qlen/drops by folding stats on
all their queues, with appropriate locking.

A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup()
without proper locking : concurrent qdisc updates could corrupt the list
that qdisc_match_from_root() parses to find a qdisc given its handle.

Fix first problem adding a TCQ_F_NOPARENT qdisc flag that
qdisc_tree_decrease_qlen() can use to abort its tree traversal,
as soon as it meets a mq/mqprio qdisc child.

Second problem can be fixed by RCU protection.
Qdisc are already freed after RCU grace period, so qdisc_list_add() and
qdisc_list_del() simply have to use appropriate rcu list variants.

A future patch will add a per struct netdev_queue list anchor, so that
qdisc_tree_decrease_qlen() can have more efficient lookups.

Reported-by: Daniele Fucini <dfucini@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Cong Wang <cwang@twopensource.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4c79ce8c1f92..b2a8e6338576 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -61,6 +61,9 @@ struct Qdisc {
 				      */
 #define TCQ_F_WARN_NONWC	(1 << 16)
 #define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
+#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
+				      * qdisc_tree_decrease_qlen() should stop.
+				      */
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
-- 
cgit v1.2.1


From a0af2e538c80f3e47f1d6ddf120a153ad909e8ad Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom <thellstrom@vmware.com>
Date: Wed, 2 Dec 2015 09:24:46 -0800
Subject: drm: Fix an unwanted master inheritance v2

A client calling drmSetMaster() using a file descriptor that was opened
when another client was master would inherit the latter client's master
object and all its authenticated clients.

This is unwanted behaviour, and when this happens, instead allocate a
brand new master object for the client calling drmSetMaster().

Fixes a BUG() throw in vmw_master_set().

Cc: <stable@vger.kernel.org>
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drmP.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 0b921ae06cd8..441b26e846d8 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -309,6 +309,11 @@ struct drm_file {
 	unsigned universal_planes:1;
 	/* true if client understands atomic properties */
 	unsigned atomic:1;
+	/*
+	 * This client is allowed to gain master privileges for @master.
+	 * Protected by struct drm_device::master_mutex.
+	 */
+	unsigned allowed_master:1;
 
 	struct pid *pid;
 	kuid_t uid;
@@ -910,6 +915,7 @@ extern int drm_open(struct inode *inode, struct file *filp);
 extern ssize_t drm_read(struct file *filp, char __user *buffer,
 			size_t count, loff_t *offset);
 extern int drm_release(struct inode *inode, struct file *filp);
+extern int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv);
 
 				/* Mapping support (drm_vm.h) */
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
-- 
cgit v1.2.1


From bbc8764f80eb872d2b36302882ddfc9882de4b16 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 10 Nov 2015 17:37:31 +0100
Subject: drm/nouveau: Fix pre-nv50 pageflip events (v4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apparently pre-nv50 pageflip events happen before the actual vblank
period. Therefore that functionality got semi-disabled in

commit af4870e406126b7ac0ae7c7ce5751f25ebe60f28
Author: Mario Kleiner <mario.kleiner.de@gmail.com>
Date:   Tue May 13 00:42:08 2014 +0200

    drm/nouveau/kms/nv04-nv40: fix pageflip events via special case.

Unfortunately that hack got uprooted in

commit cc1ef118fc099295ae6aabbacc8af94d8d8885eb
Author: Thierry Reding <treding@nvidia.com>
Date:   Wed Aug 12 17:00:31 2015 +0200

    drm/irq: Make pipe unsigned and name consistent

Triggering a warning when trying to sample the vblank timestamp for a
non-existing pipe. There's a few ways to fix this:

- Open-code the old behaviour, which just enshrines this slight
  breakage of the userspace ABI.

- Revert Mario's commit and again inflict broken timestamps, again not
  pretty.

- Fix this for real by delaying the pageflip TS until the next vblank
  interrupt, thereby making it accurate.

This patch implements the third option. Since having a page flip
interrupt that happens when the pageflip gets armed and not when it
completes in the next vblank seems to be fairly common (older i915 hw
works very similarly) create a new helper to arm vblank events for
such drivers.

v2 (Mario Kleiner):
- Fix function prototypes in drmP.h
- Add missing vblank_put() for pageflip completion without
  pageflip event.
- Initialize sequence number for queued pageflip event to avoid
  trouble in drm_handle_vblank_events().
- Remove dead code and spelling fix.

v3 (Mario Kleiner):
- Add a signed-off-by and cc stable tag per Ilja's advice.

v4 (Thierry Reding):
- Fix kerneldoc typo, discovered by Michel Dänzer
- Rearrange tags and changelog

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=106431
Cc: Thierry Reding <treding@nvidia.com>
Cc: Mario Kleiner <mario.kleiner.de@gmail.com>
Acked-by: Ben Skeggs <bskeggs@redhat.com>
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Reviewed-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: stable@vger.kernel.org # v4.3
Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drmP.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 441b26e846d8..0a271ca1f7c7 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -953,6 +953,10 @@ extern void drm_send_vblank_event(struct drm_device *dev, unsigned int pipe,
 				  struct drm_pending_vblank_event *e);
 extern void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
 				       struct drm_pending_vblank_event *e);
+extern void drm_arm_vblank_event(struct drm_device *dev, unsigned int pipe,
+				 struct drm_pending_vblank_event *e);
+extern void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
+				      struct drm_pending_vblank_event *e);
 extern bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe);
 extern bool drm_crtc_handle_vblank(struct drm_crtc *crtc);
 extern int drm_vblank_get(struct drm_device *dev, unsigned int pipe);
-- 
cgit v1.2.1


From ae5515d66362b9d96cdcfce504567f0b8b7bd83e Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 4 Dec 2015 08:38:42 -0700
Subject: Revert: "vfio: Include No-IOMMU mode"

Revert commit 033291eccbdb ("vfio: Include No-IOMMU mode") due to lack
of a user.  This was originally intended to fill a need for the DPDK
driver, but uptake has been slow so rather than support an unproven
kernel interface revert it and revisit when userspace catches up.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h      | 3 ---
 include/uapi/linux/vfio.h | 7 -------
 2 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 610a86a892b8..ddb440975382 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -44,9 +44,6 @@ struct vfio_device_ops {
 	void	(*request)(void *device_data, unsigned int count);
 };
 
-extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
-extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
-
 extern int vfio_add_group_dev(struct device *dev,
 			      const struct vfio_device_ops *ops,
 			      void *device_data);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 751b69f858c8..9fd7b5d8df2f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -38,13 +38,6 @@
 
 #define VFIO_SPAPR_TCE_v2_IOMMU		7
 
-/*
- * The No-IOMMU IOMMU offers no translation or isolation for devices and
- * supports no ioctls outside of VFIO_CHECK_EXTENSION.  Use of VFIO's No-IOMMU
- * code will taint the host kernel and should be used with extreme caution.
- */
-#define VFIO_NOIOMMU_IOMMU		8
-
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
-- 
cgit v1.2.1


From 3cf92222a39cc7842c373dd90a0c204fa7d7cced Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 3 Dec 2015 20:41:29 +0800
Subject: rhashtable: Prevent spurious EBUSY errors on insertion

Thomas and Phil observed that under stress rhashtable insertion
sometimes failed with EBUSY, even though this error should only
ever be seen when we're under attack and our hash chain length
has grown to an unacceptable level, even after a rehash.

It turns out that the logic for detecting whether there is an
existing rehash is faulty.  In particular, when two threads both
try to grow the same table at the same time, one of them may see
the newly grown table and thus erroneously conclude that it had
been rehashed.  This is what leads to the EBUSY error.

This patch fixes this by remembering the current last table we
used during insertion so that rhashtable_insert_rehash can detect
when another thread has also done a resize/rehash.  When this is
detected we will give up our resize/rehash and simply retry the
insertion with the new table.

Reported-by: Thomas Graf <tgraf@suug.ch>
Reported-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rhashtable.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 843ceca9a21e..e50b31d18462 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -19,6 +19,7 @@
 
 #include <linux/atomic.h>
 #include <linux/compiler.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/jhash.h>
 #include <linux/list_nulls.h>
@@ -339,10 +340,11 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 int rhashtable_init(struct rhashtable *ht,
 		    const struct rhashtable_params *params);
 
-int rhashtable_insert_slow(struct rhashtable *ht, const void *key,
-			   struct rhash_head *obj,
-			   struct bucket_table *old_tbl);
-int rhashtable_insert_rehash(struct rhashtable *ht);
+struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
+					    const void *key,
+					    struct rhash_head *obj,
+					    struct bucket_table *old_tbl);
+int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
 
 int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
@@ -598,9 +600,11 @@ restart:
 
 	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (unlikely(new_tbl)) {
-		err = rhashtable_insert_slow(ht, key, obj, new_tbl);
-		if (err == -EAGAIN)
+		tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
+		if (!IS_ERR_OR_NULL(tbl))
 			goto slow_path;
+
+		err = PTR_ERR(tbl);
 		goto out;
 	}
 
@@ -611,7 +615,7 @@ restart:
 	if (unlikely(rht_grow_above_100(ht, tbl))) {
 slow_path:
 		spin_unlock_bh(lock);
-		err = rhashtable_insert_rehash(ht);
+		err = rhashtable_insert_rehash(ht, tbl);
 		rcu_read_unlock();
 		if (err)
 			return err;
-- 
cgit v1.2.1


From c5fb8caaf91ea6a92920cf24db10cfc94d58de0f Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 4 Dec 2015 13:54:03 +0100
Subject: vxlan: fix incorrect RCO bit in VXLAN header

Commit 3511494ce2f3d ("vxlan: Group Policy extension") changed definition of
VXLAN_HF_RCO from 0x00200000 to BIT(24). This is obviously incorrect. It's
also in violation of the RFC draft.

Fixes: 3511494ce2f3d ("vxlan: Group Policy extension")
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Tom Herbert <therbert@google.com>
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/vxlan.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index c1c899c3a51b..e289ada6adf6 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -79,7 +79,7 @@ struct vxlanhdr {
 };
 
 /* VXLAN header flags. */
-#define VXLAN_HF_RCO BIT(24)
+#define VXLAN_HF_RCO BIT(21)
 #define VXLAN_HF_VNI BIT(27)
 #define VXLAN_HF_GBP BIT(31)
 
-- 
cgit v1.2.1


From 01ce63c90170283a9855d1db4fe81934dddce648 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Fri, 4 Dec 2015 15:14:04 -0200
Subject: sctp: update the netstamp_needed counter when copying sockets

Dmitry Vyukov reported that SCTP was triggering a WARN on socket destroy
related to disabling sock timestamp.

When SCTP accepts an association or peels one off, it copies sock flags
but forgets to call net_enable_timestamp() if a packet timestamping flag
was copied, leading to extra calls to net_disable_timestamp() whenever
such clones were closed.

The fix is to call net_enable_timestamp() whenever we copy a sock with
that flag on, like tcp does.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 52d27ee924f4..b1d475b5db68 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -740,6 +740,8 @@ enum sock_flags {
 	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 };
 
+#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
+
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
 {
 	nsk->sk_flags = osk->sk_flags;
-- 
cgit v1.2.1


From 8a0d19c5ed417c78d03f4e0fa7215e58c40896d8 Mon Sep 17 00:00:00 2001
From: lucien <lucien.xin@gmail.com>
Date: Sat, 5 Dec 2015 15:35:36 +0800
Subject: sctp: start t5 timer only when peer rwnd is 0 and local state is
 SHUTDOWN_PENDING

When A sends data to B and then calls close(), A enters the
SHUTDOWN_PENDING state. If B neither announces that its rwnd is 0 nor
sends a SACK for this data, A will keep retransmitting the data until
the t5 timer expires; the Max.Retrans limit no longer takes effect,
which is bad.

If B's rwnd is not 0, A should send an abort after Max.Retrans
retransmissions. Only when B's rwnd == 0 and A's retransmissions exceed
Max.Retrans should A start the t5 timer. This is also what commit
f8d960524328 ("sctp: Enforce retransmission limit during shutdown")
intended, but it lacks the peer rwnd == 0 condition.

So fix it by adding a bit (zero_window_announced) in peer to record
whether the last announced rwnd was 0. If it was, zero_window_announced
is set. Use this bit to decide whether to start the t5 timer when
local.state is SHUTDOWN_PENDING.

Fixes: commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 7bbb71081aeb..eea9bdeecba2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1493,7 +1493,8 @@ struct sctp_association {
 		 *             : SACK's are not delayed (see Section 6).
 		 */
 		__u8    sack_needed:1,     /* Do we need to sack the peer? */
-			sack_generation:1;
+			sack_generation:1,
+			zero_window_announced:1;
 		__u32	sack_cnt;
 
 		__u32   adaptation_ind;	 /* Adaptation Code point. */
-- 
cgit v1.2.1


From 326fcfa5acca446b3f71e99f6d19881145556e5c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Sat, 5 Dec 2015 13:58:11 +0100
Subject: net: remove unnecessary semicolon in netdev_alloc_pcpu_stats()

This semicolon causes a build error if the function call is wrapped in
parentheses.

Fixes: aabc92bbe3cf ("net: add __netdev_alloc_pcpu_stats() to indicate gfp flags")
Reported-by: Imre Kaloz <kaloz@openwrt.org>
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3b5d134e945a..3143c847bddb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2084,7 +2084,7 @@ struct pcpu_sw_netstats {
 })
 
 #define netdev_alloc_pcpu_stats(type)					\
-	__netdev_alloc_pcpu_stats(type, GFP_KERNEL);
+	__netdev_alloc_pcpu_stats(type, GFP_KERNEL)
 
 #include <linux/notifier.h>
 
-- 
cgit v1.2.1


From 7c23b7c1996597dd9d60bb282fb5fa1be6ebd18b Mon Sep 17 00:00:00 2001
From: "Lu, Han" <han.lu@intel.com>
Date: Mon, 7 Dec 2015 15:59:13 +0800
Subject: ALSA: hda - Fix playback noise with 24/32 bit sample size on BXT

In BXT-P A0, HD-Audio DMA requests are issued later than expected,
which makes an audio stream sensitive to system latencies when
24/32 bit samples are playing.
Adjust the threshold of the DMA fifo to force the DMA request
sooner, improving latency tolerance at the expense of power.

v2: move Intel specific code to hda_intel.c

Signed-off-by: Lu, Han <han.lu@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_register.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h
index 2ae8812d7b1a..94dc6a9772e0 100644
--- a/include/sound/hda_register.h
+++ b/include/sound/hda_register.h
@@ -93,6 +93,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define AZX_REG_HSW_EM4			0x100c
 #define AZX_REG_HSW_EM5			0x1010
 
+/* Skylake/Broxton display HD-A controller Extended Mode registers */
+#define AZX_REG_SKL_EM4L		0x1040
+
 /* PCI space */
 #define AZX_PCIREG_TCSEL		0x44
 
-- 
cgit v1.2.1


From ea013a9b205b47b1fcbc72522146fad560af0712 Mon Sep 17 00:00:00 2001
From: Andreas Werner <andreas.werner@men.de>
Date: Fri, 4 Dec 2015 18:12:49 +0100
Subject: libata-eh.c: Introduce new ata port flag for controller which lockup
 on read log page

Some controllers lock up on ata_read_log_page().
Add a new ata port flag ATA_FLAG_NO_LOG_PAGE which can be used
to blacklist a controller.

If this flag is set, any attempt to read a log page returns an error
without actually issuing the command.

Signed-off-by: Andreas Werner <andreas.werner@men.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/libata.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83577f8fd15b..600c1e0626a5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -210,6 +210,7 @@ enum {
 	ATA_FLAG_SLAVE_POSS	= (1 << 0), /* host supports slave dev */
 					    /* (doesn't imply presence) */
 	ATA_FLAG_SATA		= (1 << 1),
+	ATA_FLAG_NO_LOG_PAGE	= (1 << 5), /* do not issue log page read */
 	ATA_FLAG_NO_ATAPI	= (1 << 6), /* No ATAPI support */
 	ATA_FLAG_PIO_DMA	= (1 << 7), /* PIO cmds via DMA */
 	ATA_FLAG_PIO_LBA48	= (1 << 8), /* Host DMA engine is LBA28 only */
-- 
cgit v1.2.1


From 57b4bd06ff0372fe1e3617889c4b37fbd500364a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:47 +0100
Subject: lightnvm: comments on constants
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is not obvious what NVM_IO_* and NVM_BLK_T_* are used for. Make sure
to comment them appropriately as the other constants.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index c6916aec43b6..935ef3844c05 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -50,9 +50,16 @@ enum {
 	NVM_IO_DUAL_ACCESS	= 0x1,
 	NVM_IO_QUAD_ACCESS	= 0x2,
 
+	/* NAND Access Modes */
 	NVM_IO_SUSPEND		= 0x80,
 	NVM_IO_SLC_MODE		= 0x100,
 	NVM_IO_SCRAMBLE_DISABLE	= 0x200,
+
+	/* Block Types */
+	NVM_BLK_T_FREE		= 0x0,
+	NVM_BLK_T_BAD		= 0x1,
+	NVM_BLK_T_DEV		= 0x2,
+	NVM_BLK_T_HOST		= 0x4,
 };
 
 struct nvm_id_group {
-- 
cgit v1.2.1


From 16f26c3aa9b9c36a9d1092ae3258461d1008481e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <m@bjorling.me>
Date: Sun, 6 Dec 2015 11:25:48 +0100
Subject: lightnvm: replace req queue with nvmdev for lld
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the case where a request queue is passed to the low level lightnvm
device driver integration, the device driver might pass its admin
commands through another queue. Instead pass nvm_dev, and let the
low level driver use the appropriate queue.

Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/lightnvm.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 935ef3844c05..034117b3be5f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -183,17 +183,17 @@ struct nvm_block;
 
 typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
 typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
-typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32,
+typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
 				nvm_l2p_update_fn *, void *);
 typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
 				nvm_bb_update_fn *, void *);
-typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int);
-typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *);
-typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *);
+typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
+typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
 typedef void (nvm_destroy_dma_pool_fn)(void *);
-typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t,
+typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
 								dma_addr_t *);
 typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
 
-- 
cgit v1.2.1


From 4639d60d2bfb7f5007b5d93788fd93c19b63f000 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <Tomer.Tayar@qlogic.com>
Date: Mon, 7 Dec 2015 06:25:56 -0500
Subject: qed: Fix corner case for chain in-between pages

The amount of chain next pointer elements between the producer
and the consumer indices depends on which pages they currently
point to. The current calculation is based only on their difference,
and it can lead to a number of free elements which is higher by 1
than the actual value.

Signed-off-by: Tomer Tayar <Tomer.Tayar@qlogic.com>
Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_chain.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index b920c3605c46..41b9049b57e2 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -111,7 +111,8 @@ static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain)
 	used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) -
 		(u32)p_chain->cons_idx;
 	if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR)
-		used -= (used / p_chain->elem_per_page);
+		used -= p_chain->prod_idx / p_chain->elem_per_page -
+			p_chain->cons_idx / p_chain->elem_per_page;
 
 	return p_chain->capacity - used;
 }
-- 
cgit v1.2.1


From 76a9a3642a0b72d5687d680150580d55b6ea9804 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <Tomer.Tayar@qlogic.com>
Date: Mon, 7 Dec 2015 06:25:57 -0500
Subject: qed: fix handling of concurrent ramrods.

Concurrent non-blocking slowpath ramrods can be completed
out-of-order on the completion chain. Recycling completed elements,
while previously sent elements are still completion pending,
can lead to overriding of active elements on the chain. Furthermore,
sending pending slowpath ramrods currently lacks the update of the
chain element physical pointer.

This patch:
* Ensures that ramrods are sent to the FW with
  consecutive echo values.
* Handles out-of-order completions by freeing only first
  successive completed entries.
* Updates the chain element physical pointer when copying
  a pending element into a free element for sending.

Signed-off-by: Tomer Tayar <Tomer.Tayar@qlogic.com>
Signed-off-by: Manish Chopra <manish.chopra@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/common_hsi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 6a4347639c03..1d1ba2c5ee7a 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -9,6 +9,8 @@
 #ifndef __COMMON_HSI__
 #define __COMMON_HSI__
 
+#define CORE_SPQE_PAGE_SIZE_BYTES                       4096
+
 #define FW_MAJOR_VERSION	8
 #define FW_MINOR_VERSION	4
 #define FW_REVISION_VERSION	2
-- 
cgit v1.2.1


From d144da8c6f51f48ec39d891ea9dff80169c45f3b Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn <mike.marciniszyn@intel.com>
Date: Mon, 2 Nov 2015 12:13:25 -0500
Subject: IB/core: use RCU for uverbs id lookup

The current implementation gets a spin_lock, and at any scale with
qib and hfi1 post send, the lock contention grows exponentially
with the number of QPs.

idr_find() is RCU compatible, so read doesn't need the lock.

Change to use rcu_read_lock() and rcu_read_unlock() in
__idr_get_uobj().

kfree_rcu() is used to ensure a grace period between the
idr removal and actual free.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-By: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_verbs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9a68a19532ba..120da1d7f57e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1271,6 +1271,7 @@ struct ib_uobject {
 	int			id;		/* index into kernel idr */
 	struct kref		ref;
 	struct rw_semaphore	mutex;		/* protects .live */
+	struct rcu_head		rcu;		/* kfree_rcu() overhead */
 	int			live;
 };
 
-- 
cgit v1.2.1


From bd5eb35f16a9c55afcf5eb1c920cbbaf09747369 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 7 Dec 2015 08:53:17 -0800
Subject: xfrm: take care of request sockets

TCP SYNACK messages might now be attached to request sockets.

XFRM needs to get back to a listener socket.

Adds new helpers that might be used elsewhere :
sk_to_full_sk() and sk_const_to_full_sk()

Note: We also need to add RCU protection for xfrm lookups,
now TCP/DCCP have lockless listener processing. This will
be addressed in separate patches.

Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener")
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 2134e6d815bc..625bdf95d673 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -210,18 +210,37 @@ struct inet_sock {
 #define IP_CMSG_ORIGDSTADDR	BIT(6)
 #define IP_CMSG_CHECKSUM	BIT(7)
 
-/* SYNACK messages might be attached to request sockets.
+/**
+ * sk_to_full_sk - Access to a full socket
+ * @sk: pointer to a socket
+ *
+ * SYNACK messages might be attached to request sockets.
  * Some places want to reach the listener in this case.
  */
-static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
+static inline struct sock *sk_to_full_sk(struct sock *sk)
 {
-	struct sock *sk = skb->sk;
-
+#ifdef CONFIG_INET
 	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
 		sk = inet_reqsk(sk)->rsk_listener;
+#endif
+	return sk;
+}
+
+/* sk_to_full_sk() variant with a const argument */
+static inline const struct sock *sk_const_to_full_sk(const struct sock *sk)
+{
+#ifdef CONFIG_INET
+	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
+		sk = ((const struct request_sock *)sk)->rsk_listener;
+#endif
 	return sk;
 }
 
+static inline struct sock *skb_to_full_sk(const struct sk_buff *skb)
+{
+	return sk_to_full_sk(skb->sk);
+}
+
 static inline struct inet_sock *inet_sk(const struct sock *sk)
 {
 	return (struct inet_sock *)sk;
-- 
cgit v1.2.1


From 533708867dd6388f643f12c87465b59e732d729d Mon Sep 17 00:00:00 2001
From: Hal Rosenstock <hal@dev.mellanox.co.il>
Date: Fri, 13 Nov 2015 15:22:22 -0500
Subject: IB/mad: Require CM send method for everything except ClassPortInfo

Receipt of CM MAD with other than the Send method for an attribute
other than the ClassPortInfo attribute is invalid.

CM attributes other than ClassPortInfo only use the send method.

The SRP initiator does not maintain a timeout policy for CM connect
requests and relies on the CM layer to do that. The result was that
the SRP initiator hung as the connect request never completed.

A new SRP target has been observed to respond to Send CM REQ
with GetResp of CM REQ with bad status. This is non conformant
with IBA spec but exposes a vulnerability in the current MAD/CM
code which will respond to the incoming GetResp of CM REQ as if
it was a valid incoming Send of CM REQ rather than tossing
this on the floor. It also causes the MAD layer not to
retransmit the original REQ even though it has not received a REP.

Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Hal Rosenstock <hal@mellanox.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_mad.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 188df91d5851..ec9b44dd3d80 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -237,6 +237,8 @@ struct ib_vendor_mad {
 	u8			data[IB_MGMT_VENDOR_DATA];
 };
 
+#define IB_MGMT_CLASSPORTINFO_ATTR_ID	cpu_to_be16(0x0001)
+
 struct ib_class_port_info {
 	u8			base_version;
 	u8			class_version;
-- 
cgit v1.2.1


From a5e14ba334e202c58e45ef47414ec94c585c1a8c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagig@mellanox.com>
Date: Wed, 28 Oct 2015 13:28:15 +0200
Subject: mlx4: Expose correct max_sge_rd limit

mlx4 devices (ConnectX-2, ConnectX-3) have a limitation
where RDMA read work queue entries cannot exceed 512 bytes.
An RDMA read wqe needs to fit in 512 bytes:
- wqe control segment (16 bytes)
- rdma segment (16 bytes)
- scatter elements (16 bytes each)

So max_sge_rd should be: (512 - 16 - 16) / 16 = 30.

Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/linux/mlx4/device.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 7501626ab529..d3133be12d92 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -426,6 +426,17 @@ enum {
 	MLX4_MAX_FAST_REG_PAGES = 511,
 };
 
+enum {
+	/*
+	 * Max wqe size for rdma read is 512 bytes, so this
+	 * limits our max_sge_rd as the wqe needs to fit:
+	 * - ctrl segment (16 bytes)
+	 * - rdma segment (16 bytes)
+	 * - scatter elements (16 bytes each)
+	 */
+	MLX4_MAX_SGE_RD	= (512 - 16 - 16) / 16
+};
+
 enum {
 	MLX4_DEV_PMC_SUBTYPE_GUID_INFO	 = 0x14,
 	MLX4_DEV_PMC_SUBTYPE_PORT_INFO	 = 0x15,
-- 
cgit v1.2.1


From 4c3141e09cfa6460bfcd5e90f73e498db654c917 Mon Sep 17 00:00:00 2001
From: Carlo Caione <carlo@endlessm.com>
Date: Tue, 1 Dec 2015 17:24:17 +0100
Subject: of/irq: Export of_irq_find_parent again

of_irq_find_parent was made static since it had no users outside of
of_irq.c. Export it again since we are going to use it again.

Signed-off-by: Carlo Caione <carlo@endlessm.com>
[robh: move of_irq_find_parent to correct ifdef section]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_irq.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 039f2eec49ce..f648acf27ed7 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index);
 extern int of_irq_get_byname(struct device_node *dev, const char *name);
 extern int of_irq_to_resource_table(struct device_node *dev,
 		struct resource *res, int nr_irqs);
+extern struct device_node *of_irq_find_parent(struct device_node *child);
 extern struct irq_domain *of_msi_get_domain(struct device *dev,
 					    struct device_node *np,
 					    enum irq_domain_bus_token token);
@@ -70,6 +71,11 @@ static inline int of_irq_to_resource_table(struct device_node *dev,
 {
 	return 0;
 }
+static inline void *of_irq_find_parent(struct device_node *child)
+{
+	return NULL;
+}
+
 static inline struct irq_domain *of_msi_get_domain(struct device *dev,
 						   struct device_node *np,
 						   enum irq_domain_bus_token token)
-- 
cgit v1.2.1


From eaddb5725357e9f05ffe5d271630f8197d089da4 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 9 Dec 2015 09:11:10 -0600
Subject: of/irq: move of_msi_map_rid declaration to the correct ifdef section

In checking fixes for of_irq_find_parent declaration location, I found
that of_msi_map_rid is also wrong. of_msi_map_rid is not implemented for
Sparc, so it should not be in the Sparc specific section of the header.
Move it to just depend on OF_IRQ.

Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_irq.h | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index f648acf27ed7..1e0deb8e8494 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -53,6 +53,7 @@ extern struct irq_domain *of_msi_get_domain(struct device *dev,
 extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
 						       u32 rid);
 extern void of_msi_configure(struct device *dev, struct device_node *np);
+u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
 #else
 static inline int of_irq_count(struct device_node *dev)
 {
@@ -90,6 +91,11 @@ static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev
 static inline void of_msi_configure(struct device *dev, struct device_node *np)
 {
 }
+static inline u32 of_msi_map_rid(struct device *dev,
+				 struct device_node *msi_np, u32 rid_in)
+{
+	return rid_in;
+}
 #endif
 
 #if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC)
@@ -99,7 +105,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np)
  * so declare it here regardless of the CONFIG_OF_IRQ setting.
  */
 extern unsigned int irq_of_parse_and_map(struct device_node *node, int index);
-u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in);
 
 #else /* !CONFIG_OF && !CONFIG_SPARC */
 static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
@@ -107,12 +112,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev,
 {
 	return 0;
 }
-
-static inline u32 of_msi_map_rid(struct device *dev,
-				 struct device_node *msi_np, u32 rid_in)
-{
-	return rid_in;
-}
 #endif /* !CONFIG_OF */
 
 #endif /* __OF_IRQ_H */
-- 
cgit v1.2.1


From d7e35dfa2531b53618b9e6edcd8752ce988ac555 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Thu, 3 Dec 2015 22:04:01 -0500
Subject: bitops.h: correctly handle rol32 with 0 byte shift

ROL on a 32 bit integer with a shift of 32 or more is undefined and the
result is arch-dependent. Avoid this by handling the trivial case of
rotating by 0 correctly.

The trivial solution of checking if shift is 0 breaks gcc's detection
of this code as a ROL instruction, which is unacceptable.

This bug was reported and fixed in GCC
(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57157):

	The standard rotate idiom,

	  (x << n) | (x >> (32 - n))

	is recognized by gcc (for concreteness, I discuss only the case that x
	is an uint32_t here).

	However, this is portable C only for n in the range 0 < n < 32. For n
	== 0, we get x >> 32 which gives undefined behaviour according to the
	C standard (6.5.7, Bitwise shift operators). To portably support n ==
	0, one has to write the rotate as something like

	  (x << n) | (x >> ((-n) & 31))

	And this is apparently not recognized by gcc.

Note that this is broken on older GCCs and will result in slower ROL.

Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2b8ed123ad36..defeaac0745f 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -107,7 +107,7 @@ static inline __u64 ror64(__u64 word, unsigned int shift)
  */
 static inline __u32 rol32(__u32 word, unsigned int shift)
 {
-	return (word << shift) | (word >> (32 - shift));
+	return (word << shift) | (word >> ((-shift) & 31));
 }
 
 /**
-- 
cgit v1.2.1


From ecb7deceff2a51d3be50518969bc06411f485a62 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 9 Dec 2015 10:18:10 +0200
Subject: dmaengine: edma: DT: Change memcpy channel array from 16bit to 32bit
 type

This change makes the DT file easier to read since the memcpy
channels array does not need the '/bits/ 16' to be specified, which might
confuse some people.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/platform_data/edma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index e2878baeb90e..4299f4ba03bd 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -72,7 +72,7 @@ struct edma_soc_info {
 	struct edma_rsv_info	*rsv;
 
 	/* List of channels allocated for memcpy, terminated with -1 */
-	s16			*memcpy_channels;
+	s32			*memcpy_channels;
 
 	s8	(*queue_priority_mapping)[2];
 	const s16	(*xbar_chans)[2];
-- 
cgit v1.2.1


From 633c9a840d0bf1cce690f3165bdacd8ab412949e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 9 Dec 2015 12:08:26 +0100
Subject: netfilter: nfnetlink: avoid recurrent netns lookups in call_batch

Pass the net pointer to the call_batch callback functions so we can skip
recurrent lookups.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Tested-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
---
 include/linux/netfilter/nfnetlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 249d1bb01e03..5646b24bfc64 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,7 +14,7 @@ struct nfnl_callback {
 	int (*call_rcu)(struct sock *nl, struct sk_buff *skb, 
 		    const struct nlmsghdr *nlh,
 		    const struct nlattr * const cda[]);
-	int (*call_batch)(struct sock *nl, struct sk_buff *skb,
+	int (*call_batch)(struct net *net, struct sock *nl, struct sk_buff *skb,
 			  const struct nlmsghdr *nlh,
 			  const struct nlattr * const cda[]);
 	const struct nla_policy *policy;	/* netlink attribute policy */
-- 
cgit v1.2.1


From 059393c5bdd1420bdf1bed2972f33196dff263ae Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 7 Dec 2015 10:11:11 +0000
Subject: irqchip/gic-v3: Add missing struct device_node declaration

When the GICv3 header file is used in a C file that doesn't include
any of the OF stuff, we end up with a bunch of ugly warnings.

Let's keep GCC quiet by adding a forward declaration.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: <linux-arm-kernel@lists.infradead.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Link: http://lkml.kernel.org/r/1449483072-17694-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c9ae0c6ec050..d5d798b35c1f 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -330,6 +330,7 @@ struct rdists {
 };
 
 struct irq_domain;
+struct device_node;
 int its_cpu_init(void);
 int its_init(struct device_node *node, struct rdists *rdists,
 	     struct irq_domain *domain);
-- 
cgit v1.2.1


From 5e1033561da1152c57b97ee84371dba2b3d64c25 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Fri, 11 Dec 2015 09:16:38 -0800
Subject: ses: fix additional element traversal bug

KASAN found that our additional element processing scripts drop off
the end of the VPD page into unallocated space.  The reason is that
not every element has additional information but our traversal
routines think they do, leading to them expecting far more additional
information than is present.  Fix this by adding a gate to the
traversal routine so that it only processes elements that are expected
to have additional information (list is in SES-2 section 6.1.13.1:
Additional Element Status diagnostic page overview)

Reported-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Tested-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: stable@vger.kernel.org
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/linux/enclosure.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h
index 7be22da321f3..a4cf57cd0f75 100644
--- a/include/linux/enclosure.h
+++ b/include/linux/enclosure.h
@@ -29,7 +29,11 @@
 /* A few generic types ... taken from ses-2 */
 enum enclosure_component_type {
 	ENCLOSURE_COMPONENT_DEVICE = 0x01,
+	ENCLOSURE_COMPONENT_CONTROLLER_ELECTRONICS = 0x07,
+	ENCLOSURE_COMPONENT_SCSI_TARGET_PORT = 0x14,
+	ENCLOSURE_COMPONENT_SCSI_INITIATOR_PORT = 0x15,
 	ENCLOSURE_COMPONENT_ARRAY_DEVICE = 0x17,
+	ENCLOSURE_COMPONENT_SAS_EXPANDER = 0x18,
 };
 
 /* ses-2 common element status */
-- 
cgit v1.2.1


From ad87e03213b552a5c33d5e1e7a19a73768397010 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 10 Dec 2015 15:27:21 -0500
Subject: USB: add quirk for devices with broken LPM

Some USB device / host controller combinations seem to have problems
with Link Power Management.  For example, Steinar found that his xHCI
controller wouldn't handle bandwidth calculations correctly for two
video cards simultaneously when LPM was enabled, even though the bus
had plenty of bandwidth available.

This patch introduces a new quirk flag for devices that should remain
disabled for LPM, and creates quirk entries for Steinar's devices.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Steinar H. Gunderson <sgunderson@bigfoot.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/quirks.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 9948c874e3f1..1d0043dc34e4 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -47,4 +47,7 @@
 /* device generates spurious wakeup, ignore remote wakeup capability */
 #define USB_QUIRK_IGNORE_REMOTE_WAKEUP		BIT(9)
 
+/* device can't handle Link Power Management */
+#define USB_QUIRK_NO_LPM			BIT(10)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.1


From 56f047305dd4b6b61771ac4f523718e4111052a8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Dec 2015 07:22:01 -0800
Subject: xfrm: add rcu grace period in xfrm_policy_destroy()

We will soon switch sk->sk_policy[] to RCU protection,
as SYNACK packets are sent while listener socket is not locked.

This patch simply adds RCU grace period before struct xfrm_policy
freeing, and the corresponding rcu_head in struct xfrm_policy.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4a9c21f9b4ea..8bae1ef647cd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -548,6 +548,7 @@ struct xfrm_policy {
 	u16			family;
 	struct xfrm_sec_ctx	*security;
 	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
+	struct rcu_head		rcu;
 };
 
 static inline struct net *xp_net(const struct xfrm_policy *xp)
-- 
cgit v1.2.1


From d188ba86dd07a72ebebfa22fe9cb0b0572e57740 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 8 Dec 2015 07:22:02 -0800
Subject: xfrm: add rcu protection to sk->sk_policy[]

XFRM can deal with SYNACK messages, sent while listener socket
is not locked. We add proper rcu protection to __xfrm_sk_clone_policy()
and xfrm_sk_policy_lookup()

This might serve as the first step to remove xfrm.xfrm_policy_lock
use in fast path.

Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  2 +-
 include/net/xfrm.h | 24 +++++++++++++++---------
 2 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index b1d475b5db68..eaef41433d7a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -388,7 +388,7 @@ struct sock {
 		struct socket_wq	*sk_wq_raw;
 	};
 #ifdef CONFIG_XFRM
-	struct xfrm_policy	*sk_policy[2];
+	struct xfrm_policy __rcu *sk_policy[2];
 #endif
 	struct dst_entry	*sk_rx_dst;
 	struct dst_entry __rcu	*sk_dst_cache;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8bae1ef647cd..d6f6e5006ee9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1142,12 +1142,14 @@ static inline int xfrm6_route_forward(struct sk_buff *skb)
 	return xfrm_route_forward(skb, AF_INET6);
 }
 
-int __xfrm_sk_clone_policy(struct sock *sk);
+int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk);
 
-static inline int xfrm_sk_clone_policy(struct sock *sk)
+static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
 {
-	if (unlikely(sk->sk_policy[0] || sk->sk_policy[1]))
-		return __xfrm_sk_clone_policy(sk);
+	sk->sk_policy[0] = NULL;
+	sk->sk_policy[1] = NULL;
+	if (unlikely(osk->sk_policy[0] || osk->sk_policy[1]))
+		return __xfrm_sk_clone_policy(sk, osk);
 	return 0;
 }
 
@@ -1155,12 +1157,16 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir);
 
 static inline void xfrm_sk_free_policy(struct sock *sk)
 {
-	if (unlikely(sk->sk_policy[0] != NULL)) {
-		xfrm_policy_delete(sk->sk_policy[0], XFRM_POLICY_MAX);
+	struct xfrm_policy *pol;
+
+	pol = rcu_dereference_protected(sk->sk_policy[0], 1);
+	if (unlikely(pol != NULL)) {
+		xfrm_policy_delete(pol, XFRM_POLICY_MAX);
 		sk->sk_policy[0] = NULL;
 	}
-	if (unlikely(sk->sk_policy[1] != NULL)) {
-		xfrm_policy_delete(sk->sk_policy[1], XFRM_POLICY_MAX+1);
+	pol = rcu_dereference_protected(sk->sk_policy[1], 1);
+	if (unlikely(pol != NULL)) {
+		xfrm_policy_delete(pol, XFRM_POLICY_MAX+1);
 		sk->sk_policy[1] = NULL;
 	}
 }
@@ -1170,7 +1176,7 @@ void xfrm_garbage_collect(struct net *net);
 #else
 
 static inline void xfrm_sk_free_policy(struct sock *sk) {}
-static inline int xfrm_sk_clone_policy(struct sock *sk) { return 0; }
+static inline int xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { return 0; }
 static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
 static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
 static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
-- 
cgit v1.2.1


From f7fc6bc414121954c45c5f18b70e2a8717d0d5b4 Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Thu, 10 Dec 2015 09:14:20 -0800
Subject: uapi: export ila.h

The file ila.h used for lightweight tunnels is being used by iproute2
but is not exported yet.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 628e6e64c2fb..c2e5d6cb34e3 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -186,6 +186,7 @@ header-y += if_tunnel.h
 header-y += if_vlan.h
 header-y += if_x25.h
 header-y += igmp.h
+header-y += ila.h
 header-y += in6.h
 header-y += inet_diag.h
 header-y += in.h
-- 
cgit v1.2.1


From 98e89cf02aed11166698dd53c6f14865613babb3 Mon Sep 17 00:00:00 2001
From: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Date: Fri, 11 Dec 2015 13:40:43 -0800
Subject: mm: kmemleak: mark kmemleak_init prototype as __init

The kmemleak_init() definition in mm/kmemleak.c is marked __init but its
prototype in include/linux/kmemleak.h is marked __ref since commit
a6186d89c913 ("kmemleak: Mark the early log buffer as __initdata").

This causes a section mismatch which is reported as a warning when
building with clang -Wsection, because kmemleak_init() is declared in
section .ref.text but defined in .init.text.

Fix this by marking kmemleak_init() prototype __init.

Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmemleak.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index d0a1f99e24e3..4894c6888bc6 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -25,7 +25,7 @@
 
 #ifdef CONFIG_DEBUG_KMEMLEAK
 
-extern void kmemleak_init(void) __ref;
+extern void kmemleak_init(void) __init;
 extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
 			   gfp_t gfp) __ref;
 extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
-- 
cgit v1.2.1


From 86fffe4a61dd972d5a4e23260d530be6da02f614 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 11 Dec 2015 13:40:46 -0800
Subject: kernel: remove stop_machine() Kconfig dependency

Currently the full stop_machine() routine is only enabled on SMP if
module unloading is enabled, or if the CPUs are hotpluggable.  This
leads to configurations where stop_machine() is broken as it will then
only run the callback on the local CPU with irqs disabled, and not stop
the other CPUs or run the callback on them.

For example, this breaks MTRR setup on x86 in certain configs since
ea8596bb2d8d379 ("kprobes/x86: Remove unused text_poke_smp() and
text_poke_smp_batch() functions") as the MTRR is only established on the
boot CPU.

This patch removes the Kconfig option for STOP_MACHINE and uses the SMP
and HOTPLUG_CPU config options to compile the correct stop_machine() for
the architecture, removing the false dependency on MODULE_UNLOAD in the
process.

Link: https://lkml.org/lkml/2014/10/8/124
References: https://bugs.freedesktop.org/show_bug.cgi?id=84794
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Pranith Kumar <bobby.prani@gmail.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: H. Peter Anvin <hpa@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Iulia Manda <iulia.manda21@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Chuck Ebbert <cebbert.lkml@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stop_machine.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0adedca24c5b..0e1b1540597a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -99,7 +99,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
  * grabbing every spinlock (and more).  So the "read" side to such a
  * lock is anything which disables preemption.
  */
-#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
 
 /**
  * stop_machine: freeze the machine on all CPUs and run this function
@@ -118,7 +118,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
 
 int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 				   const struct cpumask *cpus);
-#else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 
 static inline int stop_machine(cpu_stop_fn_t fn, void *data,
 				 const struct cpumask *cpus)
@@ -137,5 +137,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
 	return stop_machine(fn, data, cpus);
 }
 
-#endif	/* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#endif	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 #endif	/* _LINUX_STOP_MACHINE */
-- 
cgit v1.2.1


From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sun, 13 Dec 2015 22:11:16 +0100
Subject: sched/wait: Fix the signal handling fix

Jan Stancek reported that I wrecked things for him by fixing things for
Vladimir :/

His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which
should not be possible, however my previous patch made this possible by
unconditionally checking signal_pending().

We cannot use current->state as was done previously, because that
variable can be changed right after the store to it.  We must
instead pass the initial state along and use that.

Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers")
Reported-by: Jan Stancek <jstancek@redhat.com>
Reported-by: Chris Mason <clm@fb.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Chris Mason <clm@fb.com>
Reviewed-by: Paul Turner <pjt@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: tglx@linutronix.de
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: hpa@zytor.com
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/wait.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 1e1bf9f963a9..513b36f04dfd 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -145,7 +145,7 @@ __remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old)
 	list_del(&old->task_list);
 }
 
-typedef int wait_bit_action_f(struct wait_bit_key *);
+typedef int wait_bit_action_f(struct wait_bit_key *, int mode);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
@@ -960,10 +960,10 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 	} while (0)
 
 
-extern int bit_wait(struct wait_bit_key *);
-extern int bit_wait_io(struct wait_bit_key *);
-extern int bit_wait_timeout(struct wait_bit_key *);
-extern int bit_wait_io_timeout(struct wait_bit_key *);
+extern int bit_wait(struct wait_bit_key *, int);
+extern int bit_wait_io(struct wait_bit_key *, int);
+extern int bit_wait_timeout(struct wait_bit_key *, int);
+extern int bit_wait_io_timeout(struct wait_bit_key *, int);
 
 /**
  * wait_on_bit - wait for a bit to be cleared
-- 
cgit v1.2.1


From e5f5d74747afa799bff109644be04b00af36043e Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 14 Dec 2015 14:29:58 +0100
Subject: openvswitch: fix trivial comment typo

The commit 33db4125ec74 ("openvswitch: Rename LABEL->LABELS") left
over an old OVS_CT_ATTR_LABEL instance, fix it.

Fixes: 33db4125ec74 ("openvswitch: Rename LABEL->LABELS")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 28ccedd000f5..a27222d5b413 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -628,7 +628,7 @@ struct ovs_action_hash {
  * @OVS_CT_ATTR_MARK: u32 value followed by u32 mask. For each bit set in the
  * mask, the corresponding bit in the value is copied to the connection
  * tracking mark field in the connection.
- * @OVS_CT_ATTR_LABEL: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
+ * @OVS_CT_ATTR_LABELS: %OVS_CT_LABELS_LEN value followed by %OVS_CT_LABELS_LEN
  * mask. For each bit set in the mask, the corresponding bit in the value is
  * copied to the connection tracking label field in the connection.
  * @OVS_CT_ATTR_HELPER: variable length string defining conntrack ALG.
-- 
cgit v1.2.1


From 79462ad02e861803b3840cc782248c7359451cd9 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 14 Dec 2015 22:03:39 +0100
Subject: net: add validation for the socket syscall protocol argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

郭永刚 reported that one could simply crash the kernel as root by
using a simple program:

	int socket_fd;
	struct sockaddr_in addr;
	addr.sin_port = 0;
	addr.sin_addr.s_addr = INADDR_ANY;
	addr.sin_family = 10;

	socket_fd = socket(10,3,0x40000000);
	connect(socket_fd , &addr,16);

AF_INET, AF_INET6 sockets actually only support 8-bit protocol
identifiers. inet_sock's skc_protocol field thus is sized accordingly,
thus larger protocol identifiers simply cut off the higher bits and
store a zero in the protocol fields.

This could lead to e.g. a NULL function pointer dereference because, as a
result of the cut off, inet_num is zero and we call down to inet_autobind,
which is NULL for raw sockets.

kernel: Call Trace:
kernel:  [<ffffffff816db90e>] ? inet_autobind+0x2e/0x70
kernel:  [<ffffffff816db9a4>] inet_dgram_connect+0x54/0x80
kernel:  [<ffffffff81645069>] SYSC_connect+0xd9/0x110
kernel:  [<ffffffff810ac51b>] ? ptrace_notify+0x5b/0x80
kernel:  [<ffffffff810236d8>] ? syscall_trace_enter_phase2+0x108/0x200
kernel:  [<ffffffff81645e0e>] SyS_connect+0xe/0x10
kernel:  [<ffffffff81779515>] tracesys_phase2+0x84/0x89

I found no particular commit which introduced this problem.

CVE: CVE-2015-8543
Cc: Cong Wang <cwang@twopensource.com>
Reported-by: 郭永刚 <guoyonggang@360.cn>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index eaef41433d7a..c4205e0a3a2d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -403,6 +403,7 @@ struct sock {
 				sk_no_check_rx : 1,
 				sk_userlocks : 4,
 				sk_protocol  : 8,
+#define SK_PROTOCOL_MAX U8_MAX
 				sk_type      : 16;
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
-- 
cgit v1.2.1


From 5037e9ef9454917b047f9f3a19b4dd179fbf7cd4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 14 Dec 2015 14:08:53 -0800
Subject: net: fix IP early demux races

David Wilder reported crashes caused by dst reuse.

<quote David>
  I am seeing a crash on a distro V4.2.3 kernel caused by a double
  release of a dst_entry.  In ipv4_dst_destroy() the call to
  list_empty() finds a poisoned next pointer, indicating the dst_entry
  has already been removed from the list and freed. The crash occurs
  18 to 24 hours into a run of a network stress exerciser.
</quote>

Thanks to his detailed report and analysis, we were able to understand
the core issue.

IP early demux can associate a dst to skb, after a lookup in TCP/UDP
sockets.

When socket cache is not properly set, we want to store into
sk->sk_dst_cache the dst for future IP early demux lookups,
by acquiring a stable refcount on the dst.

Problem is this acquisition is simply using an atomic_inc(),
which works well, unless the dst was queued for destruction from
dst_release() noticing dst refcount went to zero, if DST_NOCACHE
was set on dst.

We need to make sure current refcount is not zero before incrementing
it, or risk double free as David reported.

This patch, being a stable candidate, adds two new helpers, and uses
them only from IP early demux problematic paths.

It might be possible to merge in net-next skb_dst_force() and
skb_dst_force_safe(), but I prefer having the smallest patch for stable
kernels: maybe some skb_dst_force() callers do not expect skb->dst
can suddenly be cleared.

Can probably be backported to linux-3.6 kernels.

Reported-by: David J. Wilder <dwilder@us.ibm.com>
Tested-by: David J. Wilder <dwilder@us.ibm.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h  | 33 +++++++++++++++++++++++++++++++++
 include/net/sock.h |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 1279f9b09791..c7329dcd90cc 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -322,6 +322,39 @@ static inline void skb_dst_force(struct sk_buff *skb)
 	}
 }
 
+/**
+ * dst_hold_safe - Take a reference on a dst if possible
+ * @dst: pointer to dst entry
+ *
+ * This helper returns false if it could not safely
+ * take a reference on a dst.
+ */
+static inline bool dst_hold_safe(struct dst_entry *dst)
+{
+	if (dst->flags & DST_NOCACHE)
+		return atomic_inc_not_zero(&dst->__refcnt);
+	dst_hold(dst);
+	return true;
+}
+
+/**
+ * skb_dst_force_safe - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted and not destroyed, grab a ref on it.
+ */
+static inline void skb_dst_force_safe(struct sk_buff *skb)
+{
+	if (skb_dst_is_noref(skb)) {
+		struct dst_entry *dst = skb_dst(skb);
+
+		if (!dst_hold_safe(dst))
+			dst = NULL;
+
+		skb->_skb_refdst = (unsigned long)dst;
+	}
+}
+
 
 /**
  *	__skb_tunnel_rx - prepare skb for rx reinsert
diff --git a/include/net/sock.h b/include/net/sock.h
index c4205e0a3a2d..28790fe18206 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -817,7 +817,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force(skb);
+	skb_dst_force_safe(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;
-- 
cgit v1.2.1


From 887dc9f2cef6e98dcccf807da5e6faf4f60ba483 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 15 Dec 2015 20:56:44 -0800
Subject: inet: tcp: fix inetpeer_set_addr_v4()

David Ahern added a vif field in the a4 part of inetpeer_addr struct.

This broke IPv4 TCP fast open client side and more generally tcp metrics
cache, because inetpeer_addr_cmp() is now comparing two u32 instead of
one.

inetpeer_set_addr_v4() needs to properly init vif field, otherwise
the comparison result depends on uninitialized data.

Fixes: 192132b9a034 ("net: Add support for VRFs to inetpeer cache")
Reported-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 4a6009d4486b..235c7811a86a 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -78,6 +78,7 @@ void inet_initpeers(void) __init;
 static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
 {
 	iaddr->a4.addr = ip;
+	iaddr->a4.vif = 0;
 	iaddr->family = AF_INET;
 }
 
-- 
cgit v1.2.1


From 7bbadd2d1009575dad675afc16650ebb5aa10612 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 14 Dec 2015 23:30:43 +0100
Subject: net: fix warnings in 'make htmldocs' by moving macro definition out
 of field declaration

Docbook does not like the definition of macros inside a field declaration
and adds a warning. Move the definition out.

Fixes: 79462ad02e86180 ("net: add validation for the socket syscall protocol argument")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 28790fe18206..14d3c0734007 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -403,8 +403,8 @@ struct sock {
 				sk_no_check_rx : 1,
 				sk_userlocks : 4,
 				sk_protocol  : 8,
-#define SK_PROTOCOL_MAX U8_MAX
 				sk_type      : 16;
+#define SK_PROTOCOL_MAX U8_MAX
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
 	gfp_t			sk_allocation;
-- 
cgit v1.2.1


From 454d5d882c7e412b840e3c99010fe81a9862f6fb Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Fri, 30 Oct 2015 14:58:08 +0000
Subject: xen: Add RING_COPY_REQUEST()

RING_GET_REQUEST() on a shared ring is easy to use incorrectly
(i.e., by not considering that the other end may alter the data in the
shared ring while it is being inspected).  Safe usage of a request
generally requires taking a local copy.

Provide a RING_COPY_REQUEST() macro to use instead of
RING_GET_REQUEST() and an open-coded memcpy().  This takes care of
ensuring that the copy is done correctly regardless of any possible
compiler optimizations.

Use a volatile source to prevent the compiler from reordering or
omitting the copy.

This is part of XSA155.

CC: stable@vger.kernel.org
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/xen/interface/io/ring.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 7d28aff605c7..7dc685b4057d 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -181,6 +181,20 @@ struct __name##_back_ring {						\
 #define RING_GET_REQUEST(_r, _idx)					\
     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
 
+/*
+ * Get a local copy of a request.
+ *
+ * Use this in preference to RING_GET_REQUEST() so all processing is
+ * done on a local copy that cannot be modified by the other end.
+ *
+ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
+ * to be ineffective where _req is a struct which consists of only bitfields.
+ */
+#define RING_COPY_REQUEST(_r, _idx, _req) do {				\
+	/* Use volatile to force the copy into _req. */			\
+	*(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);	\
+} while (0)
+
 #define RING_GET_RESPONSE(_r, _idx)					\
     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
 
-- 
cgit v1.2.1


From 1d5cda4076d930d6d52088ed2c7753f7c564cbd7 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 18 Dec 2015 14:22:07 -0800
Subject: include/linux/mmdebug.h: should include linux/bug.h

mmdebug.h uses BUILD_BUG_ON_INVALID(), assuming someone else included
linux/bug.h.  Include it ourselves.

This avoids build failures such as:

  arch/arm64/include/asm/pgtable.h: In function 'set_pte_at':
  arch/arm64/include/asm/pgtable.h:281:3: error: implicit declaration of function 'BUILD_BUG_ON_INVALID' [-Werror=implicit-function-declaration]
   VM_WARN_ONCE(!pte_young(pte),

Fixes: 02602a18c32d7 ("bug: completely remove code generated by disabled VM_BUG_ON()")
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 877ef226f90f..772362adf471 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -1,6 +1,7 @@
 #ifndef LINUX_MM_DEBUG_H
 #define LINUX_MM_DEBUG_H 1
 
+#include <linux/bug.h>
 #include <linux/stringify.h>
 
 struct page;
-- 
cgit v1.2.1